"""
Performance & Optimization Module for AniWorld App
This module provides download speed limiting, parallel download support,
caching mechanisms, memory usage monitoring, and download resumption.
"""
import gc
import itertools
import json
import logging
import os
import queue
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional

import psutil


@dataclass
class DownloadTask:
    """Represents a download task with all necessary information."""
    task_id: str
    serie_name: str
    season: int
    episode: int
    key: str
    language: str
    output_path: str
    temp_path: str
    priority: int = 0  # Higher number = higher priority
    retry_count: int = 0
    max_retries: int = 3
    created_at: datetime = field(default_factory=datetime.now)
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    status: str = 'pending'  # pending, downloading, completed, failed, paused
    progress: Dict[str, Any] = field(default_factory=dict)
    error_message: Optional[str] = None
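
# Usage sketch (illustrative only; the series name, key, language label, and
# paths below are placeholders, not real AniWorld values):
#
#     task = DownloadTask(
#         task_id='demo-001',
#         serie_name='Example Serie',
#         season=1,
#         episode=1,
#         key='example-provider-key',
#         language='German Dub',
#         output_path='/downloads/example.mp4',
#         temp_path='/downloads/example.mp4.part',
#         priority=1,
#     )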


class SpeedLimiter:
    """Control download speeds to prevent bandwidth saturation."""

    def __init__(self, max_speed_mbps: float = 0):  # 0 = unlimited
        # NOTE: values are megabytes per second, despite the "mbps" suffix.
        self.max_speed_mbps = max_speed_mbps
        self.max_bytes_per_second = max_speed_mbps * 1024 * 1024 if max_speed_mbps > 0 else 0
        self.download_start_time = None
        self.bytes_downloaded = 0
        self.lock = threading.Lock()
        self.logger = logging.getLogger(__name__)

    def set_speed_limit(self, max_speed_mbps: float):
        """Set maximum download speed in MB/s."""
        with self.lock:
            self.max_speed_mbps = max_speed_mbps
            self.max_bytes_per_second = max_speed_mbps * 1024 * 1024 if max_speed_mbps > 0 else 0
        self.logger.info(f"Speed limit set to {max_speed_mbps} MB/s")

    def start_download(self):
        """Mark the start of a new download session."""
        with self.lock:
            self.download_start_time = time.time()
            self.bytes_downloaded = 0

    def update_progress(self, bytes_downloaded: int):
        """Record newly downloaded bytes and apply speed limiting if needed."""
        if self.max_bytes_per_second <= 0:  # No limit
            return
        delay = 0.0
        with self.lock:
            self.bytes_downloaded += bytes_downloaded
            if self.download_start_time:
                elapsed_time = time.time() - self.download_start_time
                if elapsed_time > 0:
                    current_speed = self.bytes_downloaded / elapsed_time
                    if current_speed > self.max_bytes_per_second:
                        # Sleep just long enough that the average speed
                        # falls back to the configured limit.
                        target_time = self.bytes_downloaded / self.max_bytes_per_second
                        delay = target_time - elapsed_time
        # Sleep outside the lock so other threads can keep reporting progress.
        if delay > 0:
            self.logger.debug(f"Speed limiting: sleeping for {delay:.2f}s")
            time.sleep(delay)

    def get_current_speed(self) -> float:
        """Get current download speed in MB/s."""
        with self.lock:
            if self.download_start_time:
                elapsed_time = time.time() - self.download_start_time
                if elapsed_time > 0:
                    speed_bps = self.bytes_downloaded / elapsed_time
                    return speed_bps / (1024 * 1024)  # Convert to MB/s
        return 0.0
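
# Usage sketch (illustrative; the 5.0 MB/s figure and the chunked loop stand
# in for a real download loop - pass per-chunk byte counts, not totals):
#
#     limiter = SpeedLimiter(max_speed_mbps=5.0)
#     limiter.start_download()
#     for chunk in response.iter_content(chunk_size=1024 * 1024):
#         output_file.write(chunk)
#         limiter.update_progress(len(chunk))
#     print(f"average speed: {limiter.get_current_speed():.2f} MB/s")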


class MemoryMonitor:
    """Monitor and optimize memory usage."""

    def __init__(self, warning_threshold_mb: int = 1024, critical_threshold_mb: int = 2048):
        self.warning_threshold = warning_threshold_mb * 1024 * 1024
        self.critical_threshold = critical_threshold_mb * 1024 * 1024
        self.logger = logging.getLogger(__name__)
        self.monitoring = False
        self.monitor_thread = None

    def start_monitoring(self, check_interval: int = 30):
        """Start continuous memory monitoring."""
        if self.monitoring:
            return
        self.monitoring = True
        self.monitor_thread = threading.Thread(
            target=self._monitoring_loop,
            args=(check_interval,),
            daemon=True
        )
        self.monitor_thread.start()
        self.logger.info("Memory monitoring started")

    def stop_monitoring(self):
        """Stop memory monitoring."""
        self.monitoring = False
        if self.monitor_thread:
            self.monitor_thread.join(timeout=5)
        self.logger.info("Memory monitoring stopped")

    def _monitoring_loop(self, check_interval: int):
        """Main monitoring loop."""
        while self.monitoring:
            try:
                self.check_memory_usage()
                time.sleep(check_interval)
            except Exception as e:
                self.logger.error(f"Error in memory monitoring: {e}")
                time.sleep(check_interval)

    def check_memory_usage(self):
        """Check current memory usage and take action if needed."""
        try:
            process = psutil.Process()
            memory_usage = process.memory_info().rss
            if memory_usage > self.critical_threshold:
                self.logger.warning(f"Critical memory usage: {memory_usage / (1024 * 1024):.1f} MB")
                self.force_garbage_collection()
                # Check again after GC
                memory_usage = process.memory_info().rss
                if memory_usage > self.critical_threshold:
                    self.logger.error("Memory usage still critical after garbage collection")
            elif memory_usage > self.warning_threshold:
                self.logger.info(f"Memory usage warning: {memory_usage / (1024 * 1024):.1f} MB")
        except Exception as e:
            self.logger.error(f"Failed to check memory usage: {e}")

    def force_garbage_collection(self):
        """Force garbage collection to free memory."""
        self.logger.debug("Forcing garbage collection")
        collected = gc.collect()
        self.logger.debug(f"Garbage collection freed {collected} objects")

    def get_memory_stats(self) -> Dict[str, Any]:
        """Get current memory statistics."""
        try:
            process = psutil.Process()
            memory_info = process.memory_info()
            return {
                'rss_mb': memory_info.rss / (1024 * 1024),
                'vms_mb': memory_info.vms / (1024 * 1024),
                'percent': process.memory_percent(),
                'warning_threshold_mb': self.warning_threshold / (1024 * 1024),
                'critical_threshold_mb': self.critical_threshold / (1024 * 1024)
            }
        except Exception as e:
            self.logger.error(f"Failed to get memory stats: {e}")
            return {}
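
# Usage sketch (the thresholds and interval are illustrative):
#
#     monitor = MemoryMonitor(warning_threshold_mb=512, critical_threshold_mb=1024)
#     monitor.start_monitoring(check_interval=60)  # checks RSS once a minute
#     ...
#     print(monitor.get_memory_stats())  # e.g. {'rss_mb': 312.4, ...}
#     monitor.stop_monitoring()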


class ParallelDownloadManager:
    """Manage parallel downloads with a configurable thread count."""

    def __init__(self, max_workers: int = 3, speed_limiter: Optional[SpeedLimiter] = None):
        self.max_workers = max_workers
        self.speed_limiter = speed_limiter or SpeedLimiter()
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
        self.active_tasks: Dict[str, DownloadTask] = {}
        self.pending_queue = queue.PriorityQueue()
        # Monotonic counter used as a tie-breaker in the priority queue so
        # that tasks with equal priority are never compared to each other
        # (DownloadTask is not orderable).
        self._task_counter = itertools.count()
        self.completed_tasks: List[DownloadTask] = []
        self.failed_tasks: List[DownloadTask] = []
        self.lock = threading.Lock()
        self.logger = logging.getLogger(__name__)
        self.running = False
        self.worker_thread = None
        # Statistics
        self.stats = {
            'total_tasks': 0,
            'completed_tasks': 0,
            'failed_tasks': 0,
            'active_tasks': 0,
            'average_speed_mbps': 0.0
        }

    def start(self):
        """Start the download manager."""
        if self.running:
            return
        self.running = True
        self.worker_thread = threading.Thread(target=self._worker_loop, daemon=True)
        self.worker_thread.start()
        self.logger.info(f"Download manager started with {self.max_workers} workers")

    def stop(self):
        """Stop the download manager."""
        self.running = False
        # Cancel all pending tasks
        with self.lock:
            while not self.pending_queue.empty():
                try:
                    _, _, task = self.pending_queue.get_nowait()
                    task.status = 'cancelled'
                except queue.Empty:
                    break
        # Shut down the executor
        self.executor.shutdown(wait=True)
        if self.worker_thread:
            self.worker_thread.join(timeout=5)
        self.logger.info("Download manager stopped")

    def add_task(self, task: DownloadTask) -> str:
        """Add a download task to the queue."""
        with self.lock:
            self.stats['total_tasks'] += 1
            # Negated priority gives max-heap behavior; the counter breaks ties.
            self.pending_queue.put((-task.priority, next(self._task_counter), task))
        self.logger.info(f"Added download task: {task.task_id}")
        return task.task_id

    def _worker_loop(self):
        """Main worker loop that processes download tasks."""
        while self.running:
            try:
                # Check for pending tasks
                if not self.pending_queue.empty() and len(self.active_tasks) < self.max_workers:
                    _, _, task = self.pending_queue.get_nowait()
                    if task.status == 'pending':
                        self._start_task(task)
                # Check for completed tasks
                self._check_completed_tasks()
                time.sleep(0.1)  # Small delay to prevent busy waiting
            except queue.Empty:
                time.sleep(1)
            except Exception as e:
                self.logger.error(f"Error in worker loop: {e}")
                time.sleep(1)

    def _start_task(self, task: DownloadTask):
        """Start a download task."""
        with self.lock:
            task.status = 'downloading'
            task.started_at = datetime.now()
            self.active_tasks[task.task_id] = task
            self.stats['active_tasks'] = len(self.active_tasks)
        # Submit to the thread pool; the future is attached to the task so
        # the worker loop can poll it for completion.
        task.future = self.executor.submit(self._execute_download, task)
        self.logger.info(f"Started download task: {task.task_id}")

    def _execute_download(self, task: DownloadTask) -> bool:
        """Execute the actual download."""
        try:
            self.logger.info(f"Executing download: {task.serie_name} S{task.season}E{task.episode}")
            # The speed limiter expects per-chunk byte counts, but progress
            # callbacks report cumulative totals, so track the previous total
            # and feed the limiter only the delta.
            last_bytes = 0

            def progress_callback(info):
                nonlocal last_bytes
                downloaded = info.get('downloaded_bytes')
                if downloaded is not None:
                    self.speed_limiter.update_progress(downloaded - last_bytes)
                    last_bytes = downloaded
                # Update task progress
                task.progress.update(info)

            self.speed_limiter.start_download()
            # Here you would call the actual download function.
            # For now, simulate the download.
            return self._simulate_download(task, progress_callback)
        except Exception as e:
            self.logger.error(f"Download failed for task {task.task_id}: {e}")
            task.error_message = str(e)
            return False

    def _simulate_download(self, task: DownloadTask, progress_callback: Callable) -> bool:
        """Simulate a download for testing purposes."""
        # This is a placeholder - replace with actual download logic
        total_size = 100 * 1024 * 1024  # 100 MB simulation
        downloaded = 0
        chunk_size = 1024 * 1024  # 1 MB chunks
        while downloaded < total_size and task.status == 'downloading':
            # Simulate downloading one chunk
            time.sleep(0.1)
            downloaded += chunk_size
            progress_callback({
                'status': 'downloading',
                'downloaded_bytes': downloaded,
                'total_bytes': total_size,
                'percent': (downloaded / total_size) * 100
            })
        if downloaded >= total_size:
            progress_callback({'status': 'finished'})
            return True
        return False

    def _check_completed_tasks(self):
        """Check for completed download tasks."""
        completed_task_ids = []
        with self.lock:
            for task_id, task in self.active_tasks.items():
                if hasattr(task, 'future') and task.future.done():
                    completed_task_ids.append(task_id)
        # Process completed tasks
        for task_id in completed_task_ids:
            self._handle_completed_task(task_id)

    def _handle_completed_task(self, task_id: str):
        """Handle a completed download task."""
        with self.lock:
            task = self.active_tasks.pop(task_id, None)
            if not task:
                return
            task.completed_at = datetime.now()
            self.stats['active_tasks'] = len(self.active_tasks)
            try:
                success = task.future.result()
                if success:
                    task.status = 'completed'
                    self.completed_tasks.append(task)
                    self.stats['completed_tasks'] += 1
                    self.logger.info(f"Task completed successfully: {task_id}")
                else:
                    task.status = 'failed'
                    self.failed_tasks.append(task)
                    self.stats['failed_tasks'] += 1
                    self.logger.warning(f"Task failed: {task_id}")
            except Exception as e:
                task.status = 'failed'
                task.error_message = str(e)
                self.failed_tasks.append(task)
                self.stats['failed_tasks'] += 1
                self.logger.error(f"Task failed with exception: {task_id} - {e}")

    def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Get the status of a specific task."""
        with self.lock:
            # Check active tasks
            if task_id in self.active_tasks:
                return self._task_to_dict(self.active_tasks[task_id])
            # Check completed tasks
            for task in self.completed_tasks:
                if task.task_id == task_id:
                    return self._task_to_dict(task)
            # Check failed tasks
            for task in self.failed_tasks:
                if task.task_id == task_id:
                    return self._task_to_dict(task)
        return None

    def _task_to_dict(self, task: DownloadTask) -> Dict[str, Any]:
        """Convert a task to its dictionary representation."""
        return {
            'task_id': task.task_id,
            'serie_name': task.serie_name,
            'season': task.season,
            'episode': task.episode,
            'status': task.status,
            'progress': task.progress,
            'created_at': task.created_at.isoformat(),
            'started_at': task.started_at.isoformat() if task.started_at else None,
            'completed_at': task.completed_at.isoformat() if task.completed_at else None,
            'error_message': task.error_message,
            'retry_count': task.retry_count
        }

    def get_all_tasks(self) -> Dict[str, List[Dict[str, Any]]]:
        """Get all tasks grouped by status."""
        with self.lock:
            return {
                'active': [self._task_to_dict(task) for task in self.active_tasks.values()],
                'completed': [self._task_to_dict(task) for task in self.completed_tasks[-50:]],  # Last 50
                'failed': [self._task_to_dict(task) for task in self.failed_tasks[-50:]]  # Last 50
            }

    def get_statistics(self) -> Dict[str, Any]:
        """Get download manager statistics."""
        with self.lock:
            return self.stats.copy()

    def set_max_workers(self, max_workers: int):
        """Change the number of worker threads."""
        if max_workers <= 0:
            raise ValueError("max_workers must be positive")
        self.max_workers = max_workers
        # Recreate the executor with the new worker count; already-submitted
        # futures keep running on the old executor until they finish.
        old_executor = self.executor
        self.executor = ThreadPoolExecutor(max_workers=max_workers)
        old_executor.shutdown(wait=False)
        self.logger.info(f"Updated worker count to {max_workers}")


class ResumeManager:
    """Manage download resumption for interrupted downloads."""

    def __init__(self, resume_dir: str = "./resume"):
        self.resume_dir = resume_dir
        self.logger = logging.getLogger(__name__)
        os.makedirs(resume_dir, exist_ok=True)

    def save_resume_info(self, task_id: str, resume_data: Dict[str, Any]):
        """Save resume information for a download."""
        try:
            resume_file = os.path.join(self.resume_dir, f"{task_id}.json")
            with open(resume_file, 'w') as f:
                json.dump(resume_data, f, indent=2, default=str)
            self.logger.debug(f"Saved resume info for task: {task_id}")
        except Exception as e:
            self.logger.error(f"Failed to save resume info for {task_id}: {e}")

    def load_resume_info(self, task_id: str) -> Optional[Dict[str, Any]]:
        """Load resume information for a download."""
        try:
            resume_file = os.path.join(self.resume_dir, f"{task_id}.json")
            if os.path.exists(resume_file):
                with open(resume_file, 'r') as f:
                    resume_data = json.load(f)
                self.logger.debug(f"Loaded resume info for task: {task_id}")
                return resume_data
        except Exception as e:
            self.logger.error(f"Failed to load resume info for {task_id}: {e}")
        return None

    def clear_resume_info(self, task_id: str):
        """Clear resume information after successful completion."""
        try:
            resume_file = os.path.join(self.resume_dir, f"{task_id}.json")
            if os.path.exists(resume_file):
                os.remove(resume_file)
                self.logger.debug(f"Cleared resume info for task: {task_id}")
        except Exception as e:
            self.logger.error(f"Failed to clear resume info for {task_id}: {e}")

    def get_resumable_tasks(self) -> List[str]:
        """Get the list of tasks that can be resumed."""
        try:
            resume_files = [f for f in os.listdir(self.resume_dir) if f.endswith('.json')]
            return [os.path.splitext(f)[0] for f in resume_files]
        except Exception as e:
            self.logger.error(f"Failed to get resumable tasks: {e}")
            return []
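
# Usage sketch (illustrative; 'byte_offset' is just an example of what a
# caller might persist - the module does not prescribe a resume-data schema):
#
#     resume = ResumeManager()
#     resume.save_resume_info('demo-001', {'byte_offset': 52428800})
#     info = resume.load_resume_info('demo-001')  # -> {'byte_offset': 52428800}
#     resume.clear_resume_info('demo-001')        # after a successful finish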


# Global instances
speed_limiter = SpeedLimiter()
memory_monitor = MemoryMonitor()
download_manager = ParallelDownloadManager(max_workers=3, speed_limiter=speed_limiter)
resume_manager = ResumeManager()


def init_performance_monitoring():
    """Initialize performance monitoring components."""
    memory_monitor.start_monitoring()
    download_manager.start()


def cleanup_performance_monitoring():
    """Clean up performance monitoring components."""
    memory_monitor.stop_monitoring()
    download_manager.stop()


# Export main components
__all__ = [
    'SpeedLimiter',
    'MemoryMonitor',
    'ParallelDownloadManager',
    'ResumeManager',
    'DownloadTask',
    'speed_limiter',
    'memory_monitor',
    'download_manager',
    'resume_manager',
    'init_performance_monitoring',
    'cleanup_performance_monitoring'
]
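

# A minimal smoke test, kept under a __main__ guard so importing the module
# stays side-effect free. It queues one simulated ~100 MB download against
# the global manager (no network access; all task values are placeholders)
# and prints the resulting task status and memory stats.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    init_performance_monitoring()
    demo_task = DownloadTask(
        task_id='demo-001',
        serie_name='Example Serie',  # placeholder values throughout
        season=1,
        episode=1,
        key='example-key',
        language='German Dub',
        output_path='/tmp/demo.mp4',
        temp_path='/tmp/demo.mp4.part',
    )
    download_manager.add_task(demo_task)
    time.sleep(15)  # the simulated transfer takes roughly ten seconds
    print(download_manager.get_task_status(demo_task.task_id))
    print(memory_monitor.get_memory_stats())
    cleanup_performance_monitoring()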