This commit is contained in:
2025-10-05 21:56:33 +02:00
parent d30aa7cfea
commit fe2df1514c
77 changed files with 82 additions and 12002 deletions

View File

@@ -0,0 +1,549 @@
"""
Performance & Optimization Module for AniWorld App
This module provides download speed limiting, parallel download support,
caching mechanisms, memory usage monitoring, and download resumption.
"""
import os
import threading
import time
import logging
import queue
import hashlib
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Callable
from dataclasses import dataclass, field
from concurrent.futures import ThreadPoolExecutor, as_completed
import json
import sqlite3
from contextlib import contextmanager
import gc
import psutil
import requests
@dataclass
class DownloadTask:
"""Represents a download task with all necessary information."""
task_id: str
serie_name: str
season: int
episode: int
key: str
language: str
output_path: str
temp_path: str
priority: int = 0 # Higher number = higher priority
retry_count: int = 0
max_retries: int = 3
created_at: datetime = field(default_factory=datetime.now)
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
status: str = 'pending' # pending, downloading, completed, failed, paused
progress: Dict[str, Any] = field(default_factory=dict)
error_message: Optional[str] = None
class SpeedLimiter:
"""Control download speeds to prevent bandwidth saturation."""
def __init__(self, max_speed_mbps: float = 0): # 0 = unlimited
self.max_speed_mbps = max_speed_mbps
self.max_bytes_per_second = max_speed_mbps * 1024 * 1024 if max_speed_mbps > 0 else 0
self.download_start_time = None
self.bytes_downloaded = 0
self.lock = threading.Lock()
self.logger = logging.getLogger(__name__)
def set_speed_limit(self, max_speed_mbps: float):
"""Set maximum download speed in MB/s."""
with self.lock:
self.max_speed_mbps = max_speed_mbps
self.max_bytes_per_second = max_speed_mbps * 1024 * 1024 if max_speed_mbps > 0 else 0
self.logger.info(f"Speed limit set to {max_speed_mbps} MB/s")
def start_download(self):
"""Mark the start of a new download session."""
with self.lock:
self.download_start_time = time.time()
self.bytes_downloaded = 0
def update_progress(self, bytes_downloaded: int):
"""Update download progress and apply speed limiting if needed."""
if self.max_bytes_per_second <= 0: # No limit
return
with self.lock:
self.bytes_downloaded += bytes_downloaded
if self.download_start_time:
elapsed_time = time.time() - self.download_start_time
if elapsed_time > 0:
current_speed = self.bytes_downloaded / elapsed_time
if current_speed > self.max_bytes_per_second:
# Calculate required delay
target_time = self.bytes_downloaded / self.max_bytes_per_second
delay = target_time - elapsed_time
if delay > 0:
self.logger.debug(f"Speed limiting: sleeping for {delay:.2f}s")
time.sleep(delay)
def get_current_speed(self) -> float:
"""Get current download speed in MB/s."""
with self.lock:
if self.download_start_time:
elapsed_time = time.time() - self.download_start_time
if elapsed_time > 0:
speed_bps = self.bytes_downloaded / elapsed_time
return speed_bps / (1024 * 1024) # Convert to MB/s
return 0.0
class MemoryMonitor:
"""Monitor and optimize memory usage."""
def __init__(self, warning_threshold_mb: int = 1024, critical_threshold_mb: int = 2048):
self.warning_threshold = warning_threshold_mb * 1024 * 1024
self.critical_threshold = critical_threshold_mb * 1024 * 1024
self.logger = logging.getLogger(__name__)
self.monitoring = False
self.monitor_thread = None
def start_monitoring(self, check_interval: int = 30):
"""Start continuous memory monitoring."""
if self.monitoring:
return
self.monitoring = True
self.monitor_thread = threading.Thread(
target=self._monitoring_loop,
args=(check_interval,),
daemon=True
)
self.monitor_thread.start()
self.logger.info("Memory monitoring started")
def stop_monitoring(self):
"""Stop memory monitoring."""
self.monitoring = False
if self.monitor_thread:
self.monitor_thread.join(timeout=5)
self.logger.info("Memory monitoring stopped")
def _monitoring_loop(self, check_interval: int):
"""Main monitoring loop."""
while self.monitoring:
try:
self.check_memory_usage()
time.sleep(check_interval)
except Exception as e:
self.logger.error(f"Error in memory monitoring: {e}")
time.sleep(check_interval)
def check_memory_usage(self):
"""Check current memory usage and take action if needed."""
try:
process = psutil.Process()
memory_info = process.memory_info()
memory_usage = memory_info.rss
if memory_usage > self.critical_threshold:
self.logger.warning(f"Critical memory usage: {memory_usage / (1024*1024):.1f} MB")
self.force_garbage_collection()
# Check again after GC
memory_info = process.memory_info()
memory_usage = memory_info.rss
if memory_usage > self.critical_threshold:
self.logger.error("Memory usage still critical after garbage collection")
elif memory_usage > self.warning_threshold:
self.logger.info(f"Memory usage warning: {memory_usage / (1024*1024):.1f} MB")
except Exception as e:
self.logger.error(f"Failed to check memory usage: {e}")
def force_garbage_collection(self):
"""Force garbage collection to free memory."""
self.logger.debug("Forcing garbage collection")
collected = gc.collect()
self.logger.debug(f"Garbage collection freed {collected} objects")
def get_memory_stats(self) -> Dict[str, Any]:
"""Get current memory statistics."""
try:
process = psutil.Process()
memory_info = process.memory_info()
return {
'rss_mb': memory_info.rss / (1024 * 1024),
'vms_mb': memory_info.vms / (1024 * 1024),
'percent': process.memory_percent(),
'warning_threshold_mb': self.warning_threshold / (1024 * 1024),
'critical_threshold_mb': self.critical_threshold / (1024 * 1024)
}
except Exception as e:
self.logger.error(f"Failed to get memory stats: {e}")
return {}
class ParallelDownloadManager:
"""Manage parallel downloads with configurable thread count."""
def __init__(self, max_workers: int = 3, speed_limiter: Optional[SpeedLimiter] = None):
self.max_workers = max_workers
self.speed_limiter = speed_limiter or SpeedLimiter()
self.executor = ThreadPoolExecutor(max_workers=max_workers)
self.active_tasks: Dict[str, DownloadTask] = {}
self.pending_queue = queue.PriorityQueue()
self.completed_tasks: List[DownloadTask] = []
self.failed_tasks: List[DownloadTask] = []
self.lock = threading.Lock()
self.logger = logging.getLogger(__name__)
self.running = False
self.worker_thread = None
# Statistics
self.stats = {
'total_tasks': 0,
'completed_tasks': 0,
'failed_tasks': 0,
'active_tasks': 0,
'average_speed_mbps': 0.0
}
def start(self):
"""Start the download manager."""
if self.running:
return
self.running = True
self.worker_thread = threading.Thread(target=self._worker_loop, daemon=True)
self.worker_thread.start()
self.logger.info(f"Download manager started with {self.max_workers} workers")
def stop(self):
"""Stop the download manager."""
self.running = False
# Cancel all pending tasks
with self.lock:
while not self.pending_queue.empty():
try:
_, task = self.pending_queue.get_nowait()
task.status = 'cancelled'
except queue.Empty:
break
# Shutdown executor
self.executor.shutdown(wait=True)
if self.worker_thread:
self.worker_thread.join(timeout=5)
self.logger.info("Download manager stopped")
def add_task(self, task: DownloadTask) -> str:
"""Add a download task to the queue."""
with self.lock:
self.stats['total_tasks'] += 1
# Priority queue uses negative priority for max-heap behavior
self.pending_queue.put((-task.priority, task))
self.logger.info(f"Added download task: {task.task_id}")
return task.task_id
def _worker_loop(self):
"""Main worker loop that processes download tasks."""
while self.running:
try:
# Check for pending tasks
if not self.pending_queue.empty() and len(self.active_tasks) < self.max_workers:
_, task = self.pending_queue.get_nowait()
if task.status == 'pending':
self._start_task(task)
# Check completed tasks
self._check_completed_tasks()
time.sleep(0.1) # Small delay to prevent busy waiting
except queue.Empty:
time.sleep(1)
except Exception as e:
self.logger.error(f"Error in worker loop: {e}")
time.sleep(1)
def _start_task(self, task: DownloadTask):
"""Start a download task."""
with self.lock:
task.status = 'downloading'
task.started_at = datetime.now()
self.active_tasks[task.task_id] = task
self.stats['active_tasks'] = len(self.active_tasks)
# Submit to thread pool
future = self.executor.submit(self._execute_download, task)
task.future = future
self.logger.info(f"Started download task: {task.task_id}")
def _execute_download(self, task: DownloadTask) -> bool:
"""Execute the actual download."""
try:
self.logger.info(f"Executing download: {task.serie_name} S{task.season}E{task.episode}")
# Create progress callback that respects speed limiting
def progress_callback(info):
if 'downloaded_bytes' in info:
self.speed_limiter.update_progress(info.get('downloaded_bytes', 0))
# Update task progress
task.progress.update(info)
self.speed_limiter.start_download()
# Here you would call the actual download function
# For now, simulate download
success = self._simulate_download(task, progress_callback)
return success
except Exception as e:
self.logger.error(f"Download failed for task {task.task_id}: {e}")
task.error_message = str(e)
return False
def _simulate_download(self, task: DownloadTask, progress_callback: Callable) -> bool:
"""Simulate download for testing purposes."""
# This is a placeholder - replace with actual download logic
total_size = 100 * 1024 * 1024 # 100MB simulation
downloaded = 0
chunk_size = 1024 * 1024 # 1MB chunks
while downloaded < total_size and task.status == 'downloading':
# Simulate download chunk
time.sleep(0.1)
downloaded += chunk_size
progress_info = {
'status': 'downloading',
'downloaded_bytes': downloaded,
'total_bytes': total_size,
'percent': (downloaded / total_size) * 100
}
progress_callback(progress_info)
if downloaded >= total_size:
progress_callback({'status': 'finished'})
return True
return False
def _check_completed_tasks(self):
"""Check for completed download tasks."""
completed_task_ids = []
with self.lock:
for task_id, task in self.active_tasks.items():
if hasattr(task, 'future') and task.future.done():
completed_task_ids.append(task_id)
# Process completed tasks
for task_id in completed_task_ids:
self._handle_completed_task(task_id)
def _handle_completed_task(self, task_id: str):
"""Handle a completed download task."""
with self.lock:
task = self.active_tasks.pop(task_id, None)
if not task:
return
task.completed_at = datetime.now()
self.stats['active_tasks'] = len(self.active_tasks)
try:
success = task.future.result()
if success:
task.status = 'completed'
self.completed_tasks.append(task)
self.stats['completed_tasks'] += 1
self.logger.info(f"Task completed successfully: {task_id}")
else:
task.status = 'failed'
self.failed_tasks.append(task)
self.stats['failed_tasks'] += 1
self.logger.warning(f"Task failed: {task_id}")
except Exception as e:
task.status = 'failed'
task.error_message = str(e)
self.failed_tasks.append(task)
self.stats['failed_tasks'] += 1
self.logger.error(f"Task failed with exception: {task_id} - {e}")
def get_task_status(self, task_id: str) -> Optional[Dict[str, Any]]:
"""Get status of a specific task."""
with self.lock:
# Check active tasks
if task_id in self.active_tasks:
task = self.active_tasks[task_id]
return self._task_to_dict(task)
# Check completed tasks
for task in self.completed_tasks:
if task.task_id == task_id:
return self._task_to_dict(task)
# Check failed tasks
for task in self.failed_tasks:
if task.task_id == task_id:
return self._task_to_dict(task)
return None
def _task_to_dict(self, task: DownloadTask) -> Dict[str, Any]:
"""Convert task to dictionary representation."""
return {
'task_id': task.task_id,
'serie_name': task.serie_name,
'season': task.season,
'episode': task.episode,
'status': task.status,
'progress': task.progress,
'created_at': task.created_at.isoformat(),
'started_at': task.started_at.isoformat() if task.started_at else None,
'completed_at': task.completed_at.isoformat() if task.completed_at else None,
'error_message': task.error_message,
'retry_count': task.retry_count
}
def get_all_tasks(self) -> Dict[str, List[Dict[str, Any]]]:
"""Get all tasks grouped by status."""
with self.lock:
return {
'active': [self._task_to_dict(task) for task in self.active_tasks.values()],
'completed': [self._task_to_dict(task) for task in self.completed_tasks[-50:]], # Last 50
'failed': [self._task_to_dict(task) for task in self.failed_tasks[-50:]] # Last 50
}
def get_statistics(self) -> Dict[str, Any]:
"""Get download manager statistics."""
return self.stats.copy()
def set_max_workers(self, max_workers: int):
"""Change the number of worker threads."""
if max_workers <= 0:
raise ValueError("max_workers must be positive")
self.max_workers = max_workers
# Recreate executor with new worker count
old_executor = self.executor
self.executor = ThreadPoolExecutor(max_workers=max_workers)
old_executor.shutdown(wait=False)
self.logger.info(f"Updated worker count to {max_workers}")
class ResumeManager:
"""Manage download resumption for interrupted downloads."""
def __init__(self, resume_dir: str = "./resume"):
self.resume_dir = resume_dir
self.logger = logging.getLogger(__name__)
os.makedirs(resume_dir, exist_ok=True)
def save_resume_info(self, task_id: str, resume_data: Dict[str, Any]):
"""Save resume information for a download."""
try:
resume_file = os.path.join(self.resume_dir, f"{task_id}.json")
with open(resume_file, 'w') as f:
json.dump(resume_data, f, indent=2, default=str)
self.logger.debug(f"Saved resume info for task: {task_id}")
except Exception as e:
self.logger.error(f"Failed to save resume info for {task_id}: {e}")
def load_resume_info(self, task_id: str) -> Optional[Dict[str, Any]]:
"""Load resume information for a download."""
try:
resume_file = os.path.join(self.resume_dir, f"{task_id}.json")
if os.path.exists(resume_file):
with open(resume_file, 'r') as f:
resume_data = json.load(f)
self.logger.debug(f"Loaded resume info for task: {task_id}")
return resume_data
except Exception as e:
self.logger.error(f"Failed to load resume info for {task_id}: {e}")
return None
def clear_resume_info(self, task_id: str):
"""Clear resume information after successful completion."""
try:
resume_file = os.path.join(self.resume_dir, f"{task_id}.json")
if os.path.exists(resume_file):
os.remove(resume_file)
self.logger.debug(f"Cleared resume info for task: {task_id}")
except Exception as e:
self.logger.error(f"Failed to clear resume info for {task_id}: {e}")
def get_resumable_tasks(self) -> List[str]:
"""Get list of tasks that can be resumed."""
try:
resume_files = [f for f in os.listdir(self.resume_dir) if f.endswith('.json')]
task_ids = [os.path.splitext(f)[0] for f in resume_files]
return task_ids
except Exception as e:
self.logger.error(f"Failed to get resumable tasks: {e}")
return []
# Global instances
speed_limiter = SpeedLimiter()
memory_monitor = MemoryMonitor()
download_manager = ParallelDownloadManager(max_workers=3, speed_limiter=speed_limiter)
resume_manager = ResumeManager()
def init_performance_monitoring():
"""Initialize performance monitoring components."""
memory_monitor.start_monitoring()
download_manager.start()
def cleanup_performance_monitoring():
"""Clean up performance monitoring components."""
memory_monitor.stop_monitoring()
download_manager.stop()
# Export main components
__all__ = [
'SpeedLimiter',
'MemoryMonitor',
'ParallelDownloadManager',
'ResumeManager',
'DownloadTask',
'speed_limiter',
'download_cache',
'memory_monitor',
'download_manager',
'resume_manager',
'init_performance_monitoring',
'cleanup_performance_monitoring'
]

View File

@@ -0,0 +1,293 @@
import threading
import time
from datetime import datetime, timedelta
from typing import Dict, Optional, Callable
import logging
logger = logging.getLogger(__name__)
class ProcessLock:
"""Thread-safe process lock for preventing duplicate operations."""
def __init__(self, name: str, timeout_minutes: int = 60):
self.name = name
self.timeout_minutes = timeout_minutes
self.lock = threading.RLock()
self.locked_at: Optional[datetime] = None
self.locked_by: Optional[str] = None
self.progress_callback: Optional[Callable] = None
self.is_locked = False
self.progress_data = {}
def acquire(self, locked_by: str = "system", progress_callback: Callable = None) -> bool:
"""
Attempt to acquire the lock.
Returns True if lock was acquired, False if already locked.
"""
with self.lock:
# Check if lock has expired
if self.is_locked and self.locked_at:
if datetime.now() - self.locked_at > timedelta(minutes=self.timeout_minutes):
logger.warning(f"Process lock '{self.name}' expired, releasing...")
self._release_internal()
if self.is_locked:
return False
self.is_locked = True
self.locked_at = datetime.now()
self.locked_by = locked_by
self.progress_callback = progress_callback
self.progress_data = {}
logger.info(f"Process lock '{self.name}' acquired by '{locked_by}'")
return True
def release(self) -> bool:
"""Release the lock."""
with self.lock:
if not self.is_locked:
return False
self._release_internal()
logger.info(f"Process lock '{self.name}' released")
return True
def _release_internal(self):
"""Internal method to release lock without logging."""
self.is_locked = False
self.locked_at = None
self.locked_by = None
self.progress_callback = None
self.progress_data = {}
def is_locked_by_other(self, requester: str) -> bool:
"""Check if lock is held by someone other than requester."""
with self.lock:
return self.is_locked and self.locked_by != requester
def get_status(self) -> Dict:
"""Get current lock status."""
with self.lock:
return {
'is_locked': self.is_locked,
'locked_by': self.locked_by,
'locked_at': self.locked_at.isoformat() if self.locked_at else None,
'progress': self.progress_data.copy(),
'timeout_minutes': self.timeout_minutes
}
def update_progress(self, progress_data: Dict):
"""Update progress data for this lock."""
with self.lock:
if self.is_locked:
self.progress_data.update(progress_data)
if self.progress_callback:
try:
self.progress_callback(progress_data)
except Exception as e:
logger.error(f"Progress callback error: {e}")
def __enter__(self):
"""Context manager entry."""
if not self.acquire():
raise ProcessLockError(f"Could not acquire lock '{self.name}'")
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
self.release()
class ProcessLockError(Exception):
"""Exception raised when process lock operations fail."""
pass
class ProcessLockManager:
"""Global manager for all process locks."""
def __init__(self):
self.locks: Dict[str, ProcessLock] = {}
self.manager_lock = threading.RLock()
def get_lock(self, name: str, timeout_minutes: int = 60) -> ProcessLock:
"""Get or create a process lock."""
with self.manager_lock:
if name not in self.locks:
self.locks[name] = ProcessLock(name, timeout_minutes)
return self.locks[name]
def acquire_lock(self, name: str, locked_by: str = "system",
timeout_minutes: int = 60, progress_callback: Callable = None) -> bool:
"""Acquire a named lock."""
lock = self.get_lock(name, timeout_minutes)
return lock.acquire(locked_by, progress_callback)
def release_lock(self, name: str) -> bool:
"""Release a named lock."""
with self.manager_lock:
if name in self.locks:
return self.locks[name].release()
return False
def is_locked(self, name: str) -> bool:
"""Check if a named lock is currently held."""
with self.manager_lock:
if name in self.locks:
return self.locks[name].is_locked
return False
def get_all_locks_status(self) -> Dict:
"""Get status of all locks."""
with self.manager_lock:
return {
name: lock.get_status()
for name, lock in self.locks.items()
}
def cleanup_expired_locks(self) -> int:
"""Clean up any expired locks. Returns number of locks cleaned up."""
cleaned_count = 0
with self.manager_lock:
for lock in self.locks.values():
if lock.is_locked and lock.locked_at:
if datetime.now() - lock.locked_at > timedelta(minutes=lock.timeout_minutes):
lock._release_internal()
cleaned_count += 1
logger.info(f"Cleaned up expired lock: {lock.name}")
return cleaned_count
def force_release_all(self) -> int:
"""Force release all locks. Returns number of locks released."""
released_count = 0
with self.manager_lock:
for lock in self.locks.values():
if lock.is_locked:
lock._release_internal()
released_count += 1
logger.warning(f"Force released lock: {lock.name}")
return released_count
# Global instance
process_lock_manager = ProcessLockManager()
# Predefined lock names for common operations
RESCAN_LOCK = "rescan"
DOWNLOAD_LOCK = "download"
SEARCH_LOCK = "search"
CONFIG_LOCK = "config"
def with_process_lock(lock_name: str, timeout_minutes: int = 60):
"""Decorator to protect functions with process locks."""
def decorator(func):
def wrapper(*args, **kwargs):
locked_by = kwargs.pop('_locked_by', func.__name__)
progress_callback = kwargs.pop('_progress_callback', None)
if not process_lock_manager.acquire_lock(lock_name, locked_by, timeout_minutes, progress_callback):
raise ProcessLockError(f"Process '{lock_name}' is already running")
try:
return func(*args, **kwargs)
finally:
process_lock_manager.release_lock(lock_name)
return wrapper
return decorator
def check_process_locks():
"""Check and clean up any expired process locks."""
return process_lock_manager.cleanup_expired_locks()
def get_process_status(lock_name: str) -> Dict:
"""Get status of a specific process lock."""
lock = process_lock_manager.get_lock(lock_name)
return lock.get_status()
def update_process_progress(lock_name: str, progress_data: Dict):
"""Update progress for a specific process."""
if process_lock_manager.is_locked(lock_name):
lock = process_lock_manager.get_lock(lock_name)
lock.update_progress(progress_data)
def is_process_running(lock_name: str) -> bool:
"""Check if a specific process is currently running."""
return process_lock_manager.is_locked(lock_name)
class QueueDeduplicator:
"""Prevent duplicate episodes in download queue."""
def __init__(self):
self.active_items = set() # Set of (serie_name, season, episode) tuples
self.lock = threading.RLock()
def add_episode(self, serie_name: str, season: int, episode: int) -> bool:
"""
Add episode to active set if not already present.
Returns True if added, False if duplicate.
"""
with self.lock:
episode_key = (serie_name, season, episode)
if episode_key in self.active_items:
return False
self.active_items.add(episode_key)
return True
def remove_episode(self, serie_name: str, season: int, episode: int):
"""Remove episode from active set."""
with self.lock:
episode_key = (serie_name, season, episode)
self.active_items.discard(episode_key)
def is_episode_active(self, serie_name: str, season: int, episode: int) -> bool:
"""Check if episode is currently being processed."""
with self.lock:
episode_key = (serie_name, season, episode)
return episode_key in self.active_items
def get_active_episodes(self) -> list:
"""Get list of all active episodes."""
with self.lock:
return list(self.active_items)
def clear_all(self):
"""Clear all active episodes."""
with self.lock:
self.active_items.clear()
def get_count(self) -> int:
"""Get number of active episodes."""
with self.lock:
return len(self.active_items)
# Global deduplicator instance
episode_deduplicator = QueueDeduplicator()
def add_episode_to_queue_safe(serie_name: str, season: int, episode: int) -> bool:
"""
Safely add episode to queue with deduplication.
Returns True if added, False if duplicate.
"""
return episode_deduplicator.add_episode(serie_name, season, episode)
def remove_episode_from_queue(serie_name: str, season: int, episode: int):
"""Remove episode from deduplication tracking."""
episode_deduplicator.remove_episode(serie_name, season, episode)
def is_episode_in_queue(serie_name: str, season: int, episode: int) -> bool:
"""Check if episode is already in queue/being processed."""
return episode_deduplicator.is_episode_active(serie_name, season, episode)