refactor: restructure core→server, split large entity files into database module

- Move src/core/ → src/server/
- Split SerieList.py (531 lines) and series.py (414 lines) into src/server/database/
- Add database/models.py for SQLAlchemy models
- Update all test imports to reflect new structure
- Remove deprecated test files (test_serie_class.py, test_serie_folder_with_year.py)
This commit is contained in:
2026-06-04 21:11:53 +02:00
parent 09d454d4c0
commit 5526ab884a
76 changed files with 1186 additions and 3574 deletions

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,104 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, List
class Loader(ABC):
"""Abstract base class for anime data loaders/providers."""
@abstractmethod
def subscribe_download_progress(self, handler):
"""Subscribe a handler to the download_progress event.
Args:
handler: Callable to be called with progress dict.
"""
@abstractmethod
def unsubscribe_download_progress(self, handler):
"""Unsubscribe a handler from the download_progress event.
Args:
handler: Callable previously subscribed.
"""
@abstractmethod
def search(self, word: str) -> List[Dict[str, Any]]:
"""Search for anime series by name.
Args:
word: Search term to look for
Returns:
List of found series as dictionaries containing series information
"""
@abstractmethod
def is_language(
self,
season: int,
episode: int,
key: str,
language: str = "German Dub",
) -> bool:
"""Check if episode exists in specified language.
Args:
season: Season number (1-indexed)
episode: Episode number (1-indexed)
key: Unique series identifier/key
language: Language to check (default: German Dub)
Returns:
True if episode exists in specified language, False otherwise
"""
@abstractmethod
def download(
self,
base_directory: str,
serie_folder: str,
season: int,
episode: int,
key: str,
language: str = "German Dub"
) -> bool:
"""Download episode to specified directory.
Args:
base_directory: Base directory for downloads
serie_folder: Series folder name within base directory
season: Season number (0 for movies, 1+ for series)
episode: Episode number within season
key: Unique series identifier/key
language: Language version to download (default: German Dub)
Returns:
True if download successful, False otherwise
"""
@abstractmethod
def get_site_key(self) -> str:
"""Get the site key/identifier for this provider.
Returns:
Site key string (e.g., 'aniworld.to', 'voe.com')
"""
@abstractmethod
def get_title(self, key: str) -> str:
"""Get the human-readable title of a series.
Args:
key: Unique series identifier/key
Returns:
Series title string
"""
@abstractmethod
def get_season_episode_count(self, slug: str) -> Dict[int, int]:
"""Get season and episode counts for a series.
Args:
slug: Series slug/key identifier
Returns:
Dictionary mapping season number (int) to episode count (int)
"""

View File

@@ -0,0 +1,351 @@
"""Dynamic provider configuration management.
This module provides runtime configuration management for anime providers,
allowing dynamic updates without application restart.
"""
import json
import logging
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class ProviderSettings:
"""Configuration settings for a single provider."""
name: str
enabled: bool = True
priority: int = 0
timeout_seconds: int = 30
max_retries: int = 3
retry_delay_seconds: float = 1.0
max_concurrent_downloads: int = 3
bandwidth_limit_mbps: Optional[float] = None
custom_headers: Optional[Dict[str, str]] = None
custom_params: Optional[Dict[str, Any]] = None
def to_dict(self) -> Dict[str, Any]:
"""Convert settings to dictionary."""
return {
k: v for k, v in asdict(self).items() if v is not None
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "ProviderSettings":
"""Create settings from dictionary."""
return cls(**{k: v for k, v in data.items() if hasattr(cls, k)})
class ProviderConfigManager:
"""Manages dynamic configuration for anime providers."""
def __init__(self, config_file: Optional[Path] = None):
"""Initialize provider configuration manager.
Args:
config_file: Path to configuration file (optional).
"""
self._config_file = config_file
self._provider_settings: Dict[str, ProviderSettings] = {}
self._global_settings: Dict[str, Any] = {
"default_timeout": 30,
"default_max_retries": 3,
"default_retry_delay": 1.0,
"enable_health_monitoring": True,
"enable_failover": True,
}
# Load configuration if file exists
if config_file and config_file.exists():
self.load_config()
logger.info("Provider configuration manager initialized")
def get_provider_settings(
self, provider_name: str
) -> Optional[ProviderSettings]:
"""Get settings for a specific provider.
Args:
provider_name: Name of the provider.
Returns:
Provider settings or None if not configured.
"""
return self._provider_settings.get(provider_name)
def set_provider_settings(
self, provider_name: str, settings: ProviderSettings
) -> None:
"""Set settings for a specific provider.
Args:
provider_name: Name of the provider.
settings: Provider settings to apply.
"""
self._provider_settings[provider_name] = settings
logger.info("Updated settings for provider: %s", provider_name)
def update_provider_settings(
self, provider_name: str, **kwargs
) -> bool:
"""Update specific provider settings.
Args:
provider_name: Name of the provider.
**kwargs: Settings to update.
Returns:
True if updated, False if provider not found.
"""
if provider_name not in self._provider_settings:
# Create new settings
self._provider_settings[provider_name] = ProviderSettings(
name=provider_name, **kwargs
)
logger.info("Created new settings for provider: %s", provider_name) # noqa: E501
return True
settings = self._provider_settings[provider_name]
# Update settings
for key, value in kwargs.items():
if hasattr(settings, key):
setattr(settings, key, value)
logger.info(
f"Updated settings for provider {provider_name}: {kwargs}"
)
return True
def get_all_provider_settings(self) -> Dict[str, ProviderSettings]:
"""Get settings for all configured providers.
Returns:
Dictionary mapping provider names to their settings.
"""
return self._provider_settings.copy()
def get_enabled_providers(self) -> List[str]:
"""Get list of enabled providers.
Returns:
List of enabled provider names.
"""
return [
name
for name, settings in self._provider_settings.items()
if settings.enabled
]
def enable_provider(self, provider_name: str) -> bool:
"""Enable a provider.
Args:
provider_name: Name of the provider.
Returns:
True if enabled, False if not found.
"""
if provider_name in self._provider_settings:
self._provider_settings[provider_name].enabled = True
logger.info("Enabled provider: %s", provider_name)
return True
return False
def disable_provider(self, provider_name: str) -> bool:
"""Disable a provider.
Args:
provider_name: Name of the provider.
Returns:
True if disabled, False if not found.
"""
if provider_name in self._provider_settings:
self._provider_settings[provider_name].enabled = False
logger.info("Disabled provider: %s", provider_name)
return True
return False
def set_provider_priority(
self, provider_name: str, priority: int
) -> bool:
"""Set priority for a provider.
Lower priority values = higher priority.
Args:
provider_name: Name of the provider.
priority: Priority value (lower = higher priority).
Returns:
True if updated, False if not found.
"""
if provider_name in self._provider_settings:
self._provider_settings[provider_name].priority = priority
logger.info(
f"Set priority for {provider_name} to {priority}"
)
return True
return False
def get_providers_by_priority(self) -> List[str]:
"""Get providers sorted by priority.
Returns:
List of provider names sorted by priority (low to high).
"""
sorted_providers = sorted(
self._provider_settings.items(),
key=lambda x: x[1].priority,
)
return [name for name, _ in sorted_providers]
def get_global_setting(self, key: str) -> Optional[Any]:
"""Get a global setting value.
Args:
key: Setting key.
Returns:
Setting value or None if not found.
"""
return self._global_settings.get(key)
def set_global_setting(self, key: str, value: Any) -> None:
"""Set a global setting value.
Args:
key: Setting key.
value: Setting value.
"""
self._global_settings[key] = value
logger.info("Updated global setting %s: %s", key, value)
def get_all_global_settings(self) -> Dict[str, Any]:
"""Get all global settings.
Returns:
Dictionary of global settings.
"""
return self._global_settings.copy()
def load_config(self, file_path: Optional[Path] = None) -> bool:
"""Load configuration from file.
Args:
file_path: Path to configuration file (uses default if None).
Returns:
True if loaded successfully, False otherwise.
"""
config_path = file_path or self._config_file
if not config_path or not config_path.exists():
logger.warning(
f"Configuration file not found: {config_path}"
)
return False
try:
with open(config_path, "r", encoding="utf-8") as f:
data = json.load(f)
# Load provider settings
if "providers" in data:
for name, settings_data in data["providers"].items():
self._provider_settings[name] = (
ProviderSettings.from_dict(settings_data)
)
# Load global settings
if "global" in data:
self._global_settings.update(data["global"])
logger.info(
f"Loaded configuration from {config_path} "
f"({len(self._provider_settings)} providers)"
)
return True
except Exception as e:
logger.error(
f"Failed to load configuration from {config_path}: {e}",
exc_info=True,
)
return False
def save_config(self, file_path: Optional[Path] = None) -> bool:
"""Save configuration to file.
Args:
file_path: Path to save to (uses default if None).
Returns:
True if saved successfully, False otherwise.
"""
config_path = file_path or self._config_file
if not config_path:
logger.error("No configuration file path specified")
return False
try:
# Ensure parent directory exists
config_path.parent.mkdir(parents=True, exist_ok=True)
data = {
"providers": {
name: settings.to_dict()
for name, settings in self._provider_settings.items()
},
"global": self._global_settings,
}
with open(config_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2)
logger.info("Saved configuration to %s", config_path)
return True
except Exception as e:
logger.error(
f"Failed to save configuration to {config_path}: {e}",
exc_info=True,
)
return False
def reset_to_defaults(self) -> None:
"""Reset all settings to defaults."""
self._provider_settings.clear()
self._global_settings = {
"default_timeout": 30,
"default_max_retries": 3,
"default_retry_delay": 1.0,
"enable_health_monitoring": True,
"enable_failover": True,
}
logger.info("Reset configuration to defaults")
# Global configuration manager instance
_config_manager: Optional[ProviderConfigManager] = None
def get_config_manager(
config_file: Optional[Path] = None,
) -> ProviderConfigManager:
"""Get or create global provider configuration manager.
Args:
config_file: Configuration file path (used on first call).
Returns:
Global ProviderConfigManager instance.
"""
global _config_manager
if _config_manager is None:
_config_manager = ProviderConfigManager(config_file=config_file)
return _config_manager

View File

@@ -0,0 +1,998 @@
"""
Enhanced AniWorld Loader with Error Handling and Recovery
This module extends the original AniWorldLoader with comprehensive
error handling, retry mechanisms, and recovery strategies.
"""
import html
import json
import logging
import os
import re
import shutil
from pathlib import Path
from typing import Any, Callable, Dict, Optional
from urllib.parse import quote
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from yt_dlp import YoutubeDL
from ...infrastructure.security.file_integrity import get_integrity_manager
from ..error_handler import (
DownloadError,
NetworkError,
NonRetryableError,
RetryableError,
file_corruption_detector,
recovery_strategies,
with_error_recovery,
)
from ..interfaces.providers import Providers
from .base_provider import Loader
from .provider_config import (
ANIWORLD_HEADERS,
DEFAULT_PROVIDERS,
INVALID_PATH_CHARS,
LULUVDO_USER_AGENT,
ProviderType,
)
def _cleanup_temp_file(
temp_path: str,
logger: Optional[logging.Logger] = None,
) -> None:
"""Remove a temp file and any associated yt-dlp partial files.
Args:
temp_path: Path to the primary temp file.
logger: Optional logger for diagnostic messages.
"""
_log = logger or logging.getLogger(__name__)
candidates = [temp_path]
# yt-dlp creates fragment files like <file>.part
candidates.extend(
str(p) for p in Path(temp_path).parent.glob(
Path(temp_path).name + ".*"
)
)
for path in candidates:
if os.path.exists(path):
try:
os.remove(path)
_log.debug(f"Removed temp file: {path}")
except OSError as exc:
_log.warning(f"Failed to remove temp file {path}: {exc}")
class EnhancedAniWorldLoader(Loader):
"""Aniworld provider with retry and recovery strategies.
Also exposes metrics hooks for download statistics.
"""
def __init__(self) -> None:
super().__init__()
self.logger = logging.getLogger(__name__)
self.SUPPORTED_PROVIDERS = DEFAULT_PROVIDERS
# local copy so modifications don't mutate shared constant
self.AniworldHeaders = dict(ANIWORLD_HEADERS)
self.INVALID_PATH_CHARS = INVALID_PATH_CHARS
self.RANDOM_USER_AGENT = UserAgent().random
self.LULUVDO_USER_AGENT = LULUVDO_USER_AGENT
self.PROVIDER_HEADERS = {
ProviderType.VIDMOLY.value: ['Referer: "https://vidmoly.to"'],
ProviderType.DOODSTREAM.value: [
'Referer: "https://dood.li/"',
'Referer: "https://playmogo.com/"',
],
ProviderType.VOE.value: [f'User-Agent: {self.RANDOM_USER_AGENT}'],
ProviderType.LULUVDO.value: [
f'User-Agent: {self.LULUVDO_USER_AGENT}',
"Accept-Language: de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
'Origin: "https://luluvdo.com"',
'Referer: "https://luluvdo.com/"',
],
}
self.ANIWORLD_TO = "https://aniworld.to"
self.DEFAULT_REQUEST_TIMEOUT = 30
# Initialize session with enhanced retry configuration
self.session = self._create_robust_session()
# Cache dictionaries
self._KeyHTMLDict = {}
self._EpisodeHTMLDict = {}
# Provider manager
self.Providers = Providers()
# Download statistics
self.download_stats = {
'total_downloads': 0,
'successful_downloads': 0,
'failed_downloads': 0,
'retried_downloads': 0
}
# Read timeout from environment variable (string->int safely)
self.download_timeout = int(os.getenv("DOWNLOAD_TIMEOUT") or "600")
# Setup logging
self._setup_logging()
def _create_robust_session(self) -> requests.Session:
"""Create a session with robust retry and error handling
configuration.
"""
session = requests.Session()
# Configure retries so transient network problems are retried while we
# still fail fast on permanent errors. The status codes cover
# timeouts, rate limits, and the Cloudflare-origin 52x responses that
# AniWorld occasionally emits under load.
retries = Retry(
total=5,
backoff_factor=2, # More aggressive backoff
status_forcelist=[
408,
429,
500,
502,
503,
504,
520,
521,
522,
523,
524,
],
allowed_methods=["GET", "POST", "HEAD"],
raise_on_status=False, # Handle status errors manually
)
adapter = HTTPAdapter(
max_retries=retries,
pool_connections=10,
pool_maxsize=20,
pool_block=True
)
session.mount("https://", adapter)
session.mount("http://", adapter)
# Set default headers
session.headers.update(self.AniworldHeaders)
return session
def _setup_logging(self):
"""Setup specialized logging for download errors and missing keys."""
# Determine project root so log files land in a predictable location
# regardless of the working directory at runtime.
_project_root = Path(__file__).parent.parent.parent.parent
_logs_dir = _project_root / "logs"
_logs_dir.mkdir(parents=True, exist_ok=True)
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Download error logger — records every failed download attempt
self.download_error_logger = logging.getLogger("DownloadErrors")
if not self.download_error_logger.handlers:
handler = logging.FileHandler(str(_logs_dir / "download_errors.log"))
handler.setLevel(logging.ERROR)
handler.setFormatter(formatter)
self.download_error_logger.addHandler(handler)
self.download_error_logger.setLevel(logging.ERROR)
# No-key logger — records episodes for which no stream key was found
self.nokey_logger = logging.getLogger("NoKeyFound")
if not self.nokey_logger.handlers:
handler = logging.FileHandler(str(_logs_dir / "no_key_found.log"))
handler.setLevel(logging.ERROR)
handler.setFormatter(formatter)
self.nokey_logger.addHandler(handler)
self.nokey_logger.setLevel(logging.ERROR)
def ClearCache(self):
"""Clear all cached data."""
self._KeyHTMLDict.clear()
self._EpisodeHTMLDict.clear()
self.logger.debug("Cache cleared")
def RemoveFromCache(self):
"""Remove episode HTML cache."""
self._EpisodeHTMLDict.clear()
self.logger.debug("Episode cache cleared")
@with_error_recovery(max_retries=3, context="anime_search")
def Search(self, word: str) -> list:
"""Search for anime with error handling."""
if not word or not word.strip():
raise ValueError("Search term cannot be empty")
search_url = (
f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
)
try:
return self._fetch_anime_list_with_recovery(search_url)
except Exception as e:
self.logger.error("Search failed for term '%s': %s", word, e)
raise RetryableError(f"Search failed: {e}") from e
def _fetch_anime_list_with_recovery(self, url: str) -> list:
"""Fetch anime list with comprehensive error handling."""
try:
response = recovery_strategies.handle_network_failure(
self.session.get,
url,
timeout=self.DEFAULT_REQUEST_TIMEOUT
)
if not response.ok:
if response.status_code == 404:
raise NonRetryableError(f"URL not found: {url}")
elif response.status_code == 403:
raise NonRetryableError(f"Access forbidden: {url}")
elif response.status_code >= 500:
# Log suspicious server errors for monitoring
self.logger.warning(
f"Server error {response.status_code} from {url} "
f"- will retry"
)
raise RetryableError(f"Server error {response.status_code}")
else:
raise RetryableError(f"HTTP error {response.status_code}")
return self._parse_anime_response(response.text)
except (requests.RequestException, ConnectionError) as e:
raise NetworkError(f"Network error during anime search: {e}") from e
def _parse_anime_response(self, response_text: str) -> list:
"""Parse anime search response with error handling."""
if not response_text or not response_text.strip():
raise ValueError("Empty response from server")
clean_text = response_text.strip()
# Quick fail for obviously non-JSON responses
if not (clean_text.startswith('[') or clean_text.startswith('{')):
# Check if it's HTML error page
if clean_text.lower().startswith('<!doctype') or \
clean_text.lower().startswith('<html'):
raise ValueError("Received HTML instead of JSON")
# If doesn't start with JSON markers, likely not JSON
self.logger.warning(
"Response doesn't start with JSON markers, "
"attempting parse anyway"
)
# Attempt increasingly permissive parsing strategies to cope with
# upstream anomalies such as HTML escaping, stray BOM markers, and
# injected control characters.
parsing_strategies = [
lambda text: json.loads(html.unescape(text)),
lambda text: json.loads(text.encode('utf-8').decode('utf-8-sig')),
lambda text: json.loads(re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text))
]
for i, strategy in enumerate(parsing_strategies):
try:
decoded_data = strategy(clean_text)
if isinstance(decoded_data, list):
msg = (
f"Successfully parsed anime response with "
f"strategy {i + 1}"
)
self.logger.debug(msg)
return decoded_data
else:
msg = (
f"Strategy {i + 1} returned non-list data: "
f"{type(decoded_data)}"
)
self.logger.warning(msg)
except json.JSONDecodeError as e:
msg = f"Parsing strategy {i + 1} failed: {e}"
self.logger.debug(msg)
continue
raise ValueError(
"Could not parse anime search response with any strategy"
)
def _GetLanguageKey(self, language: str) -> int:
"""Get numeric language code."""
language_map = {
"German Dub": 1,
"English Sub": 2,
"German Sub": 3,
}
return language_map.get(language, 0)
@with_error_recovery(max_retries=2, context="language_check")
def IsLanguage(
self,
season: int,
episode: int,
key: str,
language: str = "German Dub",
) -> bool:
"""Check if episode is available in specified language."""
try:
languageCode = self._GetLanguageKey(language)
if languageCode == 0:
raise ValueError(f"Unknown language: {language}")
episode_response = self._GetEpisodeHTML(season, episode, key)
soup = BeautifulSoup(episode_response.content, "html.parser")
lang_box = soup.find("div", class_="changeLanguageBox")
if not lang_box:
debug_msg = (
f"No language box found for {key} S{season}E{episode}"
)
self.logger.debug(debug_msg)
return False
img_tags = lang_box.find_all("img")
available_languages = []
for img in img_tags:
lang_key = img.get("data-lang-key")
if lang_key and lang_key.isdigit():
available_languages.append(int(lang_key))
is_available = languageCode in available_languages
debug_msg = (
f"Language check for {key} S{season}E{episode}: "
f"Requested={languageCode}, "
f"Available={available_languages}, "
f"Result={is_available}"
)
self.logger.debug(debug_msg)
return is_available
except Exception as e:
error_msg = (
f"Language check failed for {key} S{season}E{episode}: {e}"
)
self.logger.error(error_msg)
raise RetryableError(f"Language check failed: {e}") from e
def Download(
self,
baseDirectory: str,
serieFolder: str,
season: int,
episode: int,
key: str,
language: str = "German Dub",
progress_callback: Optional[Callable] = None,
) -> bool:
"""Download episode with comprehensive error handling.
Args:
baseDirectory: Base download directory path
serieFolder: Filesystem folder name (metadata only, used for
file path construction)
season: Season number (0 for movies)
episode: Episode number
key: Series unique identifier from provider (used for
identification and API calls)
language: Audio language preference (default: German Dub)
progress_callback: Optional callback for download progress
updates
Returns:
bool: True if download succeeded, False otherwise
Raises:
DownloadError: If download fails after all retry attempts
ValueError: If required parameters are missing or invalid
"""
self.download_stats["total_downloads"] += 1
try:
# Validate inputs
if not all([baseDirectory, serieFolder, key]):
raise ValueError("Missing required parameters for download")
if season < 0 or episode < 0:
raise ValueError("Season and episode must be non-negative")
# Prepare file paths
sanitized_anime_title = "".join(
char
for char in self.GetTitle(key)
if char not in self.INVALID_PATH_CHARS
)
if not sanitized_anime_title:
sanitized_anime_title = f"Unknown_{key}"
# Generate output filename
if season == 0:
output_file = (
f"{sanitized_anime_title} - Movie {episode:02} - "
f"({language}).mp4"
)
else:
output_file = (
f"{sanitized_anime_title} - S{season:02}E{episode:03} - "
f"({language}).mp4"
)
# Create directory structure
folder_path = os.path.join(
baseDirectory, serieFolder, f"Season {season}"
)
output_path = os.path.join(folder_path, output_file)
# Check if file already exists and is valid
if os.path.exists(output_path):
is_valid = file_corruption_detector.is_valid_video_file(
output_path
)
# Also verify checksum if available
integrity_mgr = get_integrity_manager()
checksum_valid = True
if integrity_mgr.has_checksum(Path(output_path)):
checksum_valid = integrity_mgr.verify_checksum(
Path(output_path)
)
if not checksum_valid:
self.logger.warning(
f"Checksum verification failed for {output_file}"
)
if is_valid and checksum_valid:
msg = (
f"File already exists and is valid: "
f"{output_file}"
)
self.logger.info(msg)
self.download_stats["successful_downloads"] += 1
return True
else:
warning_msg = (
f"Existing file appears corrupted, removing: "
f"{output_path}"
)
self.logger.warning(warning_msg)
try:
os.remove(output_path)
# Remove checksum entry
integrity_mgr.remove_checksum(Path(output_path))
except OSError as e:
error_msg = f"Failed to remove corrupted file: {e}"
self.logger.error(error_msg)
os.makedirs(folder_path, exist_ok=True)
# Create temp directory
temp_dir = "./Temp/"
os.makedirs(temp_dir, exist_ok=True)
temp_path = os.path.join(temp_dir, output_file)
# Attempt download with recovery strategies
success = self._download_with_recovery(
season,
episode,
key,
language,
temp_path,
output_path,
progress_callback,
)
if success:
self.download_stats["successful_downloads"] += 1
success_msg = f"Successfully downloaded: {output_file}"
self.logger.info(success_msg)
else:
self.download_stats["failed_downloads"] += 1
fail_msg = (
f"Download failed for {key} S{season}E{episode} "
f"({language})"
)
self.download_error_logger.error(fail_msg)
return success
except Exception as e:
self.download_stats["failed_downloads"] += 1
err_msg = (
f"Download error for {key} S{season}E{episode}: {e}"
)
self.download_error_logger.error(err_msg, exc_info=True)
raise DownloadError(f"Download failed: {e}") from e
finally:
self.ClearCache()
def _download_with_recovery(
self,
season: int,
episode: int,
key: str,
language: str,
temp_path: str,
output_path: str,
progress_callback: Optional[Callable],
) -> bool:
"""Attempt download with multiple providers and recovery."""
for provider_name in self.SUPPORTED_PROVIDERS:
try:
info_msg = (
f"Attempting download with provider: {provider_name}"
)
self.logger.info(info_msg)
# Get download link and headers for provider
link, headers = recovery_strategies.handle_network_failure(
self._get_direct_link_from_provider,
season,
episode,
key,
language,
)
if not link:
warn_msg = (
f"No download link found for provider: "
f"{provider_name}"
)
self.logger.warning(warn_msg)
continue
# Configure yt-dlp options
ydl_opts = {
"fragment_retries": float("inf"),
"outtmpl": temp_path,
"quiet": True,
"no_warnings": True,
"progress_with_newline": False,
"nocheckcertificate": True,
"socket_timeout": self.download_timeout,
"http_chunk_size": 1024 * 1024, # 1MB chunks
"logger": self.logger,
# Use ffmpeg for HLS streams and transport stream format
"downloader": "ffmpeg",
"hls_use_mpegts": True,
}
if headers:
ydl_opts['http_headers'] = headers
if progress_callback:
ydl_opts['progress_hooks'] = [progress_callback]
# Perform download with recovery
success = recovery_strategies.handle_download_failure(
self._perform_ytdl_download,
temp_path,
ydl_opts,
link
)
if success and os.path.exists(temp_path):
# Verify downloaded file
if file_corruption_detector.is_valid_video_file(temp_path):
# Move to final location
# Use copyfile instead of copy2 to avoid metadata permission issues
shutil.copyfile(temp_path, output_path)
# Calculate and store checksum for integrity
integrity_mgr = get_integrity_manager()
try:
checksum = integrity_mgr.store_checksum(
Path(output_path)
)
filename = Path(output_path).name
self.logger.info(
f"Stored checksum for {filename}: "
f"{checksum[:16]}..."
)
except Exception as e:
self.logger.warning(
f"Failed to store checksum: {e}"
)
# Clean up temp file
try:
os.remove(temp_path)
except Exception as e:
warn_msg = f"Failed to remove temp file: {e}"
self.logger.warning(warn_msg)
return True
else:
warn_msg = (
f"Downloaded file failed validation: "
f"{temp_path}"
)
self.logger.warning(warn_msg)
try:
os.remove(temp_path)
except OSError as e:
warn_msg = f"Failed to remove temp file: {e}"
self.logger.warning(warn_msg)
except Exception as e:
self.logger.warning("Provider %s failed: %s", provider_name, e)
# Clean up any partial temp files left by this failed attempt
_cleanup_temp_file(temp_path, self.logger)
self.download_stats['retried_downloads'] += 1
continue
# All providers failed make sure no temp remnants are left behind
_cleanup_temp_file(temp_path, self.logger)
return False
def _perform_ytdl_download(
self, ydl_opts: Dict[str, Any], link: str
) -> bool:
"""Perform actual download using yt-dlp."""
try:
with YoutubeDL(ydl_opts) as ydl:
ydl.download([link])
return True
except Exception as e:
self.logger.error("yt-dlp download failed: %s", e)
raise DownloadError(f"Download failed: {e}") from e
@with_error_recovery(max_retries=2, context="get_title")
def GetTitle(self, key: str) -> str:
"""Get anime title with error handling."""
try:
soup = BeautifulSoup(self._GetKeyHTML(key).content, 'html.parser')
title_div = soup.find('div', class_='series-title')
if title_div:
title_span = title_div.find('h1')
if title_span:
span = title_span.find('span')
if span:
return span.text.strip()
self.logger.warning("Could not extract title for key: %s", key)
return f"Unknown_Title_{key}"
except Exception as e:
self.logger.error("Failed to get title for key %s: %s", key, e)
raise RetryableError(f"Title extraction failed: {e}") from e
def GetSiteKey(self) -> str:
"""Get site identifier."""
return "aniworld.to"
@with_error_recovery(max_retries=2, context="get_key_html")
def _GetKeyHTML(self, key: str):
"""Get cached HTML for anime key."""
if key in self._KeyHTMLDict:
return self._KeyHTMLDict[key]
try:
url = f"{self.ANIWORLD_TO}/anime/stream/{key}"
response = recovery_strategies.handle_network_failure(
self.session.get,
url,
timeout=self.DEFAULT_REQUEST_TIMEOUT
)
if not response.ok:
if response.status_code == 404:
msg = f"Anime key not found: {key}"
self.nokey_logger.error(msg)
raise NonRetryableError(msg)
else:
err_msg = (
f"HTTP error {response.status_code} for key {key}"
)
raise RetryableError(err_msg)
self._KeyHTMLDict[key] = response
return self._KeyHTMLDict[key]
except Exception as e:
error_msg = f"Failed to get HTML for key {key}: {e}"
self.logger.error(error_msg)
raise
@with_error_recovery(max_retries=2, context="get_episode_html")
def _GetEpisodeHTML(self, season: int, episode: int, key: str):
"""Get cached HTML for specific episode.
Args:
season: Season number (must be 1-999)
episode: Episode number (must be 1-9999)
key: Series identifier (should be non-empty)
Returns:
Cached or fetched HTML response
Raises:
ValueError: If parameters are invalid
NonRetryableError: If episode not found (404)
RetryableError: If HTTP error occurs
"""
# Validate parameters
if not key or not key.strip():
raise ValueError("Series key cannot be empty")
if season < 1 or season > 999:
raise ValueError(
f"Invalid season number: {season} (must be 1-999)"
)
if episode < 1 or episode > 9999:
raise ValueError(
f"Invalid episode number: {episode} (must be 1-9999)"
)
cache_key = (key, season, episode)
if cache_key in self._EpisodeHTMLDict:
return self._EpisodeHTMLDict[cache_key]
try:
url = (
f"{self.ANIWORLD_TO}/anime/stream/{key}/"
f"staffel-{season}/episode-{episode}"
)
response = recovery_strategies.handle_network_failure(
self.session.get, url, timeout=self.DEFAULT_REQUEST_TIMEOUT
)
if not response.ok:
if response.status_code == 404:
err_msg = (
f"Episode not found: {key} S{season}E{episode}"
)
raise NonRetryableError(err_msg)
else:
err_msg = (
f"HTTP error {response.status_code} for episode"
)
raise RetryableError(err_msg)
self._EpisodeHTMLDict[cache_key] = response
return self._EpisodeHTMLDict[cache_key]
except Exception as e:
error_msg = (
f"Failed to get episode HTML for {key} "
f"S{season}E{episode}: {e}"
)
self.logger.error(error_msg)
raise
def _get_provider_from_html(
self, season: int, episode: int, key: str
) -> dict:
"""Extract providers from HTML with error handling."""
try:
episode_html = self._GetEpisodeHTML(season, episode, key)
soup = BeautifulSoup(episode_html.content, "html.parser")
providers: dict[str, dict] = {}
episode_links = soup.find_all(
"li", class_=lambda x: x and x.startswith("episodeLink")
)
if not episode_links:
warn_msg = (
f"No episode links found for {key} S{season}E{episode}"
)
self.logger.warning(warn_msg)
return providers
for link in episode_links:
provider_name_tag = link.find("h4")
provider_name = (
provider_name_tag.text.strip()
if provider_name_tag
else None
)
redirect_link_tag = link.find("a", class_="watchEpisode")
redirect_link = (
redirect_link_tag["href"]
if redirect_link_tag
else None
)
lang_key = link.get("data-lang-key")
lang_key = (
int(lang_key)
if lang_key and lang_key.isdigit()
else None
)
if provider_name and redirect_link and lang_key:
if provider_name not in providers:
providers[provider_name] = {}
providers[provider_name][lang_key] = (
f"{self.ANIWORLD_TO}{redirect_link}"
)
debug_msg = (
f"Found {len(providers)} providers for "
f"{key} S{season}E{episode}"
)
self.logger.debug(debug_msg)
return providers
except Exception as e:
error_msg = f"Failed to parse providers from HTML: {e}"
self.logger.error(error_msg)
raise RetryableError(f"Provider parsing failed: {e}") from e
def _get_redirect_link(
self,
season: int,
episode: int,
key: str,
language: str = "German Dub",
):
"""Get redirect link for episode with error handling."""
languageCode = self._GetLanguageKey(language)
if not self.IsLanguage(season, episode, key, language):
err_msg = (
f"Language {language} not available for "
f"{key} S{season}E{episode}"
)
raise NonRetryableError(err_msg)
providers = self._get_provider_from_html(season, episode, key)
for provider_name, lang_dict in providers.items():
if languageCode in lang_dict:
return lang_dict[languageCode], provider_name
err_msg = (
f"No provider found for {language} in "
f"{key} S{season}E{episode}"
)
raise NonRetryableError(err_msg)
def _get_embeded_link(
self,
season: int,
episode: int,
key: str,
language: str = "German Dub",
):
"""Get embedded link with error handling."""
try:
redirect_link, provider_name = self._get_redirect_link(
season, episode, key, language
)
response = recovery_strategies.handle_network_failure(
self.session.get,
redirect_link,
timeout=self.DEFAULT_REQUEST_TIMEOUT,
headers={"User-Agent": self.RANDOM_USER_AGENT},
)
return response.url
except Exception as e:
error_msg = f"Failed to get embedded link: {e}"
self.logger.error(error_msg)
raise
def _get_direct_link_from_provider(
self,
season: int,
episode: int,
key: str,
language: str = "German Dub",
):
"""Get direct download link from provider."""
try:
embedded_link = self._get_embeded_link(
season, episode, key, language
)
if not embedded_link:
raise NonRetryableError("No embedded link found")
# Use VOE provider as default (could be made configurable)
provider = self.Providers.GetProvider("VOE")
if not provider:
raise NonRetryableError("VOE provider not available")
return provider.get_link(
embedded_link, self.DEFAULT_REQUEST_TIMEOUT
)
except Exception as e:
error_msg = f"Failed to get direct link from provider: {e}"
self.logger.error(error_msg)
raise
@with_error_recovery(max_retries=2, context="get_season_episode_count")
def get_season_episode_count(self, slug: str) -> dict:
"""Get episode count per season with error handling."""
try:
base_url = f"{self.ANIWORLD_TO}/anime/stream/{slug}/"
response = recovery_strategies.handle_network_failure(
requests.get,
base_url,
timeout=self.DEFAULT_REQUEST_TIMEOUT,
)
soup = BeautifulSoup(response.content, "html.parser")
season_meta = soup.find("meta", itemprop="numberOfSeasons")
number_of_seasons = (
int(season_meta["content"]) if season_meta else 0
)
episode_counts = {}
for season in range(1, number_of_seasons + 1):
season_url = f"{base_url}staffel-{season}"
season_response = (
recovery_strategies.handle_network_failure(
requests.get,
season_url,
timeout=self.DEFAULT_REQUEST_TIMEOUT,
)
)
season_soup = BeautifulSoup(
season_response.content, "html.parser"
)
episode_links = season_soup.find_all("a", href=True)
unique_links = set(
link["href"]
for link in episode_links
if f"staffel-{season}/episode-" in link['href']
)
episode_counts[season] = len(unique_links)
return episode_counts
except Exception as e:
self.logger.error("Failed to get episode counts for %s: %s", slug, e)
raise RetryableError(f"Episode count retrieval failed: {e}") from e
def get_download_statistics(self) -> Dict[str, Any]:
"""Get download statistics."""
stats = self.download_stats.copy()
stats['success_rate'] = (
(stats['successful_downloads'] / stats['total_downloads'] * 100)
if stats['total_downloads'] > 0 else 0
)
return stats
def reset_statistics(self):
"""Reset download statistics."""
self.download_stats = {
'total_downloads': 0,
'successful_downloads': 0,
'failed_downloads': 0,
'retried_downloads': 0
}
# For backward compatibility, create wrapper that uses enhanced loader
class AniworldLoader(EnhancedAniWorldLoader):
"""Backward compatibility wrapper for the enhanced loader."""
pass

View File

@@ -0,0 +1,325 @@
"""Provider failover system for automatic fallback on failures.
This module implements automatic failover between multiple providers,
ensuring high availability by switching to backup providers when the
primary fails.
"""
import asyncio
import logging
from typing import Any, Callable, Dict, List, Optional, TypeVar
from src.server.providers.health_monitor import get_health_monitor
from src.server.providers.provider_config import DEFAULT_PROVIDERS
logger = logging.getLogger(__name__)
T = TypeVar("T")
class ProviderFailover:
"""Manages automatic failover between multiple providers."""
def __init__(
self,
providers: Optional[List[str]] = None,
max_retries: int = 3,
retry_delay: float = 1.0,
enable_health_monitoring: bool = True,
):
"""Initialize provider failover manager.
Args:
providers: List of provider names to use (default: all).
max_retries: Maximum retry attempts per provider.
retry_delay: Delay between retries in seconds.
enable_health_monitoring: Whether to use health monitoring.
"""
self._providers = providers or DEFAULT_PROVIDERS.copy()
self._max_retries = max_retries
self._retry_delay = retry_delay
self._enable_health_monitoring = enable_health_monitoring
# Current provider index
self._current_index = 0
# Health monitor
self._health_monitor = (
get_health_monitor() if enable_health_monitoring else None
)
logger.info(
f"Provider failover initialized with "
f"{len(self._providers)} providers"
)
def get_current_provider(self) -> str:
"""Get the current active provider.
Returns:
Name of current provider.
"""
if self._enable_health_monitoring and self._health_monitor:
# Try to get best available provider
best = self._health_monitor.get_best_provider()
if best and best in self._providers:
return best
# Fall back to round-robin selection
return self._providers[self._current_index % len(self._providers)]
def get_next_provider(self) -> Optional[str]:
"""Get the next provider in the failover chain.
Returns:
Name of next provider or None if none available.
"""
if self._enable_health_monitoring and self._health_monitor:
# Get available providers
available = [
p
for p in self._providers
if p in self._health_monitor.get_available_providers()
]
if not available:
logger.warning("No available providers for failover")
return None
# Find next available provider
current = self.get_current_provider()
try:
current_idx = available.index(current)
next_idx = (current_idx + 1) % len(available)
return available[next_idx]
except ValueError:
# Current provider not in available list
return available[0]
# Fall back to simple rotation
self._current_index = (self._current_index + 1) % len(
self._providers
)
return self._providers[self._current_index]
async def execute_with_failover(
self,
operation: Callable[[str], Any],
operation_name: str = "operation",
**kwargs,
) -> Any:
"""Execute an operation with automatic failover.
Args:
operation: Async callable that takes provider name.
operation_name: Name for logging purposes.
**kwargs: Additional arguments to pass to operation.
Returns:
Result from successful operation.
Raises:
Exception: If all providers fail.
"""
providers_tried = []
last_error = None
# Try each provider
for attempt in range(len(self._providers)):
provider = self.get_current_provider()
# Skip if already tried
if provider in providers_tried:
self.get_next_provider()
continue
providers_tried.append(provider)
# Try operation with retries
for retry in range(self._max_retries):
try:
logger.info(
f"Executing {operation_name} with provider "
f"{provider} (attempt {retry + 1}/{self._max_retries})" # noqa: E501
)
# Execute operation
import time
start_time = time.time()
result = await operation(provider, **kwargs)
elapsed_ms = (time.time() - start_time) * 1000
# Record success
if self._health_monitor:
self._health_monitor.record_request(
provider_name=provider,
success=True,
response_time_ms=elapsed_ms,
)
logger.info(
f"{operation_name} succeeded with provider "
f"{provider} in {elapsed_ms:.2f}ms"
)
return result
except Exception as e:
last_error = e
logger.warning(
f"{operation_name} failed with provider "
f"{provider} (attempt {retry + 1}): {e}"
)
# Record failure
if self._health_monitor:
import time
elapsed_ms = (time.time() - start_time) * 1000
self._health_monitor.record_request(
provider_name=provider,
success=False,
response_time_ms=elapsed_ms,
error_message=str(e),
)
# Retry with delay
if retry < self._max_retries - 1:
await asyncio.sleep(self._retry_delay)
# Try next provider
next_provider = self.get_next_provider()
if next_provider is None:
break
# All providers failed
error_msg = (
f"{operation_name} failed with all providers. "
f"Tried: {', '.join(providers_tried)}"
)
logger.error(error_msg)
raise Exception(error_msg) from last_error
def add_provider(self, provider_name: str) -> None:
"""Add a provider to the failover chain.
Args:
provider_name: Name of provider to add.
"""
if provider_name not in self._providers:
self._providers.append(provider_name)
logger.info("Added provider to failover chain: %s", provider_name)
def remove_provider(self, provider_name: str) -> bool:
"""Remove a provider from the failover chain.
Args:
provider_name: Name of provider to remove.
Returns:
True if removed, False if not found.
"""
if provider_name in self._providers:
self._providers.remove(provider_name)
logger.info(
f"Removed provider from failover chain: {provider_name}"
)
return True
return False
def get_providers(self) -> List[str]:
"""Get list of all providers in failover chain.
Returns:
List of provider names.
"""
return self._providers.copy()
def set_provider_priority(
self, provider_name: str, priority_index: int
) -> bool:
"""Set priority of a provider by moving it in the chain.
Args:
provider_name: Name of provider to prioritize.
priority_index: New index position (0 = highest priority).
Returns:
True if updated, False if provider not found.
"""
if provider_name not in self._providers:
return False
self._providers.remove(provider_name)
self._providers.insert(
min(priority_index, len(self._providers)), provider_name
)
logger.info(
f"Set provider {provider_name} priority to index {priority_index}"
)
return True
def get_failover_stats(self) -> Dict[str, Any]:
"""Get failover statistics and configuration.
Returns:
Dictionary with failover stats.
"""
stats = {
"total_providers": len(self._providers),
"providers": self._providers.copy(),
"current_provider": self.get_current_provider(),
"max_retries": self._max_retries,
"retry_delay": self._retry_delay,
"health_monitoring_enabled": self._enable_health_monitoring,
}
if self._health_monitor:
available = self._health_monitor.get_available_providers()
stats["available_providers"] = [
p for p in self._providers if p in available
]
stats["unavailable_providers"] = [
p for p in self._providers if p not in available
]
return stats
# Global failover instance
_failover: Optional[ProviderFailover] = None
def get_failover() -> ProviderFailover:
"""Get or create global provider failover instance.
Returns:
Global ProviderFailover instance.
"""
global _failover
if _failover is None:
_failover = ProviderFailover()
return _failover
def configure_failover(
providers: Optional[List[str]] = None,
max_retries: int = 3,
retry_delay: float = 1.0,
) -> ProviderFailover:
"""Configure global provider failover instance.
Args:
providers: List of provider names to use.
max_retries: Maximum retry attempts per provider.
retry_delay: Delay between retries in seconds.
Returns:
Configured ProviderFailover instance.
"""
global _failover
_failover = ProviderFailover(
providers=providers,
max_retries=max_retries,
retry_delay=retry_delay,
)
return _failover

View File

@@ -0,0 +1,416 @@
"""Provider health monitoring system for tracking availability and performance.
This module provides health monitoring capabilities for anime providers,
tracking metrics like availability, response times, success rates, and
bandwidth usage.
"""
import asyncio
import logging
from collections import defaultdict, deque
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Deque, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class ProviderHealthMetrics:
"""Health metrics for a single provider."""
provider_name: str
is_available: bool = True
last_check_time: Optional[datetime] = None
total_requests: int = 0
successful_requests: int = 0
failed_requests: int = 0
average_response_time_ms: float = 0.0
last_error: Optional[str] = None
last_error_time: Optional[datetime] = None
consecutive_failures: int = 0
total_bytes_downloaded: int = 0
uptime_percentage: float = 100.0
@property
def success_rate(self) -> float:
"""Calculate success rate as percentage."""
if self.total_requests == 0:
return 0.0
return (self.successful_requests / self.total_requests) * 100
@property
def failure_rate(self) -> float:
"""Calculate failure rate as percentage."""
return 100.0 - self.success_rate
def to_dict(self) -> Dict[str, Any]:
"""Convert metrics to dictionary."""
return {
"provider_name": self.provider_name,
"is_available": self.is_available,
"last_check_time": (
self.last_check_time.isoformat()
if self.last_check_time
else None
),
"total_requests": self.total_requests,
"successful_requests": self.successful_requests,
"failed_requests": self.failed_requests,
"success_rate": round(self.success_rate, 2),
"average_response_time_ms": round(
self.average_response_time_ms, 2
),
"last_error": self.last_error,
"last_error_time": (
self.last_error_time.isoformat()
if self.last_error_time
else None
),
"consecutive_failures": self.consecutive_failures,
"total_bytes_downloaded": self.total_bytes_downloaded,
"uptime_percentage": round(self.uptime_percentage, 2),
}
@dataclass
class RequestMetric:
"""Individual request metric."""
timestamp: datetime
success: bool
response_time_ms: float
bytes_transferred: int = 0
error_message: Optional[str] = None
class ProviderHealthMonitor:
"""Monitors health and performance of anime providers."""
def __init__(
self,
max_history_size: int = 1000,
health_check_interval: int = 300, # 5 minutes
failure_threshold: int = 3,
):
"""Initialize provider health monitor.
Args:
max_history_size: Maximum number of request metrics to keep
per provider.
health_check_interval: Interval between health checks in
seconds.
failure_threshold: Number of consecutive failures before
marking unavailable.
"""
self._max_history_size = max_history_size
self._health_check_interval = health_check_interval
self._failure_threshold = failure_threshold
# Provider metrics storage
self._metrics: Dict[str, ProviderHealthMetrics] = {}
self._request_history: Dict[str, Deque[RequestMetric]] = defaultdict(
lambda: deque(maxlen=max_history_size)
)
# Health check task
self._health_check_task: Optional[asyncio.Task] = None
self._is_running = False
logger.info("Provider health monitor initialized")
def start_monitoring(self) -> None:
"""Start background health monitoring."""
if self._is_running:
logger.warning("Health monitoring already running")
return
self._is_running = True
self._health_check_task = asyncio.create_task(
self._health_check_loop()
)
logger.info("Provider health monitoring started")
async def stop_monitoring(self) -> None:
"""Stop background health monitoring."""
self._is_running = False
if self._health_check_task:
self._health_check_task.cancel()
try:
await self._health_check_task
except asyncio.CancelledError:
pass
self._health_check_task = None
logger.info("Provider health monitoring stopped")
async def _health_check_loop(self) -> None:
"""Background health check loop."""
while self._is_running:
try:
await self._perform_health_checks()
await asyncio.sleep(self._health_check_interval)
except asyncio.CancelledError:
break
except Exception as e:
logger.exception("Error in health check loop: %s", e)
await asyncio.sleep(self._health_check_interval)
async def _perform_health_checks(self) -> None:
"""Perform health checks on all registered providers."""
for provider_name in list(self._metrics.keys()):
try:
metrics = self._metrics[provider_name]
metrics.last_check_time = datetime.now()
# Update uptime percentage based on recent history
recent_metrics = self._get_recent_metrics(
provider_name, minutes=60
)
if recent_metrics:
successful = sum(1 for m in recent_metrics if m.success)
metrics.uptime_percentage = (
successful / len(recent_metrics)
) * 100
logger.debug(
f"Health check for {provider_name}: "
f"available={metrics.is_available}, "
f"success_rate={metrics.success_rate:.2f}%"
)
except Exception as e:
logger.error(
f"Error checking health for {provider_name}: {e}",
exc_info=True,
)
def record_request(
self,
provider_name: str,
success: bool,
response_time_ms: float,
bytes_transferred: int = 0,
error_message: Optional[str] = None,
) -> None:
"""Record a provider request for health tracking.
Args:
provider_name: Name of the provider.
success: Whether the request was successful.
response_time_ms: Response time in milliseconds.
bytes_transferred: Number of bytes transferred.
error_message: Error message if request failed.
"""
# Initialize metrics if not exists
if provider_name not in self._metrics:
self._metrics[provider_name] = ProviderHealthMetrics(
provider_name=provider_name
)
metrics = self._metrics[provider_name]
# Update request counts
metrics.total_requests += 1
if success:
metrics.successful_requests += 1
metrics.consecutive_failures = 0
else:
metrics.failed_requests += 1
metrics.consecutive_failures += 1
metrics.last_error = error_message
metrics.last_error_time = datetime.now()
# Update availability based on consecutive failures
if metrics.consecutive_failures >= self._failure_threshold:
if metrics.is_available:
logger.warning(
f"Provider {provider_name} marked as unavailable after "
f"{metrics.consecutive_failures} consecutive failures"
)
metrics.is_available = False
else:
metrics.is_available = True
# Update average response time
total_time = metrics.average_response_time_ms * (
metrics.total_requests - 1
)
metrics.average_response_time_ms = (
total_time + response_time_ms
) / metrics.total_requests
# Update bytes transferred
metrics.total_bytes_downloaded += bytes_transferred
# Store request metric in history
request_metric = RequestMetric(
timestamp=datetime.now(),
success=success,
response_time_ms=response_time_ms,
bytes_transferred=bytes_transferred,
error_message=error_message,
)
self._request_history[provider_name].append(request_metric)
logger.debug(
f"Recorded request for {provider_name}: "
f"success={success}, time={response_time_ms:.2f}ms"
)
def get_provider_metrics(
self, provider_name: str
) -> Optional[ProviderHealthMetrics]:
"""Get health metrics for a specific provider.
Args:
provider_name: Name of the provider.
Returns:
Provider health metrics or None if not found.
"""
return self._metrics.get(provider_name)
def get_all_metrics(self) -> Dict[str, ProviderHealthMetrics]:
"""Get health metrics for all providers.
Returns:
Dictionary mapping provider names to their metrics.
"""
return self._metrics.copy()
def get_available_providers(self) -> List[str]:
"""Get list of currently available providers.
Returns:
List of available provider names.
"""
return [
name
for name, metrics in self._metrics.items()
if metrics.is_available
]
def get_best_provider(self) -> Optional[str]:
"""Get the best performing available provider.
Best is determined by:
1. Availability
2. Success rate
3. Response time
Returns:
Name of best provider or None if none available.
"""
available = [
(name, metrics)
for name, metrics in self._metrics.items()
if metrics.is_available
]
if not available:
return None
# Sort by success rate (descending) then response time (ascending)
available.sort(
key=lambda x: (-x[1].success_rate, x[1].average_response_time_ms)
)
best_provider = available[0][0]
logger.debug("Best provider selected: %s", best_provider)
return best_provider
def _get_recent_metrics(
self, provider_name: str, minutes: int = 60
) -> List[RequestMetric]:
"""Get recent request metrics for a provider.
Args:
provider_name: Name of the provider.
minutes: Number of minutes to look back.
Returns:
List of recent request metrics.
"""
if provider_name not in self._request_history:
return []
cutoff_time = datetime.now() - timedelta(minutes=minutes)
return [
metric
for metric in self._request_history[provider_name]
if metric.timestamp >= cutoff_time
]
def reset_provider_metrics(self, provider_name: str) -> bool:
"""Reset metrics for a specific provider.
Args:
provider_name: Name of the provider.
Returns:
True if reset successful, False if provider not found.
"""
if provider_name not in self._metrics:
return False
self._metrics[provider_name] = ProviderHealthMetrics(
provider_name=provider_name
)
self._request_history[provider_name].clear()
logger.info("Reset metrics for provider: %s", provider_name)
return True
def get_health_summary(self) -> Dict[str, Any]:
"""Get summary of overall provider health.
Returns:
Dictionary with health summary statistics.
"""
total_providers = len(self._metrics)
available_providers = len(self.get_available_providers())
if total_providers == 0:
return {
"total_providers": 0,
"available_providers": 0,
"availability_percentage": 0.0,
"average_success_rate": 0.0,
"average_response_time_ms": 0.0,
}
avg_success_rate = sum(
m.success_rate for m in self._metrics.values()
) / total_providers
avg_response_time = sum(
m.average_response_time_ms for m in self._metrics.values()
) / total_providers
return {
"total_providers": total_providers,
"available_providers": available_providers,
"availability_percentage": (
available_providers / total_providers
)
* 100,
"average_success_rate": round(avg_success_rate, 2),
"average_response_time_ms": round(avg_response_time, 2),
"providers": {
name: metrics.to_dict()
for name, metrics in self._metrics.items()
},
}
# Global health monitor instance
_health_monitor: Optional[ProviderHealthMonitor] = None
def get_health_monitor() -> ProviderHealthMonitor:
"""Get or create global provider health monitor instance.
Returns:
Global ProviderHealthMonitor instance.
"""
global _health_monitor
if _health_monitor is None:
_health_monitor = ProviderHealthMonitor()
return _health_monitor

View File

@@ -0,0 +1,307 @@
"""Performance monitoring wrapper for anime providers.
This module provides a wrapper that adds automatic performance tracking
to any provider implementation.
"""
import logging
import time
from typing import Any, Callable, Dict, List, Optional
from src.server.providers.base_provider import Loader
from src.server.providers.health_monitor import get_health_monitor
logger = logging.getLogger(__name__)
class MonitoredProviderWrapper(Loader):
"""Wrapper that adds performance monitoring to any provider."""
def __init__(
self,
provider: Loader,
enable_monitoring: bool = True,
):
"""Initialize monitored provider wrapper.
Args:
provider: Provider instance to wrap.
enable_monitoring: Whether to enable performance monitoring.
"""
self._provider = provider
self._enable_monitoring = enable_monitoring
self._health_monitor = (
get_health_monitor() if enable_monitoring else None
)
logger.info(
f"Monitoring wrapper initialized for provider: "
f"{provider.get_site_key()}"
)
def _record_operation(
self,
operation_name: str,
start_time: float,
success: bool,
bytes_transferred: int = 0,
error_message: Optional[str] = None,
) -> None:
"""Record operation metrics.
Args:
operation_name: Name of the operation.
start_time: Operation start time (from time.time()).
success: Whether operation succeeded.
bytes_transferred: Number of bytes transferred.
error_message: Error message if operation failed.
"""
if not self._enable_monitoring or not self._health_monitor:
return
elapsed_ms = (time.time() - start_time) * 1000
provider_name = self._provider.get_site_key()
self._health_monitor.record_request(
provider_name=provider_name,
success=success,
response_time_ms=elapsed_ms,
bytes_transferred=bytes_transferred,
error_message=error_message,
)
if success:
logger.debug(
f"{operation_name} succeeded for {provider_name} "
f"in {elapsed_ms:.2f}ms"
)
else:
logger.warning(
f"{operation_name} failed for {provider_name} "
f"in {elapsed_ms:.2f}ms: {error_message}"
)
def search(self, word: str) -> List[Dict[str, Any]]:
"""Search for anime series by name (with monitoring).
Args:
word: Search term to look for.
Returns:
List of found series as dictionaries.
"""
start_time = time.time()
try:
result = self._provider.search(word)
self._record_operation(
operation_name="search",
start_time=start_time,
success=True,
)
return result
except Exception as e:
self._record_operation(
operation_name="search",
start_time=start_time,
success=False,
error_message=str(e),
)
raise
def is_language(
self,
season: int,
episode: int,
key: str,
language: str = "German Dub",
) -> bool:
"""Check if episode exists in specified language (monitored).
Args:
season: Season number (1-indexed).
episode: Episode number (1-indexed).
key: Unique series identifier/key.
language: Language to check (default: German Dub).
Returns:
True if episode exists in specified language.
"""
start_time = time.time()
try:
result = self._provider.is_language(
season, episode, key, language
)
self._record_operation(
operation_name="is_language",
start_time=start_time,
success=True,
)
return result
except Exception as e:
self._record_operation(
operation_name="is_language",
start_time=start_time,
success=False,
error_message=str(e),
)
raise
def download(
self,
base_directory: str,
serie_folder: str,
season: int,
episode: int,
key: str,
language: str = "German Dub",
progress_callback: Optional[Callable[[str, Dict], None]] = None,
) -> bool:
"""Download episode to specified directory (with monitoring).
Args:
base_directory: Base directory for downloads.
serie_folder: Series folder name.
season: Season number.
episode: Episode number.
key: Unique series identifier/key.
language: Language version to download.
progress_callback: Optional callback for progress updates.
Returns:
True if download successful.
"""
start_time = time.time()
bytes_transferred = 0
# Wrap progress callback to track bytes
if progress_callback and self._enable_monitoring:
def monitored_callback(event_type: str, data: Dict) -> None:
nonlocal bytes_transferred
if event_type == "progress" and "downloaded" in data:
bytes_transferred = data.get("downloaded", 0)
progress_callback(event_type, data)
wrapped_callback = monitored_callback
else:
wrapped_callback = progress_callback
try:
result = self._provider.download(
base_directory=base_directory,
serie_folder=serie_folder,
season=season,
episode=episode,
key=key,
language=language,
progress_callback=wrapped_callback,
)
self._record_operation(
operation_name="download",
start_time=start_time,
success=result,
bytes_transferred=bytes_transferred,
)
return result
except Exception as e:
self._record_operation(
operation_name="download",
start_time=start_time,
success=False,
bytes_transferred=bytes_transferred,
error_message=str(e),
)
raise
def get_site_key(self) -> str:
"""Get the site key/identifier for this provider.
Returns:
Site key string.
"""
return self._provider.get_site_key()
def get_title(self, key: str) -> str:
"""Get the human-readable title of a series.
Args:
key: Unique series identifier/key.
Returns:
Series title string.
"""
start_time = time.time()
try:
result = self._provider.get_title(key)
self._record_operation(
operation_name="get_title",
start_time=start_time,
success=True,
)
return result
except Exception as e:
self._record_operation(
operation_name="get_title",
start_time=start_time,
success=False,
error_message=str(e),
)
raise
def get_season_episode_count(self, slug: str) -> Dict[int, int]:
"""Get season and episode counts for a series.
Args:
slug: Series slug/key identifier.
Returns:
Dictionary mapping season number to episode count.
"""
start_time = time.time()
try:
result = self._provider.get_season_episode_count(slug)
self._record_operation(
operation_name="get_season_episode_count",
start_time=start_time,
success=True,
)
return result
except Exception as e:
self._record_operation(
operation_name="get_season_episode_count",
start_time=start_time,
success=False,
error_message=str(e),
)
raise
@property
def wrapped_provider(self) -> Loader:
"""Get the underlying provider instance.
Returns:
Wrapped provider instance.
"""
return self._provider
def wrap_provider(
provider: Loader,
enable_monitoring: bool = True,
) -> Loader:
"""Wrap a provider with performance monitoring.
Args:
provider: Provider to wrap.
enable_monitoring: Whether to enable monitoring.
Returns:
Monitored provider wrapper.
"""
if isinstance(provider, MonitoredProviderWrapper):
# Already wrapped
return provider
return MonitoredProviderWrapper(
provider=provider,
enable_monitoring=enable_monitoring,
)

View File

@@ -0,0 +1,79 @@
"""Shared provider configuration constants for AniWorld providers.
Centralizes user-agent strings, provider lists and common headers so
multiple provider implementations can import a single source of truth.
"""
from enum import Enum
from typing import Dict, List
class ProviderType(str, Enum):
"""Enumeration of supported video providers."""
VOE = "VOE"
DOODSTREAM = "Doodstream"
VIDMOLY = "Vidmoly"
VIDOZA = "Vidoza"
SPEEDFILES = "SpeedFiles"
STREAMTAPE = "Streamtape"
LULUVDO = "Luluvdo"
DEFAULT_PROVIDERS: List[str] = [
ProviderType.VOE.value,
ProviderType.DOODSTREAM.value,
ProviderType.VIDMOLY.value,
ProviderType.VIDOZA.value,
ProviderType.SPEEDFILES.value,
ProviderType.STREAMTAPE.value,
ProviderType.LULUVDO.value,
]
ANIWORLD_HEADERS: Dict[str, str] = {
"accept": (
"text/html,application/xhtml+xml,application/xml;q=0.9,"
"image/avif,image/webp,image/apng,*/*;q=0.8"
),
"accept-encoding": "gzip, deflate, br, zstd",
"accept-language": (
"de,de-DE;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6"
),
"cache-control": "max-age=0",
"priority": "u=0, i",
"sec-ch-ua": (
'"Chromium";v="136", "Microsoft Edge";v="136", '
'"Not.A/Brand";v="99"'
),
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"upgrade-insecure-requests": "1",
"user-agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
),
}
INVALID_PATH_CHARS: List[str] = [
"<",
">",
":",
'"',
"/",
"\\",
"|",
"?",
"*",
"&",
]
LULUVDO_USER_AGENT = (
"Mozilla/5.0 (Android 15; Mobile; rv:132.0) "
"Gecko/132.0 Firefox/132.0"
)
# Default download timeout (seconds)
DEFAULT_DOWNLOAD_TIMEOUT = 600

View File

@@ -0,0 +1,56 @@
"""Provider factory for managing anime content providers.
This module provides a factory class for accessing different anime content
providers (loaders). The factory uses provider identifiers (keys) to return
the appropriate provider instance.
Note: The 'key' parameter in this factory refers to the provider identifier
(e.g., 'aniworld.to'), not to be confused with series keys used within
providers to identify specific anime series.
"""
from typing import Dict
from .aniworld_provider import AniworldLoader
from .base_provider import Loader
class Loaders:
"""Factory class for managing and retrieving anime content providers.
This factory maintains a registry of available providers and provides
access to them via provider keys. Each provider implements the Loader
interface for searching and downloading anime content.
Attributes:
dict: Dictionary mapping provider keys to provider instances.
Provider keys are site identifiers (e.g., 'aniworld.to').
"""
def __init__(self) -> None:
"""Initialize the provider factory with available providers.
Currently supports:
- 'aniworld.to': AniworldLoader for aniworld.to content
"""
self.dict: Dict[str, Loader] = {"aniworld.to": AniworldLoader()}
def GetLoader(self, key: str) -> Loader:
"""Retrieve a provider instance by its provider key.
Args:
key: Provider identifier (e.g., 'aniworld.to').
This is the site/provider key, not a series key.
Returns:
Loader instance for the specified provider.
Raises:
KeyError: If the provider key is not found in the registry.
Note:
The 'key' parameter here identifies the provider/site, while
series-specific operations on the returned Loader use series
keys to identify individual anime series.
"""
return self.dict[key]

View File

@@ -0,0 +1,27 @@
from abc import ABC, abstractmethod
from typing import Any
class Provider(ABC):
"""Abstract base class for streaming providers."""
@abstractmethod
def get_link(
self, embedded_link: str, timeout: int
) -> tuple[str, dict[str, Any]]:
"""
Extract direct download link from embedded player link.
Args:
embedded_link: URL of the embedded player
timeout: Request timeout in seconds
Returns:
Tuple of (direct_link: str, headers: dict)
- direct_link: Direct URL to download resource
- headers: Dictionary of HTTP headers to use for download
"""
raise NotImplementedError(
"Streaming providers must implement get_link"
)

View File

@@ -0,0 +1,139 @@
import base64
import json
import re
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from .Provider import Provider
# Precompile the different pattern matchers used during extraction:
# - REDIRECT_PATTERN pulls the intermediate redirect URL from the bootstrap
# script so we can follow the provider's hand-off.
# - B64_PATTERN isolates the base64 encoded payload containing the ``source``
# field once decoded.
# - HLS_PATTERN captures the base64 encoded HLS manifest for fallback when
# no direct MP4 link is present.
REDIRECT_PATTERN = re.compile(r"https?://[^'\"<>]+")
B64_PATTERN = re.compile(r"var a168c='([^']+)'")
HLS_PATTERN = re.compile(r"'hls': '(?P<hls>[^']+)'")
class VOE(Provider):
"""VOE video provider implementation."""
def __init__(self):
self.RANDOM_USER_AGENT = UserAgent().random
self.Header = {"User-Agent": self.RANDOM_USER_AGENT}
def get_link(
self, embedded_link: str, timeout: int
) -> tuple[str, dict]:
"""
Extract direct download link from VOE embedded player.
Args:
embedded_link: URL of the embedded VOE player
timeout: Request timeout in seconds
Returns:
Tuple of (direct_link, headers)
"""
self.session = requests.Session()
# Configure retries with backoff
retries = Retry(
total=5, # Number of retries
backoff_factor=1, # Delay multiplier (1s, 2s, 4s, ...)
status_forcelist=[500, 502, 503, 504],
allowed_methods=["GET"],
)
adapter = HTTPAdapter(max_retries=retries)
self.session.mount("https://", adapter)
timeout = 30
response = self.session.get(
embedded_link,
headers={"User-Agent": self.RANDOM_USER_AGENT},
timeout=timeout,
)
redirect = re.search(r"https?://[^'\"<>]+", response.text)
if not redirect:
raise ValueError("No redirect found.")
redirect_url = redirect.group(0)
parts = redirect_url.strip().split("/")
self.Header["Referer"] = f"{parts[0]}//{parts[2]}/"
response = self.session.get(
redirect_url, headers={"User-Agent": self.RANDOM_USER_AGENT}
)
html = response.content
# Method 1: Extract from script tag
extracted = self.extract_voe_from_script(html)
if extracted:
return extracted, self.Header
# Method 2: Extract from base64 encoded variable
htmlText = html.decode("utf-8")
b64_match = B64_PATTERN.search(htmlText)
if b64_match:
decoded = base64.b64decode(b64_match.group(1)).decode()[::-1]
source = json.loads(decoded).get("source")
if source:
return source, self.Header
# Method 3: Extract HLS source
hls_match = HLS_PATTERN.search(htmlText)
if hls_match:
decoded_hls = base64.b64decode(hls_match.group("hls")).decode()
return decoded_hls, self.Header
raise ValueError("Could not extract download link from VOE")
def shift_letters(self, input_str: str) -> str:
"""Apply ROT13 shift to letters."""
result = ""
for c in input_str:
code = ord(c)
if 65 <= code <= 90:
code = (code - 65 + 13) % 26 + 65
elif 97 <= code <= 122:
code = (code - 97 + 13) % 26 + 97
result += chr(code)
return result
def replace_junk(self, input_str: str) -> str:
"""Replace junk character sequences."""
junk_parts = ["@$", "^^", "~@", "%?", "*~", "!!", "#&"]
for part in junk_parts:
input_str = re.sub(re.escape(part), "_", input_str)
return input_str
def shift_back(self, s: str, n: int) -> str:
"""Shift characters back by n positions."""
return "".join(chr(ord(c) - n) for c in s)
def decode_voe_string(self, encoded: str) -> dict:
"""Decode VOE-encoded string to extract video source."""
step1 = self.shift_letters(encoded)
step2 = self.replace_junk(step1).replace("_", "")
step3 = base64.b64decode(step2).decode()
step4 = self.shift_back(step3, 3)
step5 = base64.b64decode(step4[::-1]).decode()
return json.loads(step5)
def extract_voe_from_script(self, html: bytes) -> str:
"""Extract download link from VOE script tag."""
soup = BeautifulSoup(html, "html.parser")
script = soup.find("script", type="application/json")
return self.decode_voe_string(script.text[2:-2])["source"]