refactor: restructure core→server, split large entity files into database module
- Move src/core/ → src/server/ - Split SerieList.py (531 lines) and series.py (414 lines) into src/server/database/ - Add database/models.py for SQLAlchemy models - Update all test imports to reflect new structure - Remove deprecated test files (test_serie_class.py, test_serie_folder_with_year.py)
This commit is contained in:
0
src/server/providers/__init__.py
Normal file
0
src/server/providers/__init__.py
Normal file
1062
src/server/providers/aniworld_provider.py
Normal file
1062
src/server/providers/aniworld_provider.py
Normal file
File diff suppressed because it is too large
Load Diff
104
src/server/providers/base_provider.py
Normal file
104
src/server/providers/base_provider.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
class Loader(ABC):
    """Abstract interface for anime data loaders/providers.

    Every method below is abstract and carries documentation only, so a
    concrete provider has to implement all of them.
    """

    @abstractmethod
    def subscribe_download_progress(self, handler):
        """Register a handler for the download_progress event.

        Args:
            handler: Callable that is invoked with a progress dict.
        """

    @abstractmethod
    def unsubscribe_download_progress(self, handler):
        """Remove a handler from the download_progress event.

        Args:
            handler: Callable that was subscribed earlier.
        """

    @abstractmethod
    def search(self, word: str) -> List[Dict[str, Any]]:
        """Look up anime series by name.

        Args:
            word: Term to search for.

        Returns:
            The series that were found, each as a dictionary of series
            information.
        """

    @abstractmethod
    def is_language(
        self, season: int, episode: int, key: str,
        language: str = "German Dub",
    ) -> bool:
        """Tell whether an episode exists in the given language.

        Args:
            season: Season number (1-indexed).
            episode: Episode number (1-indexed).
            key: Unique series identifier/key.
            language: Language to check for (default: German Dub).

        Returns:
            True when the episode exists in that language, else False.
        """

    @abstractmethod
    def download(
        self, base_directory: str, serie_folder: str,
        season: int, episode: int, key: str,
        language: str = "German Dub",
    ) -> bool:
        """Download one episode into the given directory.

        Args:
            base_directory: Base directory for downloads.
            serie_folder: Name of the series folder inside the base
                directory.
            season: Season number (0 for movies, 1+ for series).
            episode: Episode number within the season.
            key: Unique series identifier/key.
            language: Language version to download (default: German Dub).

        Returns:
            True when the download succeeded, else False.
        """

    @abstractmethod
    def get_site_key(self) -> str:
        """Return the site key/identifier of this provider.

        Returns:
            The site key as a string (e.g., 'aniworld.to', 'voe.com').
        """

    @abstractmethod
    def get_title(self, key: str) -> str:
        """Return the human-readable title of a series.

        Args:
            key: Unique series identifier/key.

        Returns:
            The series title.
        """

    @abstractmethod
    def get_season_episode_count(self, slug: str) -> Dict[int, int]:
        """Return the number of episodes per season for a series.

        Args:
            slug: Series slug/key identifier.

        Returns:
            Dictionary mapping season number (int) to episode count (int).
        """
|
||||
|
||||
351
src/server/providers/config_manager.py
Normal file
351
src/server/providers/config_manager.py
Normal file
@@ -0,0 +1,351 @@
|
||||
"""Dynamic provider configuration management.
|
||||
|
||||
This module provides runtime configuration management for anime providers,
|
||||
allowing dynamic updates without application restart.
|
||||
"""
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import asdict, dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ProviderSettings:
    """Configuration settings for a single provider.

    ``name`` is the only required field; every other field has a default.
    """

    name: str
    enabled: bool = True
    # Lower values mean higher priority (see
    # ProviderConfigManager.set_provider_priority).
    priority: int = 0
    timeout_seconds: int = 30
    max_retries: int = 3
    retry_delay_seconds: float = 1.0
    max_concurrent_downloads: int = 3
    bandwidth_limit_mbps: Optional[float] = None
    custom_headers: Optional[Dict[str, str]] = None
    custom_params: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert settings to a dictionary, omitting fields that are None.

        Returns:
            Mapping of field name to value for every field whose value is
            not None.
        """
        return {
            key: value
            for key, value in asdict(self).items()
            if value is not None
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ProviderSettings":
        """Create settings from a dictionary, ignoring unknown keys.

        Args:
            data: Mapping of field names to values. Keys that are not
                dataclass fields are silently dropped.

        Returns:
            A new ProviderSettings instance.

        Raises:
            TypeError: If ``data`` does not contain the required ``name``.
        """
        # Local import so this block does not depend on the file-level
        # import list being changed.
        from dataclasses import fields

        # Filter on the declared dataclass fields. ``hasattr(cls, key)``
        # is wrong here: ``name`` has no default and therefore is not a
        # class attribute, while methods such as ``to_dict`` are.
        known = {field.name for field in fields(cls)}
        return cls(**{k: v for k, v in data.items() if k in known})
|
||||
|
||||
|
||||
class ProviderConfigManager:
    """Manages dynamic configuration for anime providers.

    Keeps per-provider ``ProviderSettings`` objects and a dictionary of
    global settings in memory, and can load/save both as one JSON file
    with the top-level keys ``"providers"`` and ``"global"``.
    """

    @staticmethod
    def _default_global_settings() -> Dict[str, Any]:
        """Return a fresh dictionary holding the built-in global defaults."""
        return {
            "default_timeout": 30,
            "default_max_retries": 3,
            "default_retry_delay": 1.0,
            "enable_health_monitoring": True,
            "enable_failover": True,
        }

    def __init__(self, config_file: Optional[Path] = None):
        """Initialize provider configuration manager.

        Args:
            config_file: Path to configuration file (optional). When it is
                given and exists, it is loaded immediately.
        """
        self._config_file = config_file
        self._provider_settings: Dict[str, ProviderSettings] = {}
        self._global_settings: Dict[str, Any] = (
            self._default_global_settings()
        )

        # Load configuration if file exists
        if config_file and config_file.exists():
            self.load_config()

        logger.info("Provider configuration manager initialized")

    def get_provider_settings(
        self, provider_name: str
    ) -> Optional[ProviderSettings]:
        """Get settings for a specific provider.

        Args:
            provider_name: Name of the provider.

        Returns:
            Provider settings or None if not configured.
        """
        return self._provider_settings.get(provider_name)

    def set_provider_settings(
        self, provider_name: str, settings: ProviderSettings
    ) -> None:
        """Set settings for a specific provider.

        Args:
            provider_name: Name of the provider.
            settings: Provider settings to apply.
        """
        self._provider_settings[provider_name] = settings
        logger.info("Updated settings for provider: %s", provider_name)

    def update_provider_settings(
        self, provider_name: str, **kwargs
    ) -> bool:
        """Update specific provider settings, creating them if needed.

        Only keys that are ProviderSettings fields are applied; any other
        key is ignored with a warning. The same rule applies whether the
        provider already exists or is created by this call.

        Args:
            provider_name: Name of the provider.
            **kwargs: Settings to update.

        Returns:
            Always True: an unknown provider is created rather than
            rejected.
        """
        # Local import so this block does not depend on the file-level
        # import list being changed.
        from dataclasses import fields

        known = {field.name for field in fields(ProviderSettings)}
        ignored = sorted(key for key in kwargs if key not in known)
        if ignored:
            logger.warning(
                "Ignoring unknown settings for provider %s: %s",
                provider_name,
                ignored,
            )
        updates = {k: v for k, v in kwargs.items() if k in known}

        settings = self._provider_settings.get(provider_name)
        if settings is None:
            # The registry key is authoritative for a new entry; passing
            # ``name`` twice would make the constructor raise.
            updates.pop("name", None)
            self._provider_settings[provider_name] = ProviderSettings(
                name=provider_name, **updates
            )
            logger.info(
                "Created new settings for provider: %s", provider_name
            )
            return True

        # Restricting to dataclass fields keeps a stray key from
        # overwriting a method such as ``to_dict`` on the instance.
        for key, value in updates.items():
            setattr(settings, key, value)

        logger.info(
            "Updated settings for provider %s: %s", provider_name, kwargs
        )
        return True

    def get_all_provider_settings(self) -> Dict[str, ProviderSettings]:
        """Get settings for all configured providers.

        Returns:
            Shallow copy of the mapping from provider names to their
            settings; the settings objects themselves are shared.
        """
        return self._provider_settings.copy()

    def get_enabled_providers(self) -> List[str]:
        """Get list of enabled providers.

        Returns:
            List of enabled provider names.
        """
        return [
            name
            for name, settings in self._provider_settings.items()
            if settings.enabled
        ]

    def enable_provider(self, provider_name: str) -> bool:
        """Enable a provider.

        Args:
            provider_name: Name of the provider.

        Returns:
            True if enabled, False if not found.
        """
        if provider_name in self._provider_settings:
            self._provider_settings[provider_name].enabled = True
            logger.info("Enabled provider: %s", provider_name)
            return True
        return False

    def disable_provider(self, provider_name: str) -> bool:
        """Disable a provider.

        Args:
            provider_name: Name of the provider.

        Returns:
            True if disabled, False if not found.
        """
        if provider_name in self._provider_settings:
            self._provider_settings[provider_name].enabled = False
            logger.info("Disabled provider: %s", provider_name)
            return True
        return False

    def set_provider_priority(
        self, provider_name: str, priority: int
    ) -> bool:
        """Set priority for a provider.

        Lower priority values = higher priority.

        Args:
            provider_name: Name of the provider.
            priority: Priority value (lower = higher priority).

        Returns:
            True if updated, False if not found.
        """
        if provider_name in self._provider_settings:
            self._provider_settings[provider_name].priority = priority
            logger.info(
                "Set priority for %s to %s", provider_name, priority
            )
            return True
        return False

    def get_providers_by_priority(self) -> List[str]:
        """Get providers sorted by priority.

        Returns:
            List of provider names sorted by priority (low to high).
        """
        sorted_providers = sorted(
            self._provider_settings.items(),
            key=lambda item: item[1].priority,
        )
        return [name for name, _ in sorted_providers]

    def get_global_setting(self, key: str) -> Optional[Any]:
        """Get a global setting value.

        Args:
            key: Setting key.

        Returns:
            Setting value or None if not found.
        """
        return self._global_settings.get(key)

    def set_global_setting(self, key: str, value: Any) -> None:
        """Set a global setting value.

        Args:
            key: Setting key.
            value: Setting value.
        """
        self._global_settings[key] = value
        logger.info("Updated global setting %s: %s", key, value)

    def get_all_global_settings(self) -> Dict[str, Any]:
        """Get all global settings.

        Returns:
            Shallow copy of the global settings dictionary.
        """
        return self._global_settings.copy()

    def load_config(self, file_path: Optional[Path] = None) -> bool:
        """Load configuration from file.

        The file is parsed completely before any state is changed, so a
        malformed entry leaves the current configuration untouched.
        Loaded providers and global settings are merged into the existing
        ones; nothing is removed.

        Args:
            file_path: Path to configuration file (uses default if None).

        Returns:
            True if loaded successfully, False otherwise.
        """
        config_path = file_path or self._config_file
        if not config_path or not config_path.exists():
            logger.warning("Configuration file not found: %s", config_path)
            return False

        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Stage everything first; commit only when parsing succeeded.
            staged_providers: Dict[str, ProviderSettings] = {}
            if "providers" in data:
                for name, settings_data in data["providers"].items():
                    # The mapping key supplies ``name`` when the entry
                    # itself omits it; an explicit ``name`` still wins.
                    staged_providers[name] = ProviderSettings.from_dict(
                        {"name": name, **settings_data}
                    )

            staged_globals: Dict[str, Any] = {}
            if "global" in data:
                staged_globals = dict(data["global"])

            self._provider_settings.update(staged_providers)
            self._global_settings.update(staged_globals)

            logger.info(
                "Loaded configuration from %s (%s providers)",
                config_path,
                len(self._provider_settings),
            )
            return True

        except Exception as e:
            # Deliberately broad: this method reports failure through its
            # return value and must not raise.
            logger.error(
                "Failed to load configuration from %s: %s",
                config_path,
                e,
                exc_info=True,
            )
            return False

    def save_config(self, file_path: Optional[Path] = None) -> bool:
        """Save configuration to file.

        Args:
            file_path: Path to save to (uses default if None).

        Returns:
            True if saved successfully, False otherwise.
        """
        config_path = file_path or self._config_file
        if not config_path:
            logger.error("No configuration file path specified")
            return False

        try:
            # Ensure parent directory exists
            config_path.parent.mkdir(parents=True, exist_ok=True)

            data = {
                "providers": {
                    name: settings.to_dict()
                    for name, settings in self._provider_settings.items()
                },
                "global": self._global_settings,
            }

            with open(config_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)

            logger.info("Saved configuration to %s", config_path)
            return True

        except Exception as e:
            # Deliberately broad: this method reports failure through its
            # return value and must not raise.
            logger.error(
                "Failed to save configuration to %s: %s",
                config_path,
                e,
                exc_info=True,
            )
            return False

    def reset_to_defaults(self) -> None:
        """Reset all settings to defaults.

        Removes every provider entry and restores the built-in global
        settings.
        """
        self._provider_settings.clear()
        self._global_settings = self._default_global_settings()
        logger.info("Reset configuration to defaults")
|
||||
|
||||
|
||||
# Global configuration manager instance; stays None until
# get_config_manager() creates it on first use.
_config_manager: Optional[ProviderConfigManager] = None
|
||||
|
||||
|
||||
def get_config_manager(
    config_file: Optional[Path] = None,
) -> ProviderConfigManager:
    """Return the shared ProviderConfigManager, building it on first use.

    Args:
        config_file: Configuration file path. Only the call that creates
            the instance uses it; once an instance exists the argument is
            not looked at.

    Returns:
        The module-wide ProviderConfigManager instance.
    """
    global _config_manager
    # Fast path: the instance already exists.
    if _config_manager is not None:
        return _config_manager

    _config_manager = ProviderConfigManager(config_file=config_file)
    return _config_manager
|
||||
998
src/server/providers/enhanced_provider.py
Normal file
998
src/server/providers/enhanced_provider.py
Normal file
@@ -0,0 +1,998 @@
|
||||
"""
|
||||
Enhanced AniWorld Loader with Error Handling and Recovery
|
||||
|
||||
This module extends the original AniWorldLoader with comprehensive
|
||||
error handling, retry mechanisms, and recovery strategies.
|
||||
"""
|
||||
|
||||
import html
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from fake_useragent import UserAgent
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
from yt_dlp import YoutubeDL
|
||||
|
||||
from ...infrastructure.security.file_integrity import get_integrity_manager
|
||||
from ..error_handler import (
|
||||
DownloadError,
|
||||
NetworkError,
|
||||
NonRetryableError,
|
||||
RetryableError,
|
||||
file_corruption_detector,
|
||||
recovery_strategies,
|
||||
with_error_recovery,
|
||||
)
|
||||
from ..interfaces.providers import Providers
|
||||
from .base_provider import Loader
|
||||
from .provider_config import (
|
||||
ANIWORLD_HEADERS,
|
||||
DEFAULT_PROVIDERS,
|
||||
INVALID_PATH_CHARS,
|
||||
LULUVDO_USER_AGENT,
|
||||
ProviderType,
|
||||
)
|
||||
|
||||
|
||||
def _cleanup_temp_file(
    temp_path: str,
    logger: Optional[logging.Logger] = None,
) -> None:
    """Remove a temp file and any associated yt-dlp partial files.

    Besides ``temp_path`` itself, every sibling whose name is the temp
    file's name followed by ``.`` and a suffix (for example
    ``<file>.part``) is removed. Removal is best-effort: a failure is
    logged as a warning and never raised.

    Args:
        temp_path: Path to the primary temp file. An empty value is
            ignored.
        logger: Optional logger for diagnostic messages.
    """
    # Local import so this block does not depend on the file-level
    # import list being changed.
    import glob

    _log = logger if logger is not None else logging.getLogger(__name__)

    # An empty path would turn the sibling pattern into ".*" and match
    # unrelated dotfiles in the current directory.
    if not temp_path:
        return

    primary = Path(temp_path)
    # Escape the file name so characters such as "[" or "*" in a title
    # are matched literally instead of being read as glob syntax.
    sibling_pattern = glob.escape(primary.name) + ".*"

    candidates = [temp_path]
    # yt-dlp creates fragment files like <file>.part
    candidates.extend(str(p) for p in primary.parent.glob(sibling_pattern))

    for path in candidates:
        if os.path.exists(path):
            try:
                os.remove(path)
                _log.debug("Removed temp file: %s", path)
            except OSError as exc:
                _log.warning(
                    "Failed to remove temp file %s: %s", path, exc
                )
|
||||
|
||||
|
||||
class EnhancedAniWorldLoader(Loader):
|
||||
"""Aniworld provider with retry and recovery strategies.
|
||||
|
||||
Also exposes metrics hooks for download statistics.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
    """Set up headers, HTTP session, caches, statistics and loggers.

    The download timeout is read from the ``DOWNLOAD_TIMEOUT`` environment
    variable; a missing, empty or non-numeric value falls back to 600.
    """
    super().__init__()
    self.logger = logging.getLogger(__name__)
    self.SUPPORTED_PROVIDERS = DEFAULT_PROVIDERS
    # local copy so modifications don't mutate shared constant
    self.AniworldHeaders = dict(ANIWORLD_HEADERS)
    self.INVALID_PATH_CHARS = INVALID_PATH_CHARS
    self.RANDOM_USER_AGENT = UserAgent().random
    self.LULUVDO_USER_AGENT = LULUVDO_USER_AGENT

    # Header lines per provider, keyed by the provider type value.
    self.PROVIDER_HEADERS = {
        ProviderType.VIDMOLY.value: ['Referer: "https://vidmoly.to"'],
        ProviderType.DOODSTREAM.value: [
            'Referer: "https://dood.li/"',
            'Referer: "https://playmogo.com/"',
        ],
        ProviderType.VOE.value: [f'User-Agent: {self.RANDOM_USER_AGENT}'],
        ProviderType.LULUVDO.value: [
            f'User-Agent: {self.LULUVDO_USER_AGENT}',
            "Accept-Language: de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
            'Origin: "https://luluvdo.com"',
            'Referer: "https://luluvdo.com/"',
        ],
    }

    self.ANIWORLD_TO = "https://aniworld.to"
    self.DEFAULT_REQUEST_TIMEOUT = 30

    # Initialize session with enhanced retry configuration.
    # Must run after AniworldHeaders is set: the session copies them.
    self.session = self._create_robust_session()

    # Cache dictionaries
    self._KeyHTMLDict = {}
    self._EpisodeHTMLDict = {}

    # Provider manager
    self.Providers = Providers()

    # Download statistics
    self.download_stats = {
        'total_downloads': 0,
        'successful_downloads': 0,
        'failed_downloads': 0,
        'retried_downloads': 0
    }

    # Read timeout from environment variable. A malformed value must not
    # prevent the loader from being constructed, so fall back to 600.
    raw_timeout = os.getenv("DOWNLOAD_TIMEOUT") or "600"
    try:
        self.download_timeout = int(raw_timeout)
    except ValueError:
        self.logger.warning(
            "Invalid DOWNLOAD_TIMEOUT value %r; falling back to 600",
            raw_timeout,
        )
        self.download_timeout = 600

    # Setup logging
    self._setup_logging()
|
||||
|
||||
def _create_robust_session(self) -> requests.Session:
    """Build a requests session with retrying adapters and default headers.

    Returns:
        A session whose ``https://`` and ``http://`` adapters share one
        retry policy and whose default headers include
        ``self.AniworldHeaders``.
    """
    # Status codes that trigger a retry: timeouts, rate limits, and the
    # Cloudflare-origin 52x responses that AniWorld occasionally emits
    # under load.
    retryable_statuses = [
        408, 429,
        500, 502, 503, 504,
        520, 521, 522, 523, 524,
    ]

    retry_policy = Retry(
        total=5,
        backoff_factor=2,  # More aggressive backoff
        status_forcelist=retryable_statuses,
        allowed_methods=["GET", "POST", "HEAD"],
        raise_on_status=False,  # Handle status errors manually
    )
    retrying_adapter = HTTPAdapter(
        max_retries=retry_policy,
        pool_connections=10,
        pool_maxsize=20,
        pool_block=True,
    )

    http_session = requests.Session()
    for scheme in ("https://", "http://"):
        http_session.mount(scheme, retrying_adapter)

    # Set default headers
    http_session.headers.update(self.AniworldHeaders)
    return http_session
|
||||
|
||||
def _setup_logging(self):
    """Create the file-backed loggers for failed downloads and missing keys.

    Sets ``self.download_error_logger`` (logger "DownloadErrors") and
    ``self.nokey_logger`` (logger "NoKeyFound"). A logger that already
    has handlers is left as it is, so building several loader instances
    does not stack handlers.
    """
    # Four directory levels above this file, so log files land in a
    # predictable location regardless of the working directory at runtime.
    logs_dir = Path(__file__).parent.parent.parent.parent / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)

    log_format = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    def _error_file_logger(logger_name, file_name):
        """Return the named logger, wiring an ERROR file handler once."""
        target = logging.getLogger(logger_name)
        if not target.handlers:
            file_handler = logging.FileHandler(str(logs_dir / file_name))
            file_handler.setLevel(logging.ERROR)
            file_handler.setFormatter(log_format)
            target.addHandler(file_handler)
            target.setLevel(logging.ERROR)
        return target

    # Download error logger — records every failed download attempt
    self.download_error_logger = _error_file_logger(
        "DownloadErrors", "download_errors.log"
    )
    # No-key logger — records episodes for which no stream key was found
    self.nokey_logger = _error_file_logger(
        "NoKeyFound", "no_key_found.log"
    )
|
||||
|
||||
def ClearCache(self):
    """Empty both cache dictionaries (_KeyHTMLDict and _EpisodeHTMLDict)."""
    for cache in (self._KeyHTMLDict, self._EpisodeHTMLDict):
        cache.clear()
    self.logger.debug("Cache cleared")
|
||||
|
||||
def RemoveFromCache(self):
    """Empty only the episode cache (_EpisodeHTMLDict).

    _KeyHTMLDict is not touched.
    """
    episode_cache = self._EpisodeHTMLDict
    episode_cache.clear()
    self.logger.debug("Episode cache cleared")
|
||||
|
||||
@with_error_recovery(max_retries=3, context="anime_search")
def Search(self, word: str) -> list:
    """Query the AniWorld series search endpoint for ``word``.

    Args:
        word: Search term; must contain at least one non-whitespace
            character.

    Returns:
        The list produced by ``_fetch_anime_list_with_recovery``.

    Raises:
        ValueError: If ``word`` is empty or only whitespace.
        RetryableError: If fetching or parsing the results fails for any
            reason; the original exception is chained as the cause.
    """
    if not (word and word.strip()):
        raise ValueError("Search term cannot be empty")

    # URL-encode the term before placing it in the query string.
    encoded_term = quote(word)
    search_url = (
        f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={encoded_term}"
    )

    try:
        results = self._fetch_anime_list_with_recovery(search_url)
    except Exception as e:
        self.logger.error("Search failed for term '%s': %s", word, e)
        raise RetryableError(f"Search failed: {e}") from e
    return results
|
||||
|
||||
def _fetch_anime_list_with_recovery(self, url: str) -> list:
    """GET ``url`` and parse the body as an anime search result list.

    Args:
        url: Fully built search URL.

    Returns:
        The list returned by ``_parse_anime_response``.

    Raises:
        NonRetryableError: For HTTP 404 or 403 responses.
        RetryableError: For every other non-OK HTTP status.
        NetworkError: If a requests exception or ConnectionError is
            raised inside this method.
    """
    try:
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT,
        )

        if response.ok:
            return self._parse_anime_response(response.text)

        # Non-OK response: classify by status code.
        if response.status_code == 404:
            raise NonRetryableError(f"URL not found: {url}")
        if response.status_code == 403:
            raise NonRetryableError(f"Access forbidden: {url}")
        if response.status_code >= 500:
            # Log suspicious server errors for monitoring
            self.logger.warning(
                f"Server error {response.status_code} from {url} "
                f"- will retry"
            )
            raise RetryableError(f"Server error {response.status_code}")
        raise RetryableError(f"HTTP error {response.status_code}")

    except (requests.RequestException, ConnectionError) as e:
        raise NetworkError(f"Network error during anime search: {e}") from e
|
||||
|
||||
def _parse_anime_response(self, response_text: str) -> list:
    """Decode a search response body into a list.

    Three decoders are tried in order on the stripped text, each on its
    own (they are not combined): HTML-unescape then parse, strip a
    leading BOM then parse, remove control characters then parse. The
    first decoder that yields a list wins.

    Args:
        response_text: Raw response body.

    Returns:
        The decoded JSON list.

    Raises:
        ValueError: If the body is empty, looks like an HTML page, or no
            decoder produced a list.
    """
    if not (response_text and response_text.strip()):
        raise ValueError("Empty response from server")

    payload = response_text.strip()

    # Quick fail for obviously non-JSON responses
    if not payload.startswith(('[', '{')):
        # An HTML error page can never be parsed, so stop right away.
        if payload.lower().startswith(('<!doctype', '<html')):
            raise ValueError("Received HTML instead of JSON")
        self.logger.warning(
            "Response doesn't start with JSON markers, "
            "attempting parse anyway"
        )

    def _html_unescaped(text):
        return json.loads(html.unescape(text))

    def _bom_stripped(text):
        return json.loads(text.encode('utf-8').decode('utf-8-sig'))

    def _control_chars_removed(text):
        return json.loads(re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text))

    decoders = (_html_unescaped, _bom_stripped, _control_chars_removed)

    for attempt, decode in enumerate(decoders, start=1):
        try:
            decoded = decode(payload)
        except json.JSONDecodeError as e:
            self.logger.debug(f"Parsing strategy {attempt} failed: {e}")
            continue

        if isinstance(decoded, list):
            self.logger.debug(
                f"Successfully parsed anime response with "
                f"strategy {attempt}"
            )
            return decoded

        # Valid JSON but not a list: note it and try the next decoder.
        self.logger.warning(
            f"Strategy {attempt} returned non-list data: "
            f"{type(decoded)}"
        )

    raise ValueError(
        "Could not parse anime search response with any strategy"
    )
|
||||
|
||||
def _GetLanguageKey(self, language: str) -> int:
    """Translate a language name into its numeric code.

    Args:
        language: One of "German Dub", "English Sub" or "German Sub".

    Returns:
        1, 2 or 3 for the names above (in that order); 0 for any other
        name.
    """
    codes_by_name = {
        "German Dub": 1,
        "English Sub": 2,
        "German Sub": 3,
    }
    try:
        return codes_by_name[language]
    except KeyError:
        # Unknown names map to 0.
        return 0
|
||||
|
||||
@with_error_recovery(max_retries=2, context="language_check")
def IsLanguage(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
) -> bool:
    """Report whether an episode page offers the requested language.

    The episode HTML is searched for the ``changeLanguageBox`` div; the
    numeric ``data-lang-key`` attributes of its ``img`` tags are the
    languages considered available.

    Args:
        season: Season number.
        episode: Episode number.
        key: Series identifier.
        language: Language name understood by ``_GetLanguageKey``.

    Returns:
        True if the code of ``language`` is among the available ones;
        False if it is not or if the page has no language box.

    Raises:
        RetryableError: On any failure, including an unknown language
            name; the original exception is chained as the cause.
    """
    try:
        requested_code = self._GetLanguageKey(language)
        if requested_code == 0:
            raise ValueError(f"Unknown language: {language}")

        episode_page = self._GetEpisodeHTML(season, episode, key)
        document = BeautifulSoup(episode_page.content, "html.parser")

        language_box = document.find("div", class_="changeLanguageBox")
        if not language_box:
            self.logger.debug(
                f"No language box found for {key} S{season}E{episode}"
            )
            return False

        # Keep only the attributes that are present and purely numeric.
        raw_keys = (
            img.get("data-lang-key")
            for img in language_box.find_all("img")
        )
        offered_codes = [
            int(raw) for raw in raw_keys if raw and raw.isdigit()
        ]

        offered = requested_code in offered_codes
        self.logger.debug(
            f"Language check for {key} S{season}E{episode}: "
            f"Requested={requested_code}, "
            f"Available={offered_codes}, "
            f"Result={offered}"
        )
        return offered

    except Exception as e:
        self.logger.error(
            f"Language check failed for {key} S{season}E{episode}: {e}"
        )
        raise RetryableError(f"Language check failed: {e}") from e
|
||||
|
||||
def Download(
    self,
    baseDirectory: str,
    serieFolder: str,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
    progress_callback: Optional[Callable] = None,
) -> bool:
    """Download episode with comprehensive error handling.

    The target file is
    ``<baseDirectory>/<serieFolder>/Season <season>/<output file>``. An
    existing target that passes the video and checksum checks is kept
    and counted as a success; otherwise it is removed and the episode is
    downloaded again via ``_download_with_recovery``. The counters in
    ``self.download_stats`` are updated and the caches are cleared on
    every exit path.

    Args:
        baseDirectory: Base download directory path
        serieFolder: Filesystem folder name (metadata only, used for
            file path construction)
        season: Season number (0 for movies)
        episode: Episode number
        key: Series unique identifier from provider (used for
            identification and API calls)
        language: Audio language preference (default: German Dub)
        progress_callback: Optional callback for download progress
            updates

    Returns:
        bool: True if the file already existed and is valid or the
        download succeeded, False if ``_download_with_recovery``
        reported failure

    Raises:
        DownloadError: If any exception occurs. This includes the
            ValueError raised for missing or negative parameters, which
            is caught by the broad handler below and re-raised as
            DownloadError with the original as its cause.
    """
    # Counted before validation, so rejected calls are included too.
    self.download_stats["total_downloads"] += 1

    try:
        # Validate inputs
        if not all([baseDirectory, serieFolder, key]):
            raise ValueError("Missing required parameters for download")

        if season < 0 or episode < 0:
            raise ValueError("Season and episode must be non-negative")

        # Prepare file paths: drop every character of the title that is
        # listed in INVALID_PATH_CHARS.
        sanitized_anime_title = "".join(
            char
            for char in self.GetTitle(key)
            if char not in self.INVALID_PATH_CHARS
        )

        # Fall back to a key-based name if nothing is left of the title.
        if not sanitized_anime_title:
            sanitized_anime_title = f"Unknown_{key}"

        # Generate output filename (season 0 uses the "Movie" pattern)
        if season == 0:
            output_file = (
                f"{sanitized_anime_title} - Movie {episode:02} - "
                f"({language}).mp4"
            )
        else:
            output_file = (
                f"{sanitized_anime_title} - S{season:02}E{episode:03} - "
                f"({language}).mp4"
            )

        # Create directory structure
        folder_path = os.path.join(
            baseDirectory, serieFolder, f"Season {season}"
        )
        output_path = os.path.join(folder_path, output_file)

        # Check if file already exists and is valid
        if os.path.exists(output_path):
            is_valid = file_corruption_detector.is_valid_video_file(
                output_path
            )

            # Also verify checksum if available
            integrity_mgr = get_integrity_manager()
            checksum_valid = True
            if integrity_mgr.has_checksum(Path(output_path)):
                checksum_valid = integrity_mgr.verify_checksum(
                    Path(output_path)
                )
                if not checksum_valid:
                    self.logger.warning(
                        f"Checksum verification failed for {output_file}"
                    )

            if is_valid and checksum_valid:
                msg = (
                    f"File already exists and is valid: "
                    f"{output_file}"
                )
                self.logger.info(msg)
                # An already-present valid file counts as a success.
                self.download_stats["successful_downloads"] += 1
                return True
            else:
                warning_msg = (
                    f"Existing file appears corrupted, removing: "
                    f"{output_path}"
                )
                self.logger.warning(warning_msg)
                try:
                    os.remove(output_path)
                    # Remove checksum entry
                    integrity_mgr.remove_checksum(Path(output_path))
                except OSError as e:
                    # Best effort: the download is still attempted even
                    # if the old file could not be removed.
                    error_msg = f"Failed to remove corrupted file: {e}"
                    self.logger.error(error_msg)

        os.makedirs(folder_path, exist_ok=True)

        # Create temp directory (relative to the current working
        # directory)
        temp_dir = "./Temp/"
        os.makedirs(temp_dir, exist_ok=True)
        temp_path = os.path.join(temp_dir, output_file)

        # Attempt download with recovery strategies
        success = self._download_with_recovery(
            season,
            episode,
            key,
            language,
            temp_path,
            output_path,
            progress_callback,
        )

        if success:
            self.download_stats["successful_downloads"] += 1
            success_msg = f"Successfully downloaded: {output_file}"
            self.logger.info(success_msg)
        else:
            self.download_stats["failed_downloads"] += 1
            fail_msg = (
                f"Download failed for {key} S{season}E{episode} "
                f"({language})"
            )
            self.download_error_logger.error(fail_msg)

        return success

    except Exception as e:
        # Every failure, validation errors included, is counted, logged
        # to the download error log and surfaced as DownloadError.
        self.download_stats["failed_downloads"] += 1
        err_msg = (
            f"Download error for {key} S{season}E{episode}: {e}"
        )
        self.download_error_logger.error(err_msg, exc_info=True)
        raise DownloadError(f"Download failed: {e}") from e
    finally:
        # Runs on success, failure and the early return above.
        self.ClearCache()
|
||||
|
||||
def _download_with_recovery(
    self,
    season: int,
    episode: int,
    key: str,
    language: str,
    temp_path: str,
    output_path: str,
    progress_callback: Optional[Callable],
) -> bool:
    """Attempt download with multiple providers and recovery.

    For every entry in ``self.SUPPORTED_PROVIDERS`` the method resolves a
    direct link, downloads it with yt-dlp into ``temp_path``, validates
    the result and copies it to ``output_path``.

    Args:
        season: Season number of the episode.
        episode: Episode number within the season.
        key: Series identifier.
        language: Language name passed on to the link resolver.
        temp_path: File path yt-dlp writes to (used as ``outtmpl``).
        output_path: Final location the validated file is copied to.
        progress_callback: Optional callable registered as a yt-dlp
            progress hook.

    Returns:
        True as soon as one attempt yields a file that passes
        validation; False when every iteration failed.
    """

    # NOTE(review): provider_name is only used for log messages in this
    # loop; the link resolver below is called with identical arguments on
    # every iteration and itself always asks for the "VOE" provider, so
    # the iterations appear to be retries rather than real alternatives.
    for provider_name in self.SUPPORTED_PROVIDERS:
        try:
            info_msg = (
                f"Attempting download with provider: {provider_name}"
            )
            self.logger.info(info_msg)

            # Get download link and headers for provider
            link, headers = recovery_strategies.handle_network_failure(
                self._get_direct_link_from_provider,
                season,
                episode,
                key,
                language,
            )

            if not link:
                warn_msg = (
                    f"No download link found for provider: "
                    f"{provider_name}"
                )
                self.logger.warning(warn_msg)
                continue

            # Configure yt-dlp options
            ydl_opts = {
                # Never give up on an individual fragment.
                "fragment_retries": float("inf"),
                "outtmpl": temp_path,
                "quiet": True,
                "no_warnings": True,
                "progress_with_newline": False,
                # TLS certificate verification is disabled for downloads.
                "nocheckcertificate": True,
                "socket_timeout": self.download_timeout,
                "http_chunk_size": 1024 * 1024,  # 1MB chunks
                "logger": self.logger,
                # Use ffmpeg for HLS streams and transport stream format
                "downloader": "ffmpeg",
                "hls_use_mpegts": True,
            }
            # Headers returned by the link resolver are only set when
            # present.
            if headers:
                ydl_opts['http_headers'] = headers

            if progress_callback:
                ydl_opts['progress_hooks'] = [progress_callback]

            # Perform download with recovery
            # NOTE(review): _perform_ytdl_download takes (ydl_opts, link);
            # presumably handle_download_failure consumes temp_path itself
            # and forwards the remaining arguments - confirm.
            success = recovery_strategies.handle_download_failure(
                self._perform_ytdl_download,
                temp_path,
                ydl_opts,
                link
            )

            if success and os.path.exists(temp_path):
                # Verify downloaded file
                if file_corruption_detector.is_valid_video_file(temp_path):
                    # Move to final location
                    # Use copyfile instead of copy2 to avoid metadata permission issues
                    shutil.copyfile(temp_path, output_path)

                    # Calculate and store checksum for integrity.
                    # A checksum failure is logged but does not fail the
                    # download.
                    integrity_mgr = get_integrity_manager()
                    try:
                        checksum = integrity_mgr.store_checksum(
                            Path(output_path)
                        )
                        filename = Path(output_path).name
                        self.logger.info(
                            f"Stored checksum for {filename}: "
                            f"{checksum[:16]}..."
                        )
                    except Exception as e:
                        self.logger.warning(
                            f"Failed to store checksum: {e}"
                        )

                    # Clean up temp file (best effort; the copy above has
                    # already succeeded)
                    try:
                        os.remove(temp_path)
                    except Exception as e:
                        warn_msg = f"Failed to remove temp file: {e}"
                        self.logger.warning(warn_msg)

                    return True
                else:
                    # Invalid file: discard it and fall through to the
                    # next loop iteration.
                    warn_msg = (
                        f"Downloaded file failed validation: "
                        f"{temp_path}"
                    )
                    self.logger.warning(warn_msg)
                    try:
                        os.remove(temp_path)
                    except OSError as e:
                        warn_msg = f"Failed to remove temp file: {e}"
                        self.logger.warning(warn_msg)

        except Exception as e:
            self.logger.warning("Provider %s failed: %s", provider_name, e)
            # Clean up any partial temp files left by this failed attempt
            _cleanup_temp_file(temp_path, self.logger)
            # Only attempts that raised are counted as retries.
            self.download_stats['retried_downloads'] += 1
            continue

    # All providers failed – make sure no temp remnants are left behind
    _cleanup_temp_file(temp_path, self.logger)
    return False
|
||||
|
||||
def _perform_ytdl_download(
    self, ydl_opts: Dict[str, Any], link: str
) -> bool:
    """Download a single link with yt-dlp.

    Args:
        ydl_opts: Option dictionary handed to ``YoutubeDL``.
        link: URL to download.

    Returns:
        True when yt-dlp finished without raising.

    Raises:
        DownloadError: Wraps any exception raised while downloading.
    """
    try:
        with YoutubeDL(ydl_opts) as downloader:
            downloader.download([link])
    except Exception as exc:
        self.logger.error("yt-dlp download failed: %s", exc)
        raise DownloadError(f"Download failed: {exc}") from exc
    return True
|
||||
|
||||
@with_error_recovery(max_retries=2, context="get_title")
def GetTitle(self, key: str) -> str:
    """Get anime title with error handling.

    The title is read from ``div.series-title > h1 > span`` of the
    series page returned by ``_GetKeyHTML``.

    Args:
        key: Series identifier.

    Returns:
        The stripped title text, or ``"Unknown_Title_<key>"`` when the
        expected markup is not present.

    Raises:
        NonRetryableError: Propagated unchanged from ``_GetKeyHTML``
            (for example when the key does not exist).
        RetryableError: For any other failure while fetching or parsing.
    """
    try:
        soup = BeautifulSoup(self._GetKeyHTML(key).content, 'html.parser')
        title_div = soup.find('div', class_='series-title')
        heading = title_div.find('h1') if title_div else None
        span = heading.find('span') if heading else None

        if span:
            return span.text.strip()

        self.logger.warning("Could not extract title for key: %s", key)
        return f"Unknown_Title_{key}"

    except NonRetryableError:
        # A permanent failure (e.g. 404) must keep its type; wrapping it
        # in RetryableError would mark a missing key as worth retrying.
        raise
    except Exception as e:
        self.logger.error("Failed to get title for key %s: %s", key, e)
        raise RetryableError(f"Title extraction failed: {e}") from e
|
||||
|
||||
def GetSiteKey(self) -> str:
    """Return the identifier of the site this loader targets."""
    site_key = "aniworld.to"
    return site_key
|
||||
|
||||
@with_error_recovery(max_retries=2, context="get_key_html")
def _GetKeyHTML(self, key: str):
    """Return the series page response for ``key``, using a cache.

    Successful responses are stored in ``self._KeyHTMLDict`` and served
    from there on later calls.

    Args:
        key: Series identifier used in the URL.

    Returns:
        The cached or freshly fetched response object.

    Raises:
        NonRetryableError: The server answered 404 for this key.
        RetryableError: The server answered with any other error status.
    """
    if key in self._KeyHTMLDict:
        return self._KeyHTMLDict[key]

    try:
        series_url = f"{self.ANIWORLD_TO}/anime/stream/{key}"
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            series_url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT
        )

        if response.ok:
            # Only successful responses are cached.
            self._KeyHTMLDict[key] = response
            return response

        if response.status_code == 404:
            msg = f"Anime key not found: {key}"
            self.nokey_logger.error(msg)
            raise NonRetryableError(msg)

        raise RetryableError(
            f"HTTP error {response.status_code} for key {key}"
        )

    except Exception as exc:
        # Log and re-raise without changing the exception type.
        self.logger.error(f"Failed to get HTML for key {key}: {exc}")
        raise
|
||||
|
||||
@with_error_recovery(max_retries=2, context="get_episode_html")
def _GetEpisodeHTML(self, season: int, episode: int, key: str):
    """Return the episode page response, using a per-episode cache.

    Successful responses are stored in ``self._EpisodeHTMLDict`` under
    the tuple ``(key, season, episode)``.

    Args:
        season: Season number (must be 1-999)
        episode: Episode number (must be 1-9999)
        key: Series identifier (must not be empty or whitespace)

    Returns:
        Cached or fetched HTML response

    Raises:
        ValueError: If parameters are invalid
        NonRetryableError: If episode not found (404)
        RetryableError: If another HTTP error occurs
    """
    # Validate arguments before touching the cache or the network.
    if not key or not key.strip():
        raise ValueError("Series key cannot be empty")
    if not 1 <= season <= 999:
        raise ValueError(
            f"Invalid season number: {season} (must be 1-999)"
        )
    if not 1 <= episode <= 9999:
        raise ValueError(
            f"Invalid episode number: {episode} (must be 1-9999)"
        )

    cache_key = (key, season, episode)
    if cache_key in self._EpisodeHTMLDict:
        return self._EpisodeHTMLDict[cache_key]

    try:
        episode_url = (
            f"{self.ANIWORLD_TO}/anime/stream/{key}/"
            f"staffel-{season}/episode-{episode}"
        )
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            episode_url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT,
        )

        if response.ok:
            self._EpisodeHTMLDict[cache_key] = response
            return response

        if response.status_code == 404:
            raise NonRetryableError(
                f"Episode not found: {key} S{season}E{episode}"
            )

        raise RetryableError(
            f"HTTP error {response.status_code} for episode"
        )

    except Exception as exc:
        # Log and re-raise without changing the exception type.
        self.logger.error(
            f"Failed to get episode HTML for {key} "
            f"S{season}E{episode}: {exc}"
        )
        raise
|
||||
|
||||
def _get_provider_from_html(
    self, season: int, episode: int, key: str
) -> dict:
    """Extract providers from HTML with error handling.

    Every ``<li>`` whose class starts with ``episodeLink`` contributes
    one entry when it has a provider name (``<h4>``), a redirect link
    (``<a class="watchEpisode" href=...>``) and a numeric
    ``data-lang-key`` attribute. Incomplete entries are skipped.

    Args:
        season: Season number.
        episode: Episode number.
        key: Series identifier.

    Returns:
        Mapping ``{provider_name: {lang_key: absolute_redirect_url}}``;
        empty when the page lists no episode links.

    Raises:
        NonRetryableError: Propagated unchanged from ``_GetEpisodeHTML``
            (for example when the episode does not exist).
        RetryableError: For any other failure while fetching or parsing.
    """
    try:
        episode_html = self._GetEpisodeHTML(season, episode, key)
        soup = BeautifulSoup(episode_html.content, "html.parser")
        providers: dict[str, dict] = {}

        episode_links = soup.find_all(
            "li", class_=lambda x: x and x.startswith("episodeLink")
        )

        if not episode_links:
            warn_msg = (
                f"No episode links found for {key} S{season}E{episode}"
            )
            self.logger.warning(warn_msg)
            return providers

        for link in episode_links:
            name_tag = link.find("h4")
            provider_name = name_tag.text.strip() if name_tag else None

            anchor = link.find("a", class_="watchEpisode")
            # .get() so an anchor without href only skips this entry
            # instead of aborting the whole parse with a KeyError.
            redirect_link = anchor.get("href") if anchor else None

            raw_lang_key = link.get("data-lang-key")
            lang_key = (
                int(raw_lang_key)
                if raw_lang_key and raw_lang_key.isdigit()
                else None
            )

            # Compare against None explicitly so a numeric key of 0 is
            # not silently discarded by a truthiness test.
            if provider_name and redirect_link and lang_key is not None:
                providers.setdefault(provider_name, {})[lang_key] = (
                    f"{self.ANIWORLD_TO}{redirect_link}"
                )

        debug_msg = (
            f"Found {len(providers)} providers for "
            f"{key} S{season}E{episode}"
        )
        self.logger.debug(debug_msg)
        return providers

    except NonRetryableError:
        # Keep permanent failures (e.g. episode 404) non-retryable.
        raise
    except Exception as e:
        error_msg = f"Failed to parse providers from HTML: {e}"
        self.logger.error(error_msg)
        raise RetryableError(f"Provider parsing failed: {e}") from e
|
||||
|
||||
def _get_redirect_link(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
):
    """Find the redirect link of an episode for the requested language.

    Args:
        season: Season number.
        episode: Episode number.
        key: Series identifier.
        language: Language name, translated to a numeric key through
            ``_GetLanguageKey``.

    Returns:
        Tuple ``(redirect_url, provider_name)`` of the first provider
        that offers the language.

    Raises:
        NonRetryableError: The language is not available for the
            episode, or no provider lists it.
    """
    wanted_code = self._GetLanguageKey(language)

    if not self.IsLanguage(season, episode, key, language):
        raise NonRetryableError(
            f"Language {language} not available for "
            f"{key} S{season}E{episode}"
        )

    provider_map = self._get_provider_from_html(season, episode, key)

    # First provider (in mapping order) that carries the language wins.
    match = next(
        (
            (links_by_lang[wanted_code], name)
            for name, links_by_lang in provider_map.items()
            if wanted_code in links_by_lang
        ),
        None,
    )
    if match is not None:
        return match

    raise NonRetryableError(
        f"No provider found for {language} in "
        f"{key} S{season}E{episode}"
    )
|
||||
|
||||
def _get_embeded_link(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
):
    """Follow the episode's redirect link and return the final URL.

    Args:
        season: Season number.
        episode: Episode number.
        key: Series identifier.
        language: Language name used to pick the redirect link.

    Returns:
        The ``url`` attribute of the response to the redirect request.

    Raises:
        Exception: Any error from link lookup or the request is logged
            and re-raised unchanged.
    """
    try:
        # The provider name returned alongside the link is not needed.
        redirect_link, _provider_name = self._get_redirect_link(
            season, episode, key, language
        )
        request_headers = {"User-Agent": self.RANDOM_USER_AGENT}
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            redirect_link,
            timeout=self.DEFAULT_REQUEST_TIMEOUT,
            headers=request_headers,
        )
        return response.url

    except Exception as exc:
        self.logger.error(f"Failed to get embedded link: {exc}")
        raise
|
||||
|
||||
def _get_direct_link_from_provider(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
):
    """Resolve the direct download link through the VOE provider.

    Args:
        season: Season number.
        episode: Episode number.
        key: Series identifier.
        language: Language name used to pick the embedded link.

    Returns:
        Whatever the provider's ``get_link`` returns for the embedded
        link and ``DEFAULT_REQUEST_TIMEOUT``.

    Raises:
        NonRetryableError: No embedded link was found, or the VOE
            provider is not registered.
        Exception: Any other error is logged and re-raised unchanged.
    """
    try:
        embedded_link = self._get_embeded_link(
            season, episode, key, language
        )
        if not embedded_link:
            raise NonRetryableError("No embedded link found")

        # VOE is the hard-coded default (could be made configurable).
        voe = self.Providers.GetProvider("VOE")
        if not voe:
            raise NonRetryableError("VOE provider not available")

        request_timeout = self.DEFAULT_REQUEST_TIMEOUT
        return voe.get_link(embedded_link, request_timeout)

    except Exception as exc:
        self.logger.error(
            f"Failed to get direct link from provider: {exc}"
        )
        raise
|
||||
|
||||
@with_error_recovery(max_retries=2, context="get_season_episode_count")
def get_season_episode_count(self, slug: str) -> dict:
    """Count the episodes of every season of a series.

    The number of seasons is read from the ``numberOfSeasons`` meta tag
    of the series page. For each season the season page is fetched and
    its distinct ``staffel-<n>/episode-`` links are counted.

    Args:
        slug: Series identifier used in the URL.

    Returns:
        Mapping ``{season_number: episode_count}``; empty when the meta
        tag is missing.

    Raises:
        RetryableError: Wraps any failure while fetching or parsing.
    """
    # NOTE(review): requests go through the module-level requests.get,
    # not self.session as in the other fetch helpers - confirm intent.
    try:
        base_url = f"{self.ANIWORLD_TO}/anime/stream/{slug}/"
        overview = recovery_strategies.handle_network_failure(
            requests.get,
            base_url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT,
        )
        overview_soup = BeautifulSoup(overview.content, "html.parser")

        season_meta = overview_soup.find(
            "meta", itemprop="numberOfSeasons"
        )
        if season_meta:
            number_of_seasons = int(season_meta["content"])
        else:
            number_of_seasons = 0

        episode_counts = {}
        for season in range(1, number_of_seasons + 1):
            season_page = recovery_strategies.handle_network_failure(
                requests.get,
                f"{base_url}staffel-{season}",
                timeout=self.DEFAULT_REQUEST_TIMEOUT,
            )
            season_soup = BeautifulSoup(
                season_page.content, "html.parser"
            )

            # A set removes duplicate links to the same episode.
            marker = f"staffel-{season}/episode-"
            episode_hrefs = {
                anchor["href"]
                for anchor in season_soup.find_all("a", href=True)
                if marker in anchor['href']
            }
            episode_counts[season] = len(episode_hrefs)

        return episode_counts

    except Exception as exc:
        self.logger.error(
            "Failed to get episode counts for %s: %s", slug, exc
        )
        raise RetryableError(
            f"Episode count retrieval failed: {exc}"
        ) from exc
|
||||
|
||||
def get_download_statistics(self) -> Dict[str, Any]:
    """Return a copy of the download counters plus a success rate.

    Returns:
        A shallow copy of ``self.download_stats`` with an added
        ``success_rate`` entry: the percentage of successful downloads,
        or 0 when no downloads were counted.
    """
    stats = self.download_stats.copy()
    total = stats['total_downloads']
    if total > 0:
        rate = stats['successful_downloads'] / total * 100
    else:
        rate = 0
    stats['success_rate'] = rate
    return stats
|
||||
|
||||
def reset_statistics(self):
    """Replace ``self.download_stats`` with a fresh all-zero counter dict."""
    counter_names = (
        'total_downloads',
        'successful_downloads',
        'failed_downloads',
        'retried_downloads',
    )
    self.download_stats = {name: 0 for name in counter_names}
|
||||
|
||||
|
||||
# Backward-compatible name: code that still imports ``AniworldLoader``
# gets the enhanced loader.
class AniworldLoader(EnhancedAniWorldLoader):
    """Legacy alias of ``EnhancedAniWorldLoader``; adds no behaviour."""
|
||||
325
src/server/providers/failover.py
Normal file
325
src/server/providers/failover.py
Normal file
@@ -0,0 +1,325 @@
|
||||
"""Provider failover system for automatic fallback on failures.
|
||||
|
||||
This module implements automatic failover between multiple providers,
|
||||
ensuring high availability by switching to backup providers when the
|
||||
primary fails.
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
||||
|
||||
from src.server.providers.health_monitor import get_health_monitor
|
||||
from src.server.providers.provider_config import DEFAULT_PROVIDERS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
class ProviderFailover:
    """Manages automatic failover between multiple providers.

    The manager keeps an ordered chain of provider names. Operations run
    through :meth:`execute_with_failover`, which retries each provider up
    to ``max_retries`` times and then moves on to a provider that has not
    been attempted yet. When health monitoring is enabled, provider
    selection consults the shared health monitor.
    """

    def __init__(
        self,
        providers: Optional[List[str]] = None,
        max_retries: int = 3,
        retry_delay: float = 1.0,
        enable_health_monitoring: bool = True,
    ):
        """Initialize provider failover manager.

        Args:
            providers: List of provider names to use (default: all).
            max_retries: Maximum retry attempts per provider.
            retry_delay: Delay between retries in seconds.
            enable_health_monitoring: Whether to use health monitoring.
        """
        # Copy the caller's list so add/remove/reorder never mutate it.
        self._providers = (
            list(providers) if providers else DEFAULT_PROVIDERS.copy()
        )
        self._max_retries = max_retries
        self._retry_delay = retry_delay
        self._enable_health_monitoring = enable_health_monitoring

        # Current provider index (used by the round-robin fallback)
        self._current_index = 0

        # Health monitor
        self._health_monitor = (
            get_health_monitor() if enable_health_monitoring else None
        )

        logger.info(
            f"Provider failover initialized with "
            f"{len(self._providers)} providers"
        )

    def get_current_provider(self) -> str:
        """Get the current active provider.

        Returns:
            The health monitor's best provider when monitoring is enabled
            and that provider is part of the chain; otherwise the
            provider at the current round-robin index.

        Raises:
            RuntimeError: If the failover chain is empty.
        """
        if not self._providers:
            raise RuntimeError("No providers configured for failover")

        if self._enable_health_monitoring and self._health_monitor:
            # Try to get best available provider
            best = self._health_monitor.get_best_provider()
            if best and best in self._providers:
                return best

        # Fall back to round-robin selection
        return self._providers[self._current_index % len(self._providers)]

    def get_next_provider(self) -> Optional[str]:
        """Get the next provider in the failover chain.

        With health monitoring the result is the available provider that
        follows the current one and no internal state changes. Without
        it, the round-robin index is advanced by one.

        Returns:
            Name of next provider or None if none available.
        """
        if not self._providers:
            logger.warning("No available providers for failover")
            return None

        if self._enable_health_monitoring and self._health_monitor:
            # Query the monitor once instead of once per provider.
            healthy = self._health_monitor.get_available_providers()
            available = [p for p in self._providers if p in healthy]

            if not available:
                logger.warning("No available providers for failover")
                return None

            # Find next available provider
            current = self.get_current_provider()
            try:
                current_idx = available.index(current)
            except ValueError:
                # Current provider not in available list
                return available[0]
            return available[(current_idx + 1) % len(available)]

        # Fall back to simple rotation
        self._current_index = (self._current_index + 1) % len(
            self._providers
        )
        return self._providers[self._current_index]

    def _next_untried_provider(self, tried: List[str]) -> Optional[str]:
        """Return a provider that has not been attempted yet, if any."""
        if self._enable_health_monitoring and self._health_monitor:
            healthy = self._health_monitor.get_available_providers()
            if not any(p in healthy for p in self._providers):
                logger.warning("No available providers for failover")
                return None
            # Chain order is the priority order among healthy providers.
            for name in self._providers:
                if name in healthy and name not in tried:
                    return name
            return None

        # Rotate at most one full cycle looking for an untried provider.
        for _ in range(len(self._providers)):
            candidate = self.get_next_provider()
            if candidate is not None and candidate not in tried:
                return candidate
        return None

    async def execute_with_failover(
        self,
        operation: Callable[[str], Any],
        operation_name: str = "operation",
        **kwargs,
    ) -> Any:
        """Execute an operation with automatic failover.

        Each provider is attempted up to ``max_retries`` times with
        ``retry_delay`` seconds between attempts. After that the next
        provider that has not been tried is used, so every provider is
        attempted at most once per call.

        Args:
            operation: Async callable that takes provider name.
            operation_name: Name for logging purposes.
            **kwargs: Additional arguments to pass to operation.

        Returns:
            Result from successful operation.

        Raises:
            Exception: If all providers fail; chained to the last error.
        """
        # Local import: this module does not import ``time`` at top level.
        import time

        providers_tried: List[str] = []
        last_error: Optional[Exception] = None

        provider: Optional[str] = (
            self.get_current_provider() if self._providers else None
        )

        while provider is not None:
            providers_tried.append(provider)

            # Try operation with retries
            for retry in range(self._max_retries):
                logger.info(
                    f"Executing {operation_name} with provider "
                    f"{provider} (attempt {retry + 1}/{self._max_retries})"  # noqa: E501
                )

                # Start the clock before the try block so the failure
                # path can always compute an elapsed time.
                start_time = time.monotonic()
                try:
                    result = await operation(provider, **kwargs)
                except Exception as e:
                    elapsed_ms = (time.monotonic() - start_time) * 1000
                    last_error = e
                    logger.warning(
                        f"{operation_name} failed with provider "
                        f"{provider} (attempt {retry + 1}): {e}"
                    )

                    # Record failure
                    if self._health_monitor:
                        self._health_monitor.record_request(
                            provider_name=provider,
                            success=False,
                            response_time_ms=elapsed_ms,
                            error_message=str(e),
                        )

                    # Retry with delay (no delay after the last attempt)
                    if retry < self._max_retries - 1:
                        await asyncio.sleep(self._retry_delay)
                else:
                    elapsed_ms = (time.monotonic() - start_time) * 1000

                    # Record success
                    if self._health_monitor:
                        self._health_monitor.record_request(
                            provider_name=provider,
                            success=True,
                            response_time_ms=elapsed_ms,
                        )

                    logger.info(
                        f"{operation_name} succeeded with provider "
                        f"{provider} in {elapsed_ms:.2f}ms"
                    )
                    return result

            # Move on to a provider that has not been attempted yet.
            provider = self._next_untried_provider(providers_tried)

        # All providers failed
        error_msg = (
            f"{operation_name} failed with all providers. "
            f"Tried: {', '.join(providers_tried)}"
        )
        logger.error(error_msg)
        raise Exception(error_msg) from last_error

    def add_provider(self, provider_name: str) -> None:
        """Add a provider to the failover chain.

        Args:
            provider_name: Name of provider to add. Names already in the
                chain are ignored.
        """
        if provider_name not in self._providers:
            self._providers.append(provider_name)
            logger.info("Added provider to failover chain: %s", provider_name)

    def remove_provider(self, provider_name: str) -> bool:
        """Remove a provider from the failover chain.

        Args:
            provider_name: Name of provider to remove.

        Returns:
            True if removed, False if not found.
        """
        if provider_name in self._providers:
            self._providers.remove(provider_name)
            logger.info(
                f"Removed provider from failover chain: {provider_name}"
            )
            return True
        return False

    def get_providers(self) -> List[str]:
        """Get list of all providers in failover chain.

        Returns:
            A copy of the provider name list.
        """
        return self._providers.copy()

    def set_provider_priority(
        self, provider_name: str, priority_index: int
    ) -> bool:
        """Set priority of a provider by moving it in the chain.

        Args:
            provider_name: Name of provider to prioritize.
            priority_index: New index position (0 = highest priority).
                Values past the end move the provider to the end.

        Returns:
            True if updated, False if provider not found.
        """
        if provider_name not in self._providers:
            return False

        self._providers.remove(provider_name)
        self._providers.insert(
            min(priority_index, len(self._providers)), provider_name
        )
        logger.info(
            f"Set provider {provider_name} priority to index {priority_index}"
        )
        return True

    def get_failover_stats(self) -> Dict[str, Any]:
        """Get failover statistics and configuration.

        Returns:
            Dictionary with the configuration values, the current
            provider (None when the chain is empty) and, when a health
            monitor is attached, the available/unavailable split.
        """
        stats = {
            "total_providers": len(self._providers),
            "providers": self._providers.copy(),
            "current_provider": (
                self.get_current_provider() if self._providers else None
            ),
            "max_retries": self._max_retries,
            "retry_delay": self._retry_delay,
            "health_monitoring_enabled": self._enable_health_monitoring,
        }

        if self._health_monitor:
            available = self._health_monitor.get_available_providers()
            stats["available_providers"] = [
                p for p in self._providers if p in available
            ]
            stats["unavailable_providers"] = [
                p for p in self._providers if p not in available
            ]

        return stats
|
||||
|
||||
|
||||
# Module-wide failover manager, created on first use by get_failover()
# and replaced by configure_failover().
_failover: Optional[ProviderFailover] = None


def get_failover() -> ProviderFailover:
    """Return the global provider failover instance, creating it if needed.

    Returns:
        Global ProviderFailover instance (built with default arguments
        the first time it is requested).
    """
    global _failover
    if _failover is not None:
        return _failover
    _failover = ProviderFailover()
    return _failover
|
||||
|
||||
|
||||
def configure_failover(
    providers: Optional[List[str]] = None,
    max_retries: int = 3,
    retry_delay: float = 1.0,
    enable_health_monitoring: bool = True,
) -> ProviderFailover:
    """Configure global provider failover instance.

    Any previously created global instance is replaced.

    Args:
        providers: List of provider names to use.
        max_retries: Maximum retry attempts per provider.
        retry_delay: Delay between retries in seconds.
        enable_health_monitoring: Whether the new instance consults the
            health monitor. Defaults to True, matching the
            ``ProviderFailover`` constructor default.

    Returns:
        Configured ProviderFailover instance.
    """
    global _failover
    _failover = ProviderFailover(
        providers=providers,
        max_retries=max_retries,
        retry_delay=retry_delay,
        enable_health_monitoring=enable_health_monitoring,
    )
    return _failover
|
||||
416
src/server/providers/health_monitor.py
Normal file
416
src/server/providers/health_monitor.py
Normal file
@@ -0,0 +1,416 @@
|
||||
"""Provider health monitoring system for tracking availability and performance.
|
||||
|
||||
This module provides health monitoring capabilities for anime providers,
|
||||
tracking metrics like availability, response times, success rates, and
|
||||
bandwidth usage.
|
||||
"""
|
||||
import asyncio
|
||||
import logging
|
||||
from collections import defaultdict, deque
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Deque, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class ProviderHealthMetrics:
    """Health metrics for a single provider.

    Plain counters and timestamps; the rates are derived on access.
    """

    provider_name: str
    is_available: bool = True
    last_check_time: Optional[datetime] = None
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    average_response_time_ms: float = 0.0
    last_error: Optional[str] = None
    last_error_time: Optional[datetime] = None
    consecutive_failures: int = 0
    total_bytes_downloaded: int = 0
    uptime_percentage: float = 100.0

    @property
    def success_rate(self) -> float:
        """Calculate success rate as percentage (0.0 without requests)."""
        if self.total_requests == 0:
            return 0.0
        return (self.successful_requests / self.total_requests) * 100

    @property
    def failure_rate(self) -> float:
        """Calculate failure rate as percentage (0.0 without requests)."""
        # Without any recorded request there is nothing that failed;
        # deriving the value from success_rate would report 100 %.
        if self.total_requests == 0:
            return 0.0
        return 100.0 - self.success_rate

    def to_dict(self) -> Dict[str, Any]:
        """Convert metrics to dictionary.

        Timestamps are rendered with ``isoformat()`` (None when unset);
        ``success_rate``, ``average_response_time_ms`` and
        ``uptime_percentage`` are rounded to two decimals.
        """
        return {
            "provider_name": self.provider_name,
            "is_available": self.is_available,
            "last_check_time": (
                self.last_check_time.isoformat()
                if self.last_check_time
                else None
            ),
            "total_requests": self.total_requests,
            "successful_requests": self.successful_requests,
            "failed_requests": self.failed_requests,
            "success_rate": round(self.success_rate, 2),
            "average_response_time_ms": round(
                self.average_response_time_ms, 2
            ),
            "last_error": self.last_error,
            "last_error_time": (
                self.last_error_time.isoformat()
                if self.last_error_time
                else None
            ),
            "consecutive_failures": self.consecutive_failures,
            "total_bytes_downloaded": self.total_bytes_downloaded,
            "uptime_percentage": round(self.uptime_percentage, 2),
        }
|
||||
|
||||
|
||||
@dataclass
class RequestMetric:
    """Record of one provider request, kept in the request history."""

    # When the request was recorded.
    timestamp: datetime
    # Whether the request succeeded.
    success: bool
    # Duration of the request in milliseconds.
    response_time_ms: float
    # Payload size; stays 0 when not reported.
    bytes_transferred: int = 0
    # Error text for failed requests, otherwise None.
    error_message: Optional[str] = None
|
||||
|
||||
|
||||
class ProviderHealthMonitor:
|
||||
"""Monitors health and performance of anime providers."""
|
||||
|
||||
def __init__(
    self,
    max_history_size: int = 1000,
    health_check_interval: int = 300,  # 5 minutes
    failure_threshold: int = 3,
):
    """Set up an idle health monitor with empty metric stores.

    Args:
        max_history_size: Maximum number of request metrics to keep
            per provider.
        health_check_interval: Interval between health checks in
            seconds.
        failure_threshold: Number of consecutive failures before
            marking unavailable.
    """
    self._max_history_size = max_history_size
    self._health_check_interval = health_check_interval
    self._failure_threshold = failure_threshold

    def _new_history() -> Deque[RequestMetric]:
        # Bounded deque: the oldest entries drop out once it is full.
        return deque(maxlen=max_history_size)

    # Per-provider aggregates and per-provider request history.
    self._metrics: Dict[str, ProviderHealthMetrics] = {}
    self._request_history: Dict[str, Deque[RequestMetric]] = defaultdict(
        _new_history
    )

    # Background task state; nothing runs until start_monitoring().
    self._health_check_task: Optional[asyncio.Task] = None
    self._is_running = False

    logger.info("Provider health monitor initialized")
|
||||
|
||||
def start_monitoring(self) -> None:
    """Start background health monitoring.

    Schedules ``_health_check_loop`` as an asyncio task. Calling this
    while monitoring is already running only logs a warning.

    Raises:
        RuntimeError: If there is no running event loop. The monitor is
            left stopped in that case so a later call can succeed.
    """
    if self._is_running:
        logger.warning("Health monitoring already running")
        return

    loop_coro = self._health_check_loop()
    try:
        task = asyncio.create_task(loop_coro)
    except RuntimeError:
        # No running loop: close the coroutine so it is not reported as
        # "never awaited", and keep the stopped state.
        loop_coro.close()
        raise

    # The task body only starts once control returns to the event loop,
    # so setting the flag after scheduling is safe.
    self._health_check_task = task
    self._is_running = True
    logger.info("Provider health monitoring started")
|
||||
|
||||
async def stop_monitoring(self) -> None:
    """Stop background health monitoring.

    Clears the running flag, cancels the background task if there is
    one, waits for it to finish and forgets it.
    """
    self._is_running = False

    task = self._health_check_task
    if task:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # Expected result of the cancellation requested above.
            pass
        self._health_check_task = None

    logger.info("Provider health monitoring stopped")
|
||||
|
||||
async def _health_check_loop(self) -> None:
    """Run health checks periodically until monitoring is stopped.

    Each round performs the checks and then sleeps for
    ``_health_check_interval`` seconds. Cancellation ends the loop;
    any other error is logged and followed by the same sleep.
    """
    interval_sleep = asyncio.sleep
    while self._is_running:
        try:
            await self._perform_health_checks()
            await interval_sleep(self._health_check_interval)
        except asyncio.CancelledError:
            # Nothing follows the loop, so returning ends the task.
            return
        except Exception as exc:
            logger.exception("Error in health check loop: %s", exc)
            await interval_sleep(self._health_check_interval)
|
||||
|
||||
async def _perform_health_checks(self) -> None:
    """Refresh check time and uptime for every registered provider.

    The uptime percentage is the share of successful requests among the
    metrics returned by ``_get_recent_metrics(..., minutes=60)``; it is
    left untouched when that returns nothing. A failure for one provider
    is logged and does not stop the remaining providers.
    """
    # Iterate over a snapshot of the names so the dict may change.
    for provider_name in list(self._metrics):
        try:
            metrics = self._metrics[provider_name]
            metrics.last_check_time = datetime.now()

            recent = self._get_recent_metrics(provider_name, minutes=60)
            if recent:
                ok_count = len([m for m in recent if m.success])
                metrics.uptime_percentage = ok_count / len(recent) * 100

            logger.debug(
                f"Health check for {provider_name}: "
                f"available={metrics.is_available}, "
                f"success_rate={metrics.success_rate:.2f}%"
            )
        except Exception as exc:
            logger.error(
                f"Error checking health for {provider_name}: {exc}",
                exc_info=True,
            )
|
||||
|
||||
def record_request(
    self,
    provider_name: str,
    success: bool,
    response_time_ms: float,
    bytes_transferred: int = 0,
    error_message: Optional[str] = None,
) -> None:
    """Fold one provider request into the aggregate metrics and history.

    Args:
        provider_name: Name of the provider the request was sent to.
        success: Whether the request succeeded.
        response_time_ms: Duration of the request in milliseconds.
        bytes_transferred: Number of bytes transferred by the request.
        error_message: Error description for a failed request.
    """
    # Create the aggregate record the first time a provider is seen.
    if provider_name not in self._metrics:
        self._metrics[provider_name] = ProviderHealthMetrics(
            provider_name=provider_name
        )
    metrics = self._metrics[provider_name]

    # Counters: a success resets the failure streak, a failure extends it.
    metrics.total_requests += 1
    if not success:
        metrics.failed_requests += 1
        metrics.consecutive_failures += 1
        metrics.last_error = error_message
        metrics.last_error_time = datetime.now()
    else:
        metrics.successful_requests += 1
        metrics.consecutive_failures = 0

    # A provider is unavailable once its failure streak reaches the
    # threshold; the warning is logged only on the transition.
    threshold_reached = (
        metrics.consecutive_failures >= self._failure_threshold
    )
    if threshold_reached and metrics.is_available:
        logger.warning(
            f"Provider {provider_name} marked as unavailable after "
            f"{metrics.consecutive_failures} consecutive failures"
        )
    metrics.is_available = not threshold_reached

    # Running mean: rebuild the previous sum, add the new sample, divide.
    request_count = metrics.total_requests
    previous_total_ms = metrics.average_response_time_ms * (
        request_count - 1
    )
    metrics.average_response_time_ms = (
        previous_total_ms + response_time_ms
    ) / request_count

    metrics.total_bytes_downloaded += bytes_transferred

    # Keep the individual sample for time-windowed statistics.
    self._request_history[provider_name].append(
        RequestMetric(
            timestamp=datetime.now(),
            success=success,
            response_time_ms=response_time_ms,
            bytes_transferred=bytes_transferred,
            error_message=error_message,
        )
    )

    logger.debug(
        f"Recorded request for {provider_name}: "
        f"success={success}, time={response_time_ms:.2f}ms"
    )
|
||||
|
||||
def get_provider_metrics(
    self, provider_name: str
) -> Optional[ProviderHealthMetrics]:
    """Look up the health metrics recorded for one provider.

    Args:
        provider_name: Name of the provider.

    Returns:
        The provider's metrics, or None when no metrics are stored
        under that name.
    """
    metrics_by_name = self._metrics
    return metrics_by_name.get(provider_name)
|
||||
|
||||
def get_all_metrics(self) -> Dict[str, ProviderHealthMetrics]:
    """Return the health metrics of every provider.

    Returns:
        A shallow copy of the mapping from provider name to metrics, so
        adding or removing keys on the result does not affect the monitor.
    """
    snapshot = self._metrics.copy()
    return snapshot
|
||||
|
||||
def get_available_providers(self) -> List[str]:
    """List the providers currently flagged as available.

    Returns:
        Names of all providers whose metrics have ``is_available`` set,
        in the iteration order of the metrics mapping.
    """
    available_names = []
    for name, metrics in self._metrics.items():
        if metrics.is_available:
            available_names.append(name)
    return available_names
|
||||
|
||||
def get_best_provider(self) -> Optional[str]:
    """Pick the best performing provider among the available ones.

    Only providers flagged as available are considered. Among those the
    highest success rate wins; ties are broken by the lowest average
    response time, then by position in the metrics mapping.

    Returns:
        Name of the best provider, or None if no provider is available.
    """
    candidates = []
    for name, metrics in self._metrics.items():
        if metrics.is_available:
            candidates.append((name, metrics))

    if not candidates:
        return None

    # min() returns the first smallest item, which is the same element a
    # stable ascending sort on this key would place first.
    best_provider, _ = min(
        candidates,
        key=lambda item: (
            -item[1].success_rate,
            item[1].average_response_time_ms,
        ),
    )
    logger.debug("Best provider selected: %s", best_provider)
    return best_provider
|
||||
|
||||
def _get_recent_metrics(
|
||||
self, provider_name: str, minutes: int = 60
|
||||
) -> List[RequestMetric]:
|
||||
"""Get recent request metrics for a provider.
|
||||
|
||||
Args:
|
||||
provider_name: Name of the provider.
|
||||
minutes: Number of minutes to look back.
|
||||
|
||||
Returns:
|
||||
List of recent request metrics.
|
||||
"""
|
||||
if provider_name not in self._request_history:
|
||||
return []
|
||||
|
||||
cutoff_time = datetime.now() - timedelta(minutes=minutes)
|
||||
return [
|
||||
metric
|
||||
for metric in self._request_history[provider_name]
|
||||
if metric.timestamp >= cutoff_time
|
||||
]
|
||||
|
||||
def reset_provider_metrics(self, provider_name: str) -> bool:
    """Discard everything recorded for one provider.

    Replaces the provider's aggregate metrics with a fresh record and
    empties its request history.

    Args:
        provider_name: Name of the provider.

    Returns:
        True if the provider was known and has been reset, False if no
        metrics exist for that name.
    """
    metrics_by_name = self._metrics
    if provider_name in metrics_by_name:
        metrics_by_name[provider_name] = ProviderHealthMetrics(
            provider_name=provider_name
        )
        self._request_history[provider_name].clear()
        logger.info("Reset metrics for provider: %s", provider_name)
        return True
    return False
|
||||
|
||||
def get_health_summary(self) -> Dict[str, Any]:
    """Build an overall health summary across all providers.

    Returns:
        Dictionary with the keys ``total_providers``,
        ``available_providers``, ``availability_percentage``,
        ``average_success_rate`` and ``average_response_time_ms`` (the
        two averages are rounded to two decimals) plus ``providers``,
        which maps each provider name to its ``to_dict()`` output. The
        same keys are present when no provider is registered; all
        numbers are then zero and ``providers`` is empty.
    """
    total_providers = len(self._metrics)
    available_providers = len(self.get_available_providers())

    if total_providers == 0:
        # Same schema as the populated summary so consumers can always
        # read summary["providers"] without a KeyError.
        return {
            "total_providers": 0,
            "available_providers": 0,
            "availability_percentage": 0.0,
            "average_success_rate": 0.0,
            "average_response_time_ms": 0.0,
            "providers": {},
        }

    all_metrics = list(self._metrics.values())
    avg_success_rate = (
        sum(m.success_rate for m in all_metrics) / total_providers
    )
    avg_response_time = (
        sum(m.average_response_time_ms for m in all_metrics)
        / total_providers
    )

    return {
        "total_providers": total_providers,
        "available_providers": available_providers,
        "availability_percentage": (
            available_providers / total_providers
        )
        * 100,
        "average_success_rate": round(avg_success_rate, 2),
        "average_response_time_ms": round(avg_response_time, 2),
        "providers": {
            name: metrics.to_dict()
            for name, metrics in self._metrics.items()
        },
    }
|
||||
|
||||
|
||||
# Module-wide monitor instance; stays None until first requested.
_health_monitor: Optional[ProviderHealthMonitor] = None


def get_health_monitor() -> ProviderHealthMonitor:
    """Return the shared provider health monitor, creating it on demand.

    Returns:
        The module-wide ProviderHealthMonitor instance; every call
        returns the same object.
    """
    global _health_monitor
    if _health_monitor is not None:
        return _health_monitor
    _health_monitor = ProviderHealthMonitor()
    return _health_monitor
|
||||
307
src/server/providers/monitored_provider.py
Normal file
307
src/server/providers/monitored_provider.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""Performance monitoring wrapper for anime providers.
|
||||
|
||||
This module provides a wrapper that adds automatic performance tracking
|
||||
to any provider implementation.
|
||||
"""
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
from src.server.providers.base_provider import Loader
|
||||
from src.server.providers.health_monitor import get_health_monitor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MonitoredProviderWrapper(Loader):
    """Wrapper that adds performance monitoring to any provider.

    Every ``Loader`` operation is forwarded to the wrapped provider.
    Operations that perform work (search, language check, download, title
    and episode-count lookup) are timed and reported to the global health
    monitor when monitoring is enabled.
    """

    def __init__(
        self,
        provider: Loader,
        enable_monitoring: bool = True,
    ):
        """Initialize monitored provider wrapper.

        Args:
            provider: Provider instance to wrap.
            enable_monitoring: Whether to enable performance monitoring.
        """
        self._provider = provider
        self._enable_monitoring = enable_monitoring
        self._health_monitor = (
            get_health_monitor() if enable_monitoring else None
        )

        logger.info(
            "Monitoring wrapper initialized for provider: %s",
            provider.get_site_key(),
        )

    def subscribe_download_progress(self, handler):
        """Subscribe a handler to the wrapped provider's progress event.

        ``Loader`` declares this method abstract, so the wrapper must
        provide it to be instantiable; the call is simply forwarded.

        Args:
            handler: Callable to be called with progress dict.
        """
        return self._provider.subscribe_download_progress(handler)

    def unsubscribe_download_progress(self, handler):
        """Unsubscribe a handler from the wrapped provider's progress event.

        ``Loader`` declares this method abstract, so the wrapper must
        provide it to be instantiable; the call is simply forwarded.

        Args:
            handler: Callable previously subscribed.
        """
        return self._provider.unsubscribe_download_progress(handler)

    def _record_operation(
        self,
        operation_name: str,
        start_time: float,
        success: bool,
        bytes_transferred: int = 0,
        error_message: Optional[str] = None,
    ) -> None:
        """Record operation metrics.

        Does nothing when monitoring is disabled.

        Args:
            operation_name: Name of the operation.
            start_time: Operation start time (from time.time()).
            success: Whether operation succeeded.
            bytes_transferred: Number of bytes transferred.
            error_message: Error message if operation failed.
        """
        if not self._enable_monitoring or not self._health_monitor:
            return

        elapsed_ms = (time.time() - start_time) * 1000
        provider_name = self._provider.get_site_key()

        self._health_monitor.record_request(
            provider_name=provider_name,
            success=success,
            response_time_ms=elapsed_ms,
            bytes_transferred=bytes_transferred,
            error_message=error_message,
        )

        if success:
            logger.debug(
                "%s succeeded for %s in %.2fms",
                operation_name,
                provider_name,
                elapsed_ms,
            )
        else:
            logger.warning(
                "%s failed for %s in %.2fms: %s",
                operation_name,
                provider_name,
                elapsed_ms,
                error_message,
            )

    def _run_monitored(
        self,
        operation_name: str,
        operation: Callable[..., Any],
        *args: Any,
    ) -> Any:
        """Call ``operation(*args)`` and record its outcome and duration.

        An exception raised by the operation is recorded as a failure and
        re-raised unchanged. The success record is written outside the
        ``try`` so an error in the monitoring code itself is never counted
        as a provider failure.
        """
        start_time = time.time()
        try:
            result = operation(*args)
        except Exception as exc:
            self._record_operation(
                operation_name=operation_name,
                start_time=start_time,
                success=False,
                error_message=str(exc),
            )
            raise
        self._record_operation(
            operation_name=operation_name,
            start_time=start_time,
            success=True,
        )
        return result

    def search(self, word: str) -> List[Dict[str, Any]]:
        """Search for anime series by name (with monitoring).

        Args:
            word: Search term to look for.

        Returns:
            List of found series as dictionaries.
        """
        return self._run_monitored("search", self._provider.search, word)

    def is_language(
        self,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub",
    ) -> bool:
        """Check if episode exists in specified language (monitored).

        Args:
            season: Season number (1-indexed).
            episode: Episode number (1-indexed).
            key: Unique series identifier/key.
            language: Language to check (default: German Dub).

        Returns:
            True if episode exists in specified language.
        """
        return self._run_monitored(
            "is_language",
            self._provider.is_language,
            season,
            episode,
            key,
            language,
        )

    def download(
        self,
        base_directory: str,
        serie_folder: str,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub",
        progress_callback: Optional[Callable[[str, Dict], None]] = None,
    ) -> bool:
        """Download episode to specified directory (with monitoring).

        Args:
            base_directory: Base directory for downloads.
            serie_folder: Series folder name.
            season: Season number.
            episode: Episode number.
            key: Unique series identifier/key.
            language: Language version to download.
            progress_callback: Optional callback for progress updates.

        Returns:
            True if download successful.
        """
        start_time = time.time()
        bytes_transferred = 0

        wrapped_callback = progress_callback
        if progress_callback and self._enable_monitoring:

            def monitored_callback(event_type: str, data: Dict) -> None:
                # Remember the latest "downloaded" value reported by a
                # progress event, then forward the event unchanged.
                nonlocal bytes_transferred
                if event_type == "progress" and "downloaded" in data:
                    bytes_transferred = data["downloaded"]
                progress_callback(event_type, data)

            wrapped_callback = monitored_callback

        try:
            result = self._provider.download(
                base_directory=base_directory,
                serie_folder=serie_folder,
                season=season,
                episode=episode,
                key=key,
                language=language,
                progress_callback=wrapped_callback,
            )
        except Exception as exc:
            self._record_operation(
                operation_name="download",
                start_time=start_time,
                success=False,
                bytes_transferred=bytes_transferred,
                error_message=str(exc),
            )
            raise

        # The provider's return value doubles as the success flag.
        self._record_operation(
            operation_name="download",
            start_time=start_time,
            success=bool(result),
            bytes_transferred=bytes_transferred,
        )
        return result

    def get_site_key(self) -> str:
        """Get the site key/identifier for this provider.

        Returns:
            Site key string of the wrapped provider (not monitored).
        """
        return self._provider.get_site_key()

    def get_title(self, key: str) -> str:
        """Get the human-readable title of a series (monitored).

        Args:
            key: Unique series identifier/key.

        Returns:
            Series title string.
        """
        return self._run_monitored(
            "get_title", self._provider.get_title, key
        )

    def get_season_episode_count(self, slug: str) -> Dict[int, int]:
        """Get season and episode counts for a series (monitored).

        Args:
            slug: Series slug/key identifier.

        Returns:
            Dictionary mapping season number to episode count.
        """
        return self._run_monitored(
            "get_season_episode_count",
            self._provider.get_season_episode_count,
            slug,
        )

    @property
    def wrapped_provider(self) -> Loader:
        """Get the underlying provider instance.

        Returns:
            Wrapped provider instance.
        """
        return self._provider
|
||||
|
||||
|
||||
def wrap_provider(
    provider: Loader,
    enable_monitoring: bool = True,
) -> Loader:
    """Return a monitored version of a provider.

    Args:
        provider: Provider to wrap.
        enable_monitoring: Whether the new wrapper should record metrics.

    Returns:
        ``provider`` itself when it already is a MonitoredProviderWrapper
        (``enable_monitoring`` is then not applied), otherwise a new
        MonitoredProviderWrapper around it.
    """
    already_wrapped = isinstance(provider, MonitoredProviderWrapper)
    if not already_wrapped:
        return MonitoredProviderWrapper(
            provider=provider,
            enable_monitoring=enable_monitoring,
        )
    return provider
|
||||
79
src/server/providers/provider_config.py
Normal file
79
src/server/providers/provider_config.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""Shared provider configuration constants for AniWorld providers.
|
||||
|
||||
Centralizes user-agent strings, provider lists and common headers so
|
||||
multiple provider implementations can import a single source of truth.
|
||||
"""
|
||||
from enum import Enum
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
class ProviderType(str, Enum):
    """Supported video providers.

    Members are also ``str`` instances, so each one compares equal to its
    display name (the member value).
    """

    VOE = "VOE"
    DOODSTREAM = "Doodstream"
    VIDMOLY = "Vidmoly"
    VIDOZA = "Vidoza"
    SPEEDFILES = "SpeedFiles"
    STREAMTAPE = "Streamtape"
    LULUVDO = "Luluvdo"
|
||||
|
||||
|
||||
# Display names of the default providers, in the order listed here.
DEFAULT_PROVIDERS: List[str] = [
    provider.value
    for provider in (
        ProviderType.VOE,
        ProviderType.DOODSTREAM,
        ProviderType.VIDMOLY,
        ProviderType.VIDOZA,
        ProviderType.SPEEDFILES,
        ProviderType.STREAMTAPE,
        ProviderType.LULUVDO,
    )
]
|
||||
|
||||
# Browser-style request headers (Chromium/Edge 136 on Windows).
ANIWORLD_HEADERS: Dict[str, str] = {
    "accept": ",".join(
        [
            "text/html",
            "application/xhtml+xml",
            "application/xml;q=0.9",
            "image/avif",
            "image/webp",
            "image/apng",
            "*/*;q=0.8",
        ]
    ),
    "accept-encoding": "gzip, deflate, br, zstd",
    "accept-language": "de,de-DE;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cache-control": "max-age=0",
    "priority": "u=0, i",
    "sec-ch-ua": ", ".join(
        [
            '"Chromium";v="136"',
            '"Microsoft Edge";v="136"',
            '"Not.A/Brand";v="99"',
        ]
    ),
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": " ".join(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
        ]
    ),
}

# Single characters treated as invalid in path names, one list entry each.
INVALID_PATH_CHARS: List[str] = list('<>:"/\\|?*&')

# Mobile Firefox user-agent string for the Luluvdo provider.
LULUVDO_USER_AGENT = (
    "Mozilla/5.0 (Android 15; Mobile; rv:132.0) Gecko/132.0 "
    "Firefox/132.0"
)

# Default download timeout (seconds)
DEFAULT_DOWNLOAD_TIMEOUT = 600
|
||||
56
src/server/providers/provider_factory.py
Normal file
56
src/server/providers/provider_factory.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""Provider factory for managing anime content providers.
|
||||
|
||||
This module provides a factory class for accessing different anime content
|
||||
providers (loaders). The factory uses provider identifiers (keys) to return
|
||||
the appropriate provider instance.
|
||||
|
||||
Note: The 'key' parameter in this factory refers to the provider identifier
|
||||
(e.g., 'aniworld.to'), not to be confused with series keys used within
|
||||
providers to identify specific anime series.
|
||||
"""
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from .aniworld_provider import AniworldLoader
|
||||
from .base_provider import Loader
|
||||
|
||||
|
||||
class Loaders:
    """Registry of anime content providers, addressed by provider key.

    Attributes:
        dict: Mapping from provider key (a site identifier such as
            'aniworld.to') to the provider instance created for it.
    """

    def __init__(self) -> None:
        """Create the registry and instantiate the known providers.

        Currently registered:
            - 'aniworld.to': an AniworldLoader instance
        """
        registry: Dict[str, Loader] = {}
        registry["aniworld.to"] = AniworldLoader()
        self.dict = registry

    def GetLoader(self, key: str) -> Loader:
        """Look up the provider registered under a provider key.

        Args:
            key: Provider identifier (e.g., 'aniworld.to'). This names
                the site/provider, not an individual series.

        Returns:
            The Loader instance registered for ``key``.

        Raises:
            KeyError: If no provider is registered under ``key``.
        """
        loaders = self.dict
        return loaders[key]
|
||||
27
src/server/providers/streaming/Provider.py
Normal file
27
src/server/providers/streaming/Provider.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
|
||||
class Provider(ABC):
    """Interface every streaming provider has to implement."""

    @abstractmethod
    def get_link(
        self, embedded_link: str, timeout: int
    ) -> tuple[str, dict[str, Any]]:
        """Resolve an embedded player URL to a direct download link.

        Args:
            embedded_link: URL of the embedded player
            timeout: Request timeout in seconds

        Returns:
            Tuple of (direct_link: str, headers: dict)
            - direct_link: Direct URL to download resource
            - headers: Dictionary of HTTP headers to use for download

        Raises:
            NotImplementedError: When a subclass delegates to this base
                implementation instead of providing its own.
        """
        message = "Streaming providers must implement get_link"
        raise NotImplementedError(message)
|
||||
|
||||
139
src/server/providers/streaming/voe.py
Normal file
139
src/server/providers/streaming/voe.py
Normal file
@@ -0,0 +1,139 @@
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from fake_useragent import UserAgent
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
from .Provider import Provider
|
||||
|
||||
# Precompile the different pattern matchers used during extraction:
|
||||
# - REDIRECT_PATTERN pulls the intermediate redirect URL from the bootstrap
|
||||
# script so we can follow the provider's hand-off.
|
||||
# - B64_PATTERN isolates the base64 encoded payload containing the ``source``
|
||||
# field once decoded.
|
||||
# - HLS_PATTERN captures the base64 encoded HLS manifest for fallback when
|
||||
# no direct MP4 link is present.
|
||||
REDIRECT_PATTERN = re.compile(r"https?://[^'\"<>]+")
|
||||
B64_PATTERN = re.compile(r"var a168c='([^']+)'")
|
||||
HLS_PATTERN = re.compile(r"'hls': '(?P<hls>[^']+)'")
|
||||
|
||||
|
||||
class VOE(Provider):
    """VOE video provider implementation.

    Resolves a VOE embedded player URL to a direct media link by following
    the bootstrap redirect and trying three extraction strategies in turn.
    """

    # Used when the caller passes no usable (positive) timeout.
    DEFAULT_TIMEOUT = 30

    def __init__(self):
        """Pick a random user agent and prepare the download headers."""
        self.RANDOM_USER_AGENT = UserAgent().random
        self.Header = {"User-Agent": self.RANDOM_USER_AGENT}

    def get_link(
        self, embedded_link: str, timeout: int
    ) -> tuple[str, dict]:
        """
        Extract direct download link from VOE embedded player.

        Args:
            embedded_link: URL of the embedded VOE player
            timeout: Request timeout in seconds, applied to both HTTP
                requests; a missing or non-positive value falls back to
                ``DEFAULT_TIMEOUT``

        Returns:
            Tuple of (direct_link, headers)

        Raises:
            ValueError: If the page contains no redirect URL or none of
                the extraction methods yields a link.
        """
        request_timeout = (
            timeout if timeout and timeout > 0 else self.DEFAULT_TIMEOUT
        )
        request_headers = {"User-Agent": self.RANDOM_USER_AGENT}

        # Configure retries with backoff
        retries = Retry(
            total=5,  # Number of retries
            backoff_factor=1,  # Delay multiplier (1s, 2s, 4s, ...)
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=["GET"],
        )
        adapter = HTTPAdapter(max_retries=retries)

        # The context manager closes the session's connections even when
        # a request or the redirect lookup fails.
        with requests.Session() as session:
            # Kept for callers that read the attribute after the call.
            self.session = session
            session.mount("https://", adapter)
            # The redirect pattern also accepts plain http URLs.
            session.mount("http://", adapter)

            response = session.get(
                embedded_link,
                headers=request_headers,
                timeout=request_timeout,
            )

            redirect = REDIRECT_PATTERN.search(response.text)
            if not redirect:
                raise ValueError("No redirect found.")

            redirect_url = redirect.group(0)
            # "scheme:", "", "host", ... -> rebuild "scheme://host/".
            parts = redirect_url.strip().split("/")
            self.Header["Referer"] = f"{parts[0]}//{parts[2]}/"

            response = session.get(
                redirect_url,
                headers=request_headers,
                timeout=request_timeout,
            )
            html = response.content

        # Method 1: Extract from script tag
        extracted = self.extract_voe_from_script(html)
        if extracted:
            return extracted, self.Header

        # Undecodable bytes are irrelevant to the ASCII-only patterns.
        html_text = html.decode("utf-8", errors="replace")

        # Method 2: Extract from base64 encoded variable
        b64_match = B64_PATTERN.search(html_text)
        if b64_match:
            try:
                decoded = base64.b64decode(b64_match.group(1)).decode()[::-1]
                payload = json.loads(decoded)
            except ValueError:
                # Covers base64, UTF-8 and JSON decoding errors; a broken
                # payload must not block the HLS fallback.
                payload = None
            if isinstance(payload, dict) and payload.get("source"):
                return payload["source"], self.Header

        # Method 3: Extract HLS source
        hls_match = HLS_PATTERN.search(html_text)
        if hls_match:
            try:
                decoded_hls = base64.b64decode(
                    hls_match.group("hls")
                ).decode()
            except ValueError:
                decoded_hls = ""
            if decoded_hls:
                return decoded_hls, self.Header

        raise ValueError("Could not extract download link from VOE")

    def shift_letters(self, input_str: str) -> str:
        """Apply ROT13 shift to ASCII letters; other characters are kept."""
        shifted = []
        for char in input_str:
            code = ord(char)
            if 65 <= code <= 90:  # A-Z
                code = (code - 65 + 13) % 26 + 65
            elif 97 <= code <= 122:  # a-z
                code = (code - 97 + 13) % 26 + 97
            shifted.append(chr(code))
        return "".join(shifted)

    def replace_junk(self, input_str: str) -> str:
        """Replace each junk character sequence with an underscore."""
        junk_parts = ["@$", "^^", "~@", "%?", "*~", "!!", "#&"]
        for part in junk_parts:
            input_str = input_str.replace(part, "_")
        return input_str

    def shift_back(self, s: str, n: int) -> str:
        """Shift every character's code point back by n positions."""
        return "".join(chr(ord(c) - n) for c in s)

    def decode_voe_string(self, encoded: str) -> dict:
        """Decode a VOE-encoded string into its JSON payload.

        Steps: ROT13, strip junk sequences, base64-decode, shift code
        points back by 3, reverse, base64-decode again, parse as JSON.
        """
        step1 = self.shift_letters(encoded)
        step2 = self.replace_junk(step1).replace("_", "")
        step3 = base64.b64decode(step2).decode()
        step4 = self.shift_back(step3, 3)
        step5 = base64.b64decode(step4[::-1]).decode()
        return json.loads(step5)

    def extract_voe_from_script(self, html: bytes) -> "str | None":
        """Extract download link from VOE script tag.

        Returns:
            The ``source`` value of the decoded JSON script payload, or
            None when the script tag is missing or cannot be decoded, so
            the caller can fall back to its other extraction methods.
        """
        soup = BeautifulSoup(html, "html.parser")
        script = soup.find("script", type="application/json")
        if script is None:
            return None
        try:
            # Strip the two leading and trailing wrapper characters.
            return self.decode_voe_string(script.text[2:-2]).get("source")
        except (ValueError, AttributeError):
            # ValueError: base64 / UTF-8 / JSON / code-point errors;
            # AttributeError: decoded JSON is not an object.
            return None
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user