989 lines
36 KiB
Python
989 lines
36 KiB
Python
"""
|
||
Enhanced AniWorld Loader with Error Handling and Recovery
|
||
|
||
This module extends the original AniWorldLoader with comprehensive
|
||
error handling, retry mechanisms, and recovery strategies.
|
||
"""
|
||
|
||
import html
|
||
import json
|
||
import logging
|
||
import os
|
||
import re
|
||
import shutil
|
||
from pathlib import Path
|
||
from typing import Any, Callable, Dict, Optional
|
||
from urllib.parse import quote
|
||
|
||
import requests
|
||
from bs4 import BeautifulSoup
|
||
from fake_useragent import UserAgent
|
||
from requests.adapters import HTTPAdapter
|
||
from urllib3.util.retry import Retry
|
||
from yt_dlp import YoutubeDL
|
||
|
||
from ...infrastructure.security.file_integrity import get_integrity_manager
|
||
from ..error_handler import (
|
||
DownloadError,
|
||
NetworkError,
|
||
NonRetryableError,
|
||
RetryableError,
|
||
file_corruption_detector,
|
||
recovery_strategies,
|
||
with_error_recovery,
|
||
)
|
||
from ..interfaces.providers import Providers
|
||
from .base_provider import Loader
|
||
from .provider_config import (
|
||
ANIWORLD_HEADERS,
|
||
DEFAULT_PROVIDERS,
|
||
INVALID_PATH_CHARS,
|
||
LULUVDO_USER_AGENT,
|
||
ProviderType,
|
||
)
|
||
|
||
|
||
def _cleanup_temp_file(
|
||
temp_path: str,
|
||
logger: Optional[logging.Logger] = None,
|
||
) -> None:
|
||
"""Remove a temp file and any associated yt-dlp partial files.
|
||
|
||
Args:
|
||
temp_path: Path to the primary temp file.
|
||
logger: Optional logger for diagnostic messages.
|
||
"""
|
||
_log = logger or logging.getLogger(__name__)
|
||
candidates = [temp_path]
|
||
# yt-dlp creates fragment files like <file>.part
|
||
candidates.extend(
|
||
str(p) for p in Path(temp_path).parent.glob(
|
||
Path(temp_path).name + ".*"
|
||
)
|
||
)
|
||
for path in candidates:
|
||
if os.path.exists(path):
|
||
try:
|
||
os.remove(path)
|
||
_log.debug(f"Removed temp file: {path}")
|
||
except OSError as exc:
|
||
_log.warning(f"Failed to remove temp file {path}: {exc}")
|
||
|
||
|
||
class EnhancedAniWorldLoader(Loader):
|
||
"""Aniworld provider with retry and recovery strategies.
|
||
|
||
Also exposes metrics hooks for download statistics.
|
||
"""
|
||
|
||
def __init__(self) -> None:
    """Initialise session, provider headers, caches, and statistics."""
    super().__init__()

    self.logger = logging.getLogger(__name__)

    # Provider / path configuration shared with the rest of the loader.
    self.SUPPORTED_PROVIDERS = DEFAULT_PROVIDERS
    # Copy the shared header constant so per-instance edits stay local.
    self.AniworldHeaders = dict(ANIWORLD_HEADERS)
    self.INVALID_PATH_CHARS = INVALID_PATH_CHARS
    self.RANDOM_USER_AGENT = UserAgent().random
    self.LULUVDO_USER_AGENT = LULUVDO_USER_AGENT

    # Per-host extra headers handed to yt-dlp for each provider.
    self.PROVIDER_HEADERS = {
        ProviderType.VIDMOLY.value: ['Referer: "https://vidmoly.to"'],
        ProviderType.DOODSTREAM.value: ['Referer: "https://dood.li/"'],
        ProviderType.VOE.value: [f'User-Agent: {self.RANDOM_USER_AGENT}'],
        ProviderType.LULUVDO.value: [
            f'User-Agent: {self.LULUVDO_USER_AGENT}',
            "Accept-Language: de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
            'Origin: "https://luluvdo.com"',
            'Referer: "https://luluvdo.com/"',
        ],
    }

    self.ANIWORLD_TO = "https://aniworld.to"
    self.DEFAULT_REQUEST_TIMEOUT = 30

    # HTTP session with retry/backoff behaviour baked in.
    self.session = self._create_robust_session()

    # Response caches: series key -> page, (key, season, episode) -> page.
    self._KeyHTMLDict = {}
    self._EpisodeHTMLDict = {}

    # Provider manager used to resolve embedded links.
    self.Providers = Providers()

    # Counters exposed via get_download_statistics().
    self.download_stats = dict.fromkeys(
        (
            'total_downloads',
            'successful_downloads',
            'failed_downloads',
            'retried_downloads',
        ),
        0,
    )

    # DOWNLOAD_TIMEOUT env var (seconds) overrides the 600s default.
    self.download_timeout = int(os.getenv("DOWNLOAD_TIMEOUT") or "600")

    self._setup_logging()
|
||
|
||
def _create_robust_session(self) -> requests.Session:
    """Build a requests.Session preconfigured with retries and pooling."""
    # Transient failures (timeouts, rate limits, and the Cloudflare
    # 52x origin errors AniWorld emits under load) are retried with
    # exponential backoff; permanent errors still surface quickly
    # because raise_on_status is off and callers inspect status codes
    # themselves.
    retry_policy = Retry(
        total=5,
        backoff_factor=2,  # More aggressive backoff
        status_forcelist=[408, 429, 500, 502, 503, 504,
                          520, 521, 522, 523, 524],
        allowed_methods=["GET", "POST", "HEAD"],
        raise_on_status=False,  # Handle status errors manually
    )

    adapter = HTTPAdapter(
        max_retries=retry_policy,
        pool_connections=10,
        pool_maxsize=20,
        pool_block=True,
    )

    session = requests.Session()
    for scheme in ("https://", "http://"):
        session.mount(scheme, adapter)

    # Apply the default AniWorld headers to every request.
    session.headers.update(self.AniworldHeaders)
    return session
|
||
|
||
def _setup_logging(self):
    """Setup specialized logging for download errors and missing keys.

    Handlers are only created when the target logger has none yet.
    Previously a FileHandler was instantiated unconditionally (which
    opens the log file immediately) and only conditionally attached,
    so every new loader instance leaked a file descriptor.
    """
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Download error logger
    self.download_error_logger = logging.getLogger("DownloadErrors")
    if not self.download_error_logger.handlers:
        # NOTE(review): path is relative to the working directory —
        # confirm the process always starts where ../../ is writable.
        download_error_handler = logging.FileHandler(
            "../../download_errors.log"
        )
        download_error_handler.setLevel(logging.ERROR)
        download_error_handler.setFormatter(formatter)
        self.download_error_logger.addHandler(download_error_handler)
        self.download_error_logger.setLevel(logging.ERROR)

    # No key found logger
    self.nokey_logger = logging.getLogger("NoKeyFound")
    if not self.nokey_logger.handlers:
        nokey_handler = logging.FileHandler("../../NoKeyFound.log")
        nokey_handler.setLevel(logging.ERROR)
        nokey_handler.setFormatter(formatter)
        self.nokey_logger.addHandler(nokey_handler)
        self.nokey_logger.setLevel(logging.ERROR)
|
||
|
||
def ClearCache(self):
    """Drop every cached HTML response (series and episode level)."""
    for cache in (self._KeyHTMLDict, self._EpisodeHTMLDict):
        cache.clear()
    self.logger.debug("Cache cleared")
|
||
|
||
def RemoveFromCache(self):
    """Discard only the per-episode HTML cache, keeping series pages."""
    self._EpisodeHTMLDict.clear()
    self.logger.debug("Episode cache cleared")
|
||
|
||
@with_error_recovery(max_retries=3, context="anime_search")
def Search(self, word: str) -> list:
    """Query the AniWorld search endpoint and return matching series.

    Raises:
        ValueError: If the search term is blank.
        RetryableError: If the lookup fails for any other reason.
    """
    term = word.strip() if word else ""
    if not term:
        raise ValueError("Search term cannot be empty")

    url = f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
    try:
        return self._fetch_anime_list_with_recovery(url)
    except Exception as e:
        self.logger.error(f"Search failed for term '{word}': {e}")
        raise RetryableError(f"Search failed: {e}") from e
|
||
|
||
def _fetch_anime_list_with_recovery(self, url: str) -> list:
    """Fetch and parse the anime search response at *url*.

    Raises:
        NonRetryableError: On 404/403 responses.
        RetryableError: On any other HTTP error status.
        NetworkError: On transport-level failures.
    """
    try:
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT
        )
    except (requests.RequestException, ConnectionError) as e:
        raise NetworkError(f"Network error during anime search: {e}") from e

    if response.ok:
        return self._parse_anime_response(response.text)

    status = response.status_code
    if status == 404:
        raise NonRetryableError(f"URL not found: {url}")
    if status == 403:
        raise NonRetryableError(f"Access forbidden: {url}")
    if status >= 500:
        # Log suspicious server errors for monitoring
        self.logger.warning(
            f"Server error {status} from {url} "
            f"- will retry"
        )
        raise RetryableError(f"Server error {status}")
    raise RetryableError(f"HTTP error {status}")
|
||
|
||
def _parse_anime_response(self, response_text: str) -> list:
    """Parse the JSON search payload, tolerating upstream quirks.

    Raises:
        ValueError: If the payload is empty, is an HTML error page, or
            cannot be parsed as a JSON list by any strategy.
    """
    if not response_text or not response_text.strip():
        raise ValueError("Empty response from server")

    clean_text = response_text.strip()

    if clean_text[0] not in '[{':
        # An HTML document here means an error page, not search data.
        lowered = clean_text.lower()
        if lowered.startswith('<!doctype') or lowered.startswith('<html'):
            raise ValueError("Received HTML instead of JSON")
        self.logger.warning(
            "Response doesn't start with JSON markers, "
            "attempting parse anyway"
        )

    # Strategies, from least to most invasive: undo HTML escaping,
    # strip a UTF-8 BOM, then drop raw control characters injected
    # upstream.
    parsing_strategies = [
        lambda text: json.loads(html.unescape(text)),
        lambda text: json.loads(text.encode('utf-8').decode('utf-8-sig')),
        lambda text: json.loads(re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text)),
    ]

    for i, strategy in enumerate(parsing_strategies, start=1):
        try:
            decoded_data = strategy(clean_text)
        except json.JSONDecodeError as e:
            self.logger.debug(f"Parsing strategy {i} failed: {e}")
            continue
        if isinstance(decoded_data, list):
            self.logger.debug(
                f"Successfully parsed anime response with "
                f"strategy {i}"
            )
            return decoded_data
        self.logger.warning(
            f"Strategy {i} returned non-list data: "
            f"{type(decoded_data)}"
        )

    raise ValueError(
        "Could not parse anime search response with any strategy"
    )
|
||
|
||
def _GetLanguageKey(self, language: str) -> int:
    """Map a human-readable language label to its numeric site code.

    Returns 0 for labels the site does not recognise.
    """
    return {
        "German Dub": 1,
        "English Sub": 2,
        "German Sub": 3,
    }.get(language, 0)
|
||
|
||
@with_error_recovery(max_retries=2, context="language_check")
def IsLanguage(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
) -> bool:
    """Check if episode is available in specified language.

    Raises:
        RetryableError: If the check fails for any reason, including
            an unknown *language* label.
    """
    try:
        requested = self._GetLanguageKey(language)
        if not requested:
            raise ValueError(f"Unknown language: {language}")

        page = self._GetEpisodeHTML(season, episode, key)
        soup = BeautifulSoup(page.content, "html.parser")

        lang_box = soup.find("div", class_="changeLanguageBox")
        if lang_box is None:
            self.logger.debug(
                f"No language box found for {key} S{season}E{episode}"
            )
            return False

        # Each flag image carries the numeric language code.
        available = [
            int(code)
            for code in (
                img.get("data-lang-key")
                for img in lang_box.find_all("img")
            )
            if code and code.isdigit()
        ]

        found = requested in available
        self.logger.debug(
            f"Language check for {key} S{season}E{episode}: "
            f"Requested={requested}, "
            f"Available={available}, "
            f"Result={found}"
        )
        return found

    except Exception as e:
        self.logger.error(
            f"Language check failed for {key} S{season}E{episode}: {e}"
        )
        raise RetryableError(f"Language check failed: {e}") from e
|
||
|
||
def Download(
    self,
    baseDirectory: str,
    serieFolder: str,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
    progress_callback: Optional[Callable] = None,
) -> bool:
    """Download episode with comprehensive error handling.

    Args:
        baseDirectory: Base download directory path
        serieFolder: Filesystem folder name (metadata only, used for
            file path construction)
        season: Season number (0 for movies)
        episode: Episode number
        key: Series unique identifier from provider (used for
            identification and API calls)
        language: Audio language preference (default: German Dub)
        progress_callback: Optional callback for download progress
            updates

    Returns:
        bool: True if download succeeded, False otherwise

    Raises:
        DownloadError: If the download fails, or if parameters are
            missing/invalid — the validation ``ValueError``s raised
            below are caught by the broad handler and re-raised as
            DownloadError, so callers only ever see DownloadError.
    """
    self.download_stats["total_downloads"] += 1

    try:
        # Validate inputs
        if not all([baseDirectory, serieFolder, key]):
            raise ValueError("Missing required parameters for download")

        if season < 0 or episode < 0:
            raise ValueError("Season and episode must be non-negative")

        # Prepare file paths: strip characters that are illegal in
        # filesystem paths from the scraped title.
        sanitized_anime_title = "".join(
            char
            for char in self.GetTitle(key)
            if char not in self.INVALID_PATH_CHARS
        )

        if not sanitized_anime_title:
            sanitized_anime_title = f"Unknown_{key}"

        # Generate output filename; season 0 is the movie convention.
        if season == 0:
            output_file = (
                f"{sanitized_anime_title} - Movie {episode:02} - "
                f"({language}).mp4"
            )
        else:
            output_file = (
                f"{sanitized_anime_title} - S{season:02}E{episode:03} - "
                f"({language}).mp4"
            )

        # Create directory structure
        folder_path = os.path.join(
            baseDirectory, serieFolder, f"Season {season}"
        )
        output_path = os.path.join(folder_path, output_file)

        # Check if file already exists and is valid
        if os.path.exists(output_path):
            is_valid = file_corruption_detector.is_valid_video_file(
                output_path
            )

            # Also verify checksum if available
            integrity_mgr = get_integrity_manager()
            checksum_valid = True
            if integrity_mgr.has_checksum(Path(output_path)):
                checksum_valid = integrity_mgr.verify_checksum(
                    Path(output_path)
                )
                if not checksum_valid:
                    self.logger.warning(
                        f"Checksum verification failed for {output_file}"
                    )

            if is_valid and checksum_valid:
                # Nothing to do: count it as a success and skip.
                msg = (
                    f"File already exists and is valid: "
                    f"{output_file}"
                )
                self.logger.info(msg)
                self.download_stats["successful_downloads"] += 1
                return True
            else:
                # Corrupt or checksum-mismatched file: delete it so the
                # download below replaces it.
                warning_msg = (
                    f"Existing file appears corrupted, removing: "
                    f"{output_path}"
                )
                self.logger.warning(warning_msg)
                try:
                    os.remove(output_path)
                    # Remove checksum entry
                    integrity_mgr.remove_checksum(Path(output_path))
                except OSError as e:
                    error_msg = f"Failed to remove corrupted file: {e}"
                    self.logger.error(error_msg)

        os.makedirs(folder_path, exist_ok=True)

        # Create temp directory
        # NOTE(review): "./Temp/" is relative to the working directory —
        # confirm all entry points run from the expected location.
        temp_dir = "./Temp/"
        os.makedirs(temp_dir, exist_ok=True)
        temp_path = os.path.join(temp_dir, output_file)

        # Attempt download with recovery strategies
        success = self._download_with_recovery(
            season,
            episode,
            key,
            language,
            temp_path,
            output_path,
            progress_callback,
        )

        if success:
            self.download_stats["successful_downloads"] += 1
            success_msg = f"Successfully downloaded: {output_file}"
            self.logger.info(success_msg)
        else:
            self.download_stats["failed_downloads"] += 1
            fail_msg = (
                f"Download failed for {key} S{season}E{episode} "
                f"({language})"
            )
            self.download_error_logger.error(fail_msg)

        return success

    except Exception as e:
        self.download_stats["failed_downloads"] += 1
        err_msg = (
            f"Download error for {key} S{season}E{episode}: {e}"
        )
        self.download_error_logger.error(err_msg, exc_info=True)
        raise DownloadError(f"Download failed: {e}") from e
    finally:
        # Drop cached pages so stale HTML never leaks across downloads.
        self.ClearCache()
|
||
|
||
def _download_with_recovery(
    self,
    season: int,
    episode: int,
    key: str,
    language: str,
    temp_path: str,
    output_path: str,
    progress_callback: Optional[Callable],
) -> bool:
    """Attempt download with multiple providers and recovery.

    Tries each configured provider in order; the first attempt whose
    downloaded file passes validation wins. Partial temp files are
    cleaned up after every failed attempt and once more at the end.

    Returns:
        True once a validated file has been copied to *output_path*,
        False when every provider failed.
    """
    for provider_name in self.SUPPORTED_PROVIDERS:
        try:
            info_msg = (
                f"Attempting download with provider: {provider_name}"
            )
            self.logger.info(info_msg)

            # Get download link and headers for provider
            # NOTE(review): _get_direct_link_from_provider ignores
            # provider_name and always resolves via VOE — confirm
            # whether per-provider resolution was intended here.
            link, headers = recovery_strategies.handle_network_failure(
                self._get_direct_link_from_provider,
                season,
                episode,
                key,
                language,
            )

            if not link:
                warn_msg = (
                    f"No download link found for provider: "
                    f"{provider_name}"
                )
                self.logger.warning(warn_msg)
                continue

            # Configure yt-dlp options
            ydl_opts = {
                "fragment_retries": float("inf"),
                "outtmpl": temp_path,
                "quiet": True,
                "no_warnings": True,
                "progress_with_newline": False,
                "nocheckcertificate": True,
                "socket_timeout": self.download_timeout,
                "http_chunk_size": 1024 * 1024,  # 1MB chunks
            }
            if headers:
                ydl_opts['http_headers'] = headers

            if progress_callback:
                ydl_opts['progress_hooks'] = [progress_callback]

            # Perform download with recovery.
            # NOTE(review): temp_path is passed ahead of the args that
            # _perform_ytdl_download(ydl_opts, link) accepts — confirm
            # handle_download_failure consumes it (e.g. for cleanup)
            # rather than forwarding it to the callable.
            success = recovery_strategies.handle_download_failure(
                self._perform_ytdl_download,
                temp_path,
                ydl_opts,
                link
            )

            if success and os.path.exists(temp_path):
                # Verify downloaded file before publishing it
                if file_corruption_detector.is_valid_video_file(temp_path):
                    # Move to final location.
                    # Use copyfile instead of copy2 to avoid metadata
                    # permission issues.
                    shutil.copyfile(temp_path, output_path)

                    # Calculate and store checksum for integrity
                    integrity_mgr = get_integrity_manager()
                    try:
                        checksum = integrity_mgr.store_checksum(
                            Path(output_path)
                        )
                        filename = Path(output_path).name
                        # FIX: log the actual filename; previously the
                        # message contained the literal "(unknown)" and
                        # the computed filename variable was unused.
                        self.logger.info(
                            f"Stored checksum for {filename}: "
                            f"{checksum[:16]}..."
                        )
                    except Exception as e:
                        self.logger.warning(
                            f"Failed to store checksum: {e}"
                        )

                    # Clean up temp file
                    try:
                        os.remove(temp_path)
                    except Exception as e:
                        warn_msg = f"Failed to remove temp file: {e}"
                        self.logger.warning(warn_msg)

                    return True
                else:
                    warn_msg = (
                        f"Downloaded file failed validation: "
                        f"{temp_path}"
                    )
                    self.logger.warning(warn_msg)
                    try:
                        os.remove(temp_path)
                    except OSError as e:
                        warn_msg = f"Failed to remove temp file: {e}"
                        self.logger.warning(warn_msg)

        except Exception as e:
            self.logger.warning(f"Provider {provider_name} failed: {e}")
            # Clean up any partial temp files left by this failed attempt
            _cleanup_temp_file(temp_path, self.logger)
            self.download_stats['retried_downloads'] += 1
            continue

    # All providers failed – make sure no temp remnants are left behind
    _cleanup_temp_file(temp_path, self.logger)
    return False
|
||
|
||
def _perform_ytdl_download(
    self, ydl_opts: Dict[str, Any], link: str
) -> bool:
    """Run yt-dlp for *link* with the given options.

    Returns:
        True on success.

    Raises:
        DownloadError: Wrapping any failure raised by yt-dlp.
    """
    try:
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([link])
    except Exception as e:
        self.logger.error(f"yt-dlp download failed: {e}")
        raise DownloadError(f"Download failed: {e}") from e
    return True
|
||
|
||
@with_error_recovery(max_retries=2, context="get_title")
def GetTitle(self, key: str) -> str:
    """Extract the series title from the cached series page.

    Falls back to ``Unknown_Title_<key>`` when the expected markup
    (div.series-title > h1 > span) is missing.

    Raises:
        RetryableError: If fetching or parsing the page fails.
    """
    try:
        page = BeautifulSoup(self._GetKeyHTML(key).content, 'html.parser')
        title_div = page.find('div', class_='series-title')
        heading = title_div.find('h1') if title_div else None
        span = heading.find('span') if heading else None
        if span:
            return span.text.strip()

        self.logger.warning(f"Could not extract title for key: {key}")
        return f"Unknown_Title_{key}"

    except Exception as e:
        self.logger.error(f"Failed to get title for key {key}: {e}")
        raise RetryableError(f"Title extraction failed: {e}") from e
|
||
|
||
def GetSiteKey(self) -> str:
    """Return the canonical identifier for this streaming site."""
    return "aniworld.to"
|
||
|
||
@with_error_recovery(max_retries=2, context="get_key_html")
def _GetKeyHTML(self, key: str):
    """Return the (cached) series overview page for *key*.

    Raises:
        NonRetryableError: If the series key does not exist (404).
        RetryableError: On any other HTTP error status.
    """
    cached = self._KeyHTMLDict.get(key)
    if cached is not None:
        return cached

    try:
        url = f"{self.ANIWORLD_TO}/anime/stream/{key}"
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT
        )

        if not response.ok:
            if response.status_code == 404:
                msg = f"Anime key not found: {key}"
                # Unknown keys are recorded separately for analysis.
                self.nokey_logger.error(msg)
                raise NonRetryableError(msg)
            raise RetryableError(
                f"HTTP error {response.status_code} for key {key}"
            )

        self._KeyHTMLDict[key] = response
        return response

    except Exception as e:
        self.logger.error(f"Failed to get HTML for key {key}: {e}")
        raise
|
||
|
||
@with_error_recovery(max_retries=2, context="get_episode_html")
def _GetEpisodeHTML(self, season: int, episode: int, key: str):
    """Return the (cached) episode page for *key* S*season* E*episode*.

    Args:
        season: Season number (must be 1-999)
        episode: Episode number (must be 1-9999)
        key: Series identifier (must be non-empty)

    Returns:
        Cached or freshly fetched HTML response.

    Raises:
        ValueError: If parameters are invalid.
        NonRetryableError: If episode not found (404).
        RetryableError: If another HTTP error occurs.
    """
    if not key or not key.strip():
        raise ValueError("Series key cannot be empty")
    if not 1 <= season <= 999:
        raise ValueError(
            f"Invalid season number: {season} (must be 1-999)"
        )
    if not 1 <= episode <= 9999:
        raise ValueError(
            f"Invalid episode number: {episode} (must be 1-9999)"
        )

    cache_key = (key, season, episode)
    cached = self._EpisodeHTMLDict.get(cache_key)
    if cached is not None:
        return cached

    try:
        url = (
            f"{self.ANIWORLD_TO}/anime/stream/{key}/"
            f"staffel-{season}/episode-{episode}"
        )
        response = recovery_strategies.handle_network_failure(
            self.session.get, url, timeout=self.DEFAULT_REQUEST_TIMEOUT
        )

        if not response.ok:
            if response.status_code == 404:
                raise NonRetryableError(
                    f"Episode not found: {key} S{season}E{episode}"
                )
            raise RetryableError(
                f"HTTP error {response.status_code} for episode"
            )

        self._EpisodeHTMLDict[cache_key] = response
        return response

    except Exception as e:
        self.logger.error(
            f"Failed to get episode HTML for {key} "
            f"S{season}E{episode}: {e}"
        )
        raise
|
||
|
||
def _get_provider_from_html(
    self, season: int, episode: int, key: str
) -> dict:
    """Extract provider -> {language code -> redirect URL} from the page.

    Raises:
        RetryableError: If the episode HTML cannot be parsed.
    """
    try:
        page = self._GetEpisodeHTML(season, episode, key)
        soup = BeautifulSoup(page.content, "html.parser")
        providers: dict[str, dict] = {}

        links = soup.find_all(
            "li", class_=lambda x: x and x.startswith("episodeLink")
        )
        if not links:
            self.logger.warning(
                f"No episode links found for {key} S{season}E{episode}"
            )
            return providers

        for entry in links:
            name_tag = entry.find("h4")
            anchor = entry.find("a", class_="watchEpisode")
            raw_lang = entry.get("data-lang-key")

            name = name_tag.text.strip() if name_tag else None
            href = anchor["href"] if anchor else None
            lang = (
                int(raw_lang)
                if raw_lang and raw_lang.isdigit()
                else None
            )

            # Skip entries missing a name, a link, or a language code.
            if name and href and lang:
                providers.setdefault(name, {})[lang] = (
                    f"{self.ANIWORLD_TO}{href}"
                )

        self.logger.debug(
            f"Found {len(providers)} providers for "
            f"{key} S{season}E{episode}"
        )
        return providers

    except Exception as e:
        self.logger.error(f"Failed to parse providers from HTML: {e}")
        raise RetryableError(f"Provider parsing failed: {e}") from e
|
||
|
||
def _get_redirect_link(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
):
    """Return (redirect URL, provider name) for the first matching host.

    Raises:
        NonRetryableError: If the language is unavailable or no
            provider carries it.
    """
    wanted = self._GetLanguageKey(language)

    if not self.IsLanguage(season, episode, key, language):
        raise NonRetryableError(
            f"Language {language} not available for "
            f"{key} S{season}E{episode}"
        )

    providers = self._get_provider_from_html(season, episode, key)
    for name, lang_map in providers.items():
        if wanted in lang_map:
            return lang_map[wanted], name

    raise NonRetryableError(
        f"No provider found for {language} in "
        f"{key} S{season}E{episode}"
    )
|
||
|
||
def _get_embeded_link(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
):
    """Follow the provider redirect and return the embedded player URL."""
    try:
        redirect_link, _provider_name = self._get_redirect_link(
            season, episode, key, language
        )
        # A randomized UA is sent so the redirect target accepts us.
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            redirect_link,
            timeout=self.DEFAULT_REQUEST_TIMEOUT,
            headers={"User-Agent": self.RANDOM_USER_AGENT},
        )
        return response.url
    except Exception as e:
        self.logger.error(f"Failed to get embedded link: {e}")
        raise
|
||
|
||
def _get_direct_link_from_provider(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
):
    """Resolve the embedded player URL into a direct download link.

    NOTE(review): resolution is hardcoded to the VOE extractor even
    though callers iterate several provider names — confirm whether
    other extractors should be selected here.
    """
    try:
        embedded_link = self._get_embeded_link(
            season, episode, key, language
        )
        if not embedded_link:
            raise NonRetryableError("No embedded link found")

        # Use VOE provider as default (could be made configurable)
        provider = self.Providers.GetProvider("VOE")
        if not provider:
            raise NonRetryableError("VOE provider not available")

        return provider.get_link(
            embedded_link, self.DEFAULT_REQUEST_TIMEOUT
        )
    except Exception as e:
        self.logger.error(
            f"Failed to get direct link from provider: {e}"
        )
        raise
|
||
|
||
@with_error_recovery(max_retries=2, context="get_season_episode_count")
def get_season_episode_count(self, slug: str) -> dict:
    """Get the number of episodes per season for a series.

    Args:
        slug: Series identifier used in the stream URL.

    Returns:
        Mapping of season number (1-based) to its episode count.

    Raises:
        RetryableError: If any fetch or parse step fails.
    """
    try:
        base_url = f"{self.ANIWORLD_TO}/anime/stream/{slug}/"
        # FIX: use the configured session (not bare requests.get) so
        # the retry adapter and default headers apply, consistent with
        # every other request issued by this class.
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            base_url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT,
        )

        soup = BeautifulSoup(response.content, "html.parser")

        season_meta = soup.find("meta", itemprop="numberOfSeasons")
        number_of_seasons = (
            int(season_meta["content"]) if season_meta else 0
        )

        episode_counts = {}

        for season in range(1, number_of_seasons + 1):
            season_url = f"{base_url}staffel-{season}"
            season_response = (
                recovery_strategies.handle_network_failure(
                    self.session.get,
                    season_url,
                    timeout=self.DEFAULT_REQUEST_TIMEOUT,
                )
            )

            season_soup = BeautifulSoup(
                season_response.content, "html.parser"
            )

            # Count distinct episode URLs belonging to this season.
            unique_links = {
                link["href"]
                for link in season_soup.find_all("a", href=True)
                if f"staffel-{season}/episode-" in link["href"]
            }

            episode_counts[season] = len(unique_links)

        return episode_counts

    except Exception as e:
        self.logger.error(f"Failed to get episode counts for {slug}: {e}")
        raise RetryableError(f"Episode count retrieval failed: {e}") from e
|
||
|
||
def get_download_statistics(self) -> Dict[str, Any]:
    """Return a copy of the download counters plus a success_rate (%)."""
    stats = dict(self.download_stats)
    total = stats['total_downloads']
    stats['success_rate'] = (
        stats['successful_downloads'] / total * 100 if total > 0 else 0
    )
    return stats
|
||
|
||
def reset_statistics(self):
    """Zero all download counters."""
    self.download_stats = dict.fromkeys(
        (
            'total_downloads',
            'successful_downloads',
            'failed_downloads',
            'retried_downloads',
        ),
        0,
    )
|
||
|
||
|
||
# For backward compatibility, create wrapper that uses enhanced loader
|
||
class AniworldLoader(EnhancedAniWorldLoader):
    """Backward compatibility wrapper for the enhanced loader."""
|