import glob
import html
import json
import logging
import os
import re
import shutil
import threading
from pathlib import Path
from urllib.parse import quote

import chardet
import requests
from bs4 import BeautifulSoup
from events import Events
from fake_useragent import UserAgent
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from yt_dlp import YoutubeDL
from yt_dlp.utils import DownloadCancelled

from ..interfaces.providers import Providers
from .base_provider import Loader

# Imported shared provider configuration
from .provider_config import (
    ANIWORLD_HEADERS,
    DEFAULT_DOWNLOAD_TIMEOUT,
    DEFAULT_PROVIDERS,
    INVALID_PATH_CHARS,
    LULUVDO_USER_AGENT,
    ProviderType,
)

logger = logging.getLogger(__name__)


def _cleanup_temp_file(temp_path: str) -> None:
    """Clean up a temp file and any associated partial download files.

    Removes the temp file itself and any sibling files named
    ``<temp file name>.<anything>`` (for example the ``.part`` files
    yt-dlp leaves behind). Removal failures are logged and never raised.

    Args:
        temp_path: Absolute or relative path to the temp file.
    """
    target = Path(temp_path)

    # Escape the file name before globbing: episode titles may contain
    # glob metacharacters such as "[" or "*", which would otherwise turn
    # the name into a pattern that no longer matches its own leftovers.
    pattern = glob.escape(target.name) + ".*"
    paths_to_remove = [temp_path]
    paths_to_remove.extend(str(p) for p in target.parent.glob(pattern))

    for path in paths_to_remove:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Nothing to clean up for this path.
            continue
        except OSError as exc:
            logger.warning("Failed to remove temp file %s: %s", path, exc)
        else:
            logger.debug("Removed temp file: %s", path)


# Configure persistent loggers but don't add duplicate handlers when module
# is imported multiple times (common in test environments).
# Use absolute paths for log files to prevent security issues
# Determine project root (assuming this file is in src/core/providers/)
_module_dir = Path(__file__).parent
_project_root = _module_dir.parent.parent.parent
_logs_dir = _project_root / "logs"

# Ensure logs directory exists
_logs_dir.mkdir(parents=True, exist_ok=True)

download_error_logger = logging.getLogger("DownloadErrors")
if not download_error_logger.handlers:
    log_path = _logs_dir / "download_errors.log"
    download_error_handler = logging.FileHandler(str(log_path))
    download_error_handler.setLevel(logging.ERROR)
    download_error_logger.addHandler(download_error_handler)

# NOTE(review): getLogger() without a name returns the root logger, and the
# name is not used anywhere in this module — confirm whether a dedicated
# logger was intended.
noKeyFound_logger = logging.getLogger()


def _decode_html_content(content: bytes) -> str:
    """Decode HTML content with encoding detection.

    Uses chardet to detect the actual encoding of the content, falling
    back to utf-8 with replacement error handling.

    Args:
        content: Raw HTML bytes from the response

    Returns:
        Decoded string content
    """
    detected = chardet.detect(content)
    # chardet reports ``encoding=None`` when it cannot decide, so a plain
    # ``.get(key, default)`` is not enough to guarantee a usable codec name.
    encoding = detected.get('encoding') or 'utf-8'
    confidence = detected.get('confidence') or 0

    if confidence < 0.7:
        logger.debug(
            "Low encoding confidence (%.2f) for detected encoding '%s', using utf-8",
            confidence, encoding
        )
        encoding = 'utf-8'

    try:
        return content.decode(encoding, errors='replace')
    except (LookupError, TypeError, ValueError) as exc:
        # Unknown or malformed codec name reported by the detector.
        logger.warning("Failed to decode content with %s: %s, using utf-8 replace",
                       encoding, exc)
        return content.decode('utf-8', errors='replace')


class AniworldLoader(Loader):
    """Loader for aniworld.to: search, metadata lookup and episode download.

    Episode and series pages are cached per instance; downloads fail over
    across the providers advertised on the episode page.
    """

    # Language name -> AniWorld ``data-lang-key`` value.
    _LANGUAGE_CODES = {
        "German Dub": 1,
        "English Sub": 2,
        "German Sub": 3,
    }

    def __init__(self) -> None:
        """Set up provider configuration, HTTP session, caches and events."""
        self.SUPPORTED_PROVIDERS = DEFAULT_PROVIDERS
        # Copy default AniWorld headers so modifications remain local
        self.AniworldHeaders = dict(ANIWORLD_HEADERS)
        self.INVALID_PATH_CHARS = INVALID_PATH_CHARS
        self.RANDOM_USER_AGENT = UserAgent().random
        self.LULUVDO_USER_AGENT = LULUVDO_USER_AGENT
        # Legacy "Name: value" header strings; converted to dicts by
        # _parse_provider_headers() when a provider has no custom extractor.
        self.PROVIDER_HEADERS = {
            ProviderType.VIDMOLY.value: ['Referer: "https://vidmoly.to"'],
            ProviderType.DOODSTREAM.value: [
                'Referer: "https://dood.li/"',
                'Referer: "https://playmogo.com/"',
            ],
            ProviderType.VOE.value: [f"User-Agent: {self.RANDOM_USER_AGENT}"],
            ProviderType.LULUVDO.value: [
                f"User-Agent: {self.LULUVDO_USER_AGENT}",
                "Accept-Language: de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
                'Origin: "https://luluvdo.com"',
                'Referer: "https://luluvdo.com/"',
            ],
        }
        self.ANIWORLD_TO = "https://aniworld.to"
        self.session = requests.Session()

        # Cancellation flag for graceful shutdown
        self._cancel_flag = threading.Event()

        # Configure retries with backoff
        retries = Retry(
            total=5,  # Number of retries
            backoff_factor=1,  # Delay multiplier (1s, 2s, 4s, ...)
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=["GET"]
        )
        adapter = HTTPAdapter(max_retries=retries)
        self.session.mount("https://", adapter)

        # Default HTTP request timeout used for requests.Session calls.
        # Allows overriding via DOWNLOAD_TIMEOUT env var at runtime.
        self.DEFAULT_REQUEST_TIMEOUT = int(
            os.getenv("DOWNLOAD_TIMEOUT") or DEFAULT_DOWNLOAD_TIMEOUT
        )

        self._KeyHTMLDict = {}
        self._EpisodeHTMLDict = {}
        self._YearDict = {}
        self.Providers = Providers()

        # Events: download_progress is triggered with progress dict
        self.events = Events()

    def subscribe_download_progress(self, handler):
        """Subscribe a handler to the download_progress event.

        Args:
            handler: Callable to be called with progress dict.
        """
        self.events.download_progress += handler

    def unsubscribe_download_progress(self, handler):
        """Unsubscribe a handler from the download_progress event.

        Args:
            handler: Callable previously subscribed.
        """
        self.events.download_progress -= handler

    def clear_cache(self):
        """Clear the cached series and episode HTML (the year cache is kept)."""
        logger.debug("Clearing HTML cache")
        self._KeyHTMLDict = {}
        self._EpisodeHTMLDict = {}
        logger.debug("HTML cache cleared successfully")

    def remove_from_cache(self):
        """Remove episode HTML from cache."""
        logger.debug("Removing episode HTML from cache")
        self._EpisodeHTMLDict = {}
        logger.debug("Episode HTML cache cleared")

    def search(self, word: str) -> list:
        """Search for anime series.

        Args:
            word: Search term

        Returns:
            List of found series
        """
        logger.info("Searching for anime with keyword: '%s'", word)
        search_url = (
            f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
        )
        logger.debug("Search URL: %s", search_url)
        anime_list = self.fetch_anime_list(search_url)
        logger.info("Found %s anime series for keyword '%s'",
                    len(anime_list), word)
        return anime_list

    def fetch_anime_list(self, url: str) -> list:
        """Fetch a search URL and decode its JSON payload.

        A second decode is attempted after stripping a BOM and control
        characters when the first one fails.

        Args:
            url: Fully built search URL.

        Returns:
            The decoded list, or an empty list when the payload is valid
            JSON but not a list.

        Raises:
            requests.HTTPError: When the response status is an error.
            ValueError: When the payload cannot be decoded as JSON.
        """
        logger.debug("Fetching anime list from URL: %s", url)
        response = self.session.get(url, timeout=self.DEFAULT_REQUEST_TIMEOUT)
        response.raise_for_status()
        logger.debug("Response status code: %s", response.status_code)

        clean_text = response.text.strip()

        try:
            decoded_data = json.loads(html.unescape(clean_text))
            logger.debug("Successfully decoded JSON data on first attempt")
            return decoded_data if isinstance(decoded_data, list) else []
        except json.JSONDecodeError:
            logger.warning("Initial JSON decode failed, attempting cleanup")
            try:
                # Remove BOM and problematic characters
                clean_text = clean_text.encode('utf-8').decode('utf-8-sig')
                # Remove problematic characters
                clean_text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', clean_text)
                # Parse the new text
                decoded_data = json.loads(clean_text)
                logger.debug("Successfully decoded JSON after cleanup")
                return decoded_data if isinstance(decoded_data, list) else []
            except (requests.RequestException, json.JSONDecodeError) as exc:
                logger.error("Failed to decode anime list from %s: %s", url, exc)
                raise ValueError("Could not get valid anime: ") from exc

    def _get_language_key(self, language: str) -> int:
        """Convert language name to language code.

        Language Codes:
            1: German Dub
            2: English Sub
            3: German Sub

        Unknown language names map to 0.
        """
        language_code = self._LANGUAGE_CODES.get(language, 0)
        logger.debug("Converted language '%s' to code %s",
                     language, language_code)
        return language_code

    def is_language(
        self,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub"
    ) -> bool:
        """Check if episode is available in specified language."""
        logger.debug("Checking if S%02dE%03d (%s) is available in %s",
                     season, episode, key, language)
        language_code = self._get_language_key(language)

        episode_soup = BeautifulSoup(
            _decode_html_content(self._get_episode_html(season, episode, key).content),
            'html.parser'
        )
        change_language_box_div = episode_soup.find(
            'div', class_='changeLanguageBox')
        languages = []

        if change_language_box_div:
            img_tags = change_language_box_div.find_all('img')
            for img in img_tags:
                lang_key = img.get('data-lang-key')
                if lang_key and lang_key.isdigit():
                    languages.append(int(lang_key))

        is_available = language_code in languages
        logger.debug("Available languages for S%02dE%03d: %s, requested: %s, available: %s",
                     season, episode, languages, language_code, is_available)
        return is_available

    def _check_url_alive(
        self,
        url: str,
        headers: dict | None = None,
        timeout: int = 10,
    ) -> bool:
        """Probe a provider URL with HEAD before committing to yt-dlp.

        Skips dead providers quickly so the failover loop never blocks
        waiting for yt-dlp to fail on a 404. Falls back to a streaming GET
        when HEAD is not allowed by the upstream server.

        Args:
            url: URL to probe.
            headers: Optional headers to forward with the probe.
            timeout: Per-request timeout (seconds).

        Returns:
            True when the URL responds with a non-4xx status, else False.
        """
        try:
            response = self.session.head(
                url,
                headers=headers,
                timeout=timeout,
                allow_redirects=True,
            )
            if response.status_code == 405:
                response = self.session.get(
                    url,
                    headers=headers,
                    timeout=timeout,
                    stream=True,
                    allow_redirects=True,
                )
                # Only the status line is needed; release the connection.
                response.close()
            if 400 <= response.status_code < 500:
                logger.warning(
                    "Provider URL returned HTTP %s: %s",
                    response.status_code, url
                )
                return False
            return True
        except requests.RequestException as exc:
            logger.warning("Provider URL unreachable %s: %s", url, exc)
            return False

    def _try_direct_stream(
        self,
        link: str,
        output_path: str,
        headers: dict | None,
        timeout: int,
    ) -> bool:
        """Stream a direct video URL to disk without yt-dlp.

        Used as a fast-path when the resolved provider link already points
        at a downloadable video file (``Content-Type: video/*`` or
        ``application/octet-stream``). HLS and other non-video payloads are
        rejected so the caller can fall back to yt-dlp.

        A partially written file may be left at ``output_path`` when the
        download is cancelled or fails; the caller is expected to clean up.

        Args:
            link: Direct download URL.
            output_path: Destination file path.
            headers: Optional HTTP headers.
            timeout: Per-request timeout (seconds).

        Returns:
            True on a successful save, False when the link is not a direct
            video or the download fails.
        """
        try:
            with self.session.get(
                link,
                headers=headers,
                timeout=timeout,
                stream=True,
            ) as response:
                if not response.ok:
                    logger.debug(
                        "Direct stream GET returned %s for %s",
                        response.status_code, link[:80]
                    )
                    return False

                content_type = response.headers.get("Content-Type", "")
                if not (
                    content_type.startswith("video/")
                    or content_type == "application/octet-stream"
                ):
                    logger.debug(
                        "Direct stream skipped, Content-Type=%s", content_type
                    )
                    return False

                logger.info(
                    "Direct stream download starting (type=%s)", content_type
                )
                with open(output_path, "wb") as fh:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        if self._cancel_flag.is_set():
                            logger.info(
                                "Cancellation detected during direct stream"
                            )
                            return False
                        if chunk:
                            fh.write(chunk)
                return True
        except (requests.RequestException, OSError) as exc:
            # OSError covers local write failures (disk full, permissions);
            # treat them like a failed fast-path so failover can continue.
            logger.warning("Direct stream download failed: %s", exc)
            return False

    def download(
        self,
        base_directory: str,
        serie_folder: str,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub"
    ) -> bool:
        """Download episode to specified directory.

        Iterates the providers actually advertised on the episode page
        (ordered by SUPPORTED_PROVIDERS preference), probing each URL before
        attempting an extraction so dead providers are skipped immediately
        instead of stalling yt-dlp on a 404.

        Args:
            base_directory: Base download directory path
            serie_folder: Filesystem folder name (metadata only, used for
                file path construction)
            season: Season number
            episode: Episode number
            key: Series unique identifier from provider (used for
                identification and API calls)
            language: Audio language preference (default: German Dub)

        Returns:
            bool: True if download succeeded, False otherwise
        """
        logger.info(
            "Starting download for S%02dE%03d (%s) in %s",
            season, episode, key, language
        )
        sanitized_anime_title = ''.join(
            char for char in self.get_title(key)
            if char not in self.INVALID_PATH_CHARS
        )
        logger.debug("Sanitized anime title: %s", sanitized_anime_title)

        # NOTE(review): season 0 is named as a movie here, but
        # _get_episode_html() rejects season < 1, so provider selection
        # below raises ValueError for season 0 — confirm intended handling.
        if season == 0:
            output_file = (
                f"{sanitized_anime_title} - "
                f"Movie {episode:02} - "
                f"({language}).mp4"
            )
        else:
            output_file = (
                f"{sanitized_anime_title} - "
                f"S{season:02}E{episode:03} - "
                f"({language}).mp4"
            )

        folder_path = os.path.join(
            os.path.join(base_directory, serie_folder),
            f"Season {season}"
        )
        output_path = os.path.join(folder_path, output_file)
        logger.debug("Output path: %s", output_path)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        temp_dir = "./Temp/"
        os.makedirs(os.path.dirname(temp_dir), exist_ok=True)
        temp_path = os.path.join(temp_dir, output_file)
        logger.debug("Temporary path: %s", temp_path)

        candidate_providers = self._select_providers_for_episode(
            season, episode, key, language
        )
        if not candidate_providers:
            logger.error(
                "No providers advertised for S%02dE%03d (%s) in %s",
                season, episode, key, language
            )
            self.clear_cache()
            return False

        tried: list[str] = []
        for provider_name, redirect_url in candidate_providers:
            tried.append(provider_name)
            logger.debug("Attempting download with provider: %s", provider_name)

            probe_headers = {"User-Agent": self.RANDOM_USER_AGENT}
            if not self._check_url_alive(
                redirect_url,
                headers=probe_headers,
                timeout=self.DEFAULT_REQUEST_TIMEOUT,
            ):
                logger.info(
                    "Skipping provider %s, redirect URL not reachable",
                    provider_name
                )
                continue

            try:
                resolved = self._resolve_direct_link(
                    redirect_url, provider_name
                )
            except Exception as exc:
                logger.warning(
                    "Provider %s link resolution failed: %s: %s",
                    provider_name, type(exc).__name__, exc
                )
                continue

            if resolved is None:
                logger.info(
                    "Provider %s returned no direct link", provider_name
                )
                continue

            link, header = resolved

            if self._cancel_flag.is_set():
                logger.info("Cancellation requested before download start")
                _cleanup_temp_file(temp_path)
                self.clear_cache()
                return False

            if self._try_direct_stream(
                link, temp_path, header, self.DEFAULT_REQUEST_TIMEOUT,
            ) and os.path.exists(temp_path):
                logger.debug(
                    "Direct stream succeeded with provider %s", provider_name
                )
                shutil.copyfile(temp_path, output_path)
                os.remove(temp_path)
                logger.info(
                    "Download completed successfully (direct): %s", output_file
                )
                self.clear_cache()
                return True

            # Drop any partial file the fast-path left behind before
            # handing the same temp path to yt-dlp.
            _cleanup_temp_file(temp_path)

            cancel_flag = self._cancel_flag

            def events_progress_hook(d):
                if cancel_flag.is_set():
                    logger.info("Cancellation detected in progress hook")
                    raise DownloadCancelled("Download cancelled by user")
                self.events.download_progress(d)

            ydl_opts = {
                'fragment_retries': float('inf'),
                'outtmpl': temp_path,
                'quiet': True,
                'no_warnings': True,
                'progress_with_newline': False,
                'nocheckcertificate': True,
                'logger': logger,
                'progress_hooks': [events_progress_hook],
                # yt-dlp defaults to native HLS downloader which warns about
                # "Live HLS streams are not supported" - disable to go
                # straight to ffmpeg, avoiding the warning
                'hls_prefer_native': False,
            }
            if header:
                ydl_opts['http_headers'] = header
                logger.debug("Using custom headers for download")

            try:
                logger.info(
                    "Starting yt-dlp download with %s: %s",
                    provider_name, output_file
                )
                logger.debug("Download link: %s...", link[:100])
                with YoutubeDL(ydl_opts) as ydl:
                    # Guard against a None result so the debug log below
                    # cannot turn a finished download into a failure.
                    info = ydl.extract_info(link, download=True) or {}
                    logger.debug(
                        "Download info: title=%s, filesize=%s",
                        info.get('title'), info.get('filesize')
                    )

                if os.path.exists(temp_path):
                    logger.debug("Moving file from temp to final destination")
                    shutil.copyfile(temp_path, output_path)
                    os.remove(temp_path)
                    logger.info(
                        "Download completed successfully: %s", output_file
                    )
                    self.clear_cache()
                    return True
                logger.error(
                    "Download failed: temp file not found at %s", temp_path
                )
            except DownloadCancelled:
                logger.info("Download cancelled by user")
                _cleanup_temp_file(temp_path)
                self.clear_cache()
                return False
            except BrokenPipeError as exc:
                logger.error(
                    "Broken pipe error with provider %s: %s",
                    provider_name, exc
                )
                _cleanup_temp_file(temp_path)
                continue
            except Exception as exc:
                # Check if this is an HLS-related failure that might succeed
                # with additional ffmpeg options
                exc_str = str(exc).lower()
                is_hls_related = (
                    'hls' in exc_str
                    or 'live' in exc_str
                    or 'native downloader' in exc_str
                )
                if is_hls_related and 'ffmpeg' not in str(ydl_opts.get('downloader', '')):
                    logger.info(
                        "HLS stream detected, retrying with ffmpeg options: %s",
                        output_file
                    )
                    # Retry with ffmpeg explicitly set
                    # NOTE(review): yt-dlp documents the option as
                    # `external_downloader`; confirm `downloader` has any
                    # effect when passed through the Python API.
                    retry_opts = ydl_opts.copy()
                    retry_opts['downloader'] = 'ffmpeg'
                    retry_opts['hls_use_mpegts'] = True
                    try:
                        with YoutubeDL(retry_opts) as ydl:
                            ydl.extract_info(link, download=True)
                            if os.path.exists(temp_path):
                                shutil.copyfile(temp_path, output_path)
                                os.remove(temp_path)
                                logger.info(
                                    "Download completed successfully (retry): %s",
                                    output_file
                                )
                                self.clear_cache()
                                return True
                    except DownloadCancelled:
                        # A cancel during the retry must abort the whole
                        # download, not fall through to the next provider.
                        logger.info("Download cancelled by user")
                        _cleanup_temp_file(temp_path)
                        self.clear_cache()
                        return False
                    except Exception:
                        _cleanup_temp_file(temp_path)
                        # Continue to next provider if retry also fails
                        continue
                logger.error(
                    "YoutubeDL download failed with provider %s: %s: %s",
                    provider_name, type(exc).__name__, exc
                )
                _cleanup_temp_file(temp_path)
                continue

        logger.error(
            "All download providers failed for S%02dE%03d (%s) in %s. "
            "Tried: %s. Episode may be unavailable on the source site.",
            season, episode, key, language, ", ".join(tried) or "none"
        )
        download_error_logger.error(
            "All providers failed for %s S%02dE%03d (%s); tried=%s",
            key, season, episode, language, tried
        )
        _cleanup_temp_file(temp_path)
        self.clear_cache()
        return False

    def _select_providers_for_episode(
        self,
        season: int,
        episode: int,
        key: str,
        language: str,
    ) -> list[tuple[str, str]]:
        """Return ``[(provider_name, redirect_url), ...]`` for an episode.

        Filters by requested language and orders results by
        ``SUPPORTED_PROVIDERS`` preference so the failover chain matches
        operator expectations. Returns an empty list when nothing is
        advertised on the page.
        """
        if not self.is_language(season, episode, key, language):
            logger.warning(
                "Language %s not advertised for S%02dE%03d (%s)",
                language, season, episode, key
            )
            return []

        language_code = self._get_language_key(language)
        providers = self._get_provider_from_html(season, episode, key)

        ordered: list[tuple[str, str]] = []
        preferred = list(self.SUPPORTED_PROVIDERS)

        # Preferred providers first, in configured order...
        for name in preferred:
            lang_map = providers.get(name)
            if lang_map and language_code in lang_map:
                ordered.append((name, lang_map[language_code]))

        # ...then everything else the page advertises, in page order.
        for name, lang_map in providers.items():
            if name in preferred:
                continue
            if language_code in lang_map:
                ordered.append((name, lang_map[language_code]))

        return ordered

    def _resolve_direct_link(
        self,
        redirect_url: str,
        provider_name: str,
    ) -> tuple[str, dict] | None:
        """Resolve a provider redirect URL into a direct stream link.

        Follows the redirect to the embedded player, then delegates to a
        provider-specific extractor (when registered) or returns the embed
        URL itself so yt-dlp can attempt extraction.

        Args:
            redirect_url: AniWorld redirect URL.
            provider_name: Provider key (e.g. ``"VOE"``).

        Returns:
            ``(direct_link, headers)`` tuple or None when extraction fails.
        """
        try:
            embedded = self.session.get(
                redirect_url,
                timeout=self.DEFAULT_REQUEST_TIMEOUT,
                headers={"User-Agent": self.RANDOM_USER_AGENT},
                allow_redirects=True,
            ).url
        except requests.RequestException as exc:
            logger.warning(
                "Failed resolving redirect for %s: %s", provider_name, exc
            )
            return None

        try:
            extractor = self.Providers.GetProvider(provider_name)
        except (KeyError, AttributeError):
            extractor = None

        if extractor is not None:
            try:
                return extractor.get_link(
                    embedded, self.DEFAULT_REQUEST_TIMEOUT
                )
            except Exception as exc:
                logger.warning(
                    "Custom extractor %s failed: %s", provider_name, exc
                )
                return None

        header_list = self.PROVIDER_HEADERS.get(provider_name)
        header_dict = self._parse_provider_headers(header_list)
        return embedded, header_dict

    @staticmethod
    def _parse_provider_headers(
        header_list: list | None,
    ) -> dict[str, str]:
        """Convert legacy ``"Name: value"`` header strings to a dict.

        Entries that are not strings or contain no colon are skipped;
        surrounding whitespace and double quotes are stripped from values.
        When a header name repeats, the last entry wins.
        """
        if not header_list:
            return {}
        parsed: dict[str, str] = {}
        for entry in header_list:
            if not isinstance(entry, str) or ":" not in entry:
                continue
            name, _, value = entry.partition(":")
            parsed[name.strip()] = value.strip().strip('"')
        return parsed

    def get_site_key(self) -> str:
        """Get the site key for this provider."""
        return "aniworld.to"

    def get_title(self, key: str) -> str:
        """Get anime title from series key.

        Also caches the release year when it can be read from the same
        page.

        Returns:
            The title text, or an empty string when none is found.
        """
        logger.debug("Getting title for key: %s", key)
        soup = BeautifulSoup(
            _decode_html_content(self._get_key_html(key).content),
            'html.parser'
        )
        title_div = soup.find('div', class_='series-title')

        if title_div:
            h1_tag = title_div.find('h1')
            span_tag = h1_tag.find('span') if h1_tag else None
            if span_tag:
                title = span_tag.text
                logger.debug("Found title: %s", title)

                # Also try to extract year from sibling p tag "Jahr: {year}"
                # Year is typically right after title in the HTML structure
                year = self._extract_year_from_soup(soup)
                if year is not None:
                    self._YearDict[key] = year
                    logger.debug("Cached year %d for key: %s", year, key)

                return title

        logger.warning("No title found for key: %s", key)
        return ""

    def _extract_year_from_soup(self, soup: BeautifulSoup) -> int | None:
        """Extract year from BeautifulSoup object.

        Looks for a 'Jahr:' / 'Year:' label in any p tag first, then falls
        back to the first 19xx/20xx number in the series-info div.

        Args:
            soup: Parsed BeautifulSoup object

        Returns:
            Year as int or None if not found
        """
        # Try to find year in metadata
        for p_tag in soup.find_all('p'):
            text = p_tag.get_text()
            if 'Jahr:' in text or 'Year:' in text:
                match = re.search(r'(\d{4})', text)
                if match:
                    return int(match.group(1))

        # Fallback: look in series-info div
        info_div = soup.find('div', class_='series-info')
        if info_div:
            text = info_div.get_text()
            match = re.search(r'\b(19\d{2}|20\d{2})\b', text)
            if match:
                return int(match.group(1))

        return None

    def get_year(self, key: str) -> int | None:
        """Get anime release year from series key.

        Uses cached year from get_title if available, otherwise extracts
        and caches it.

        Args:
            key: Series identifier

        Returns:
            Release year or None if not found
        """
        logger.debug("Getting year for key: %s", key)

        # Check cache first
        if key in self._YearDict:
            logger.debug("Using cached year %d for key: %s",
                         self._YearDict[key], key)
            return self._YearDict[key]

        # Not cached - extract from HTML
        try:
            soup = BeautifulSoup(
                _decode_html_content(self._get_key_html(key).content),
                'html.parser'
            )
            year = self._extract_year_from_soup(soup)
            if year is not None:
                self._YearDict[key] = year
                logger.debug("Found and cached year %d for key: %s", year, key)
                return year
        except Exception as e:
            logger.warning("Error extracting year for key %s: %s", key, e)

        return None

    def _get_key_html(self, key: str):
        """Get cached HTML for series key.

        Args:
            key: Series identifier (will be URL-encoded for safety)

        Returns:
            Cached or fetched HTML response
        """
        if key in self._KeyHTMLDict:
            logger.debug("Using cached HTML for key: %s", key)
            return self._KeyHTMLDict[key]

        # Sanitize key parameter for URL
        safe_key = quote(key, safe='')
        url = f"{self.ANIWORLD_TO}/anime/stream/{safe_key}"
        logger.debug("Fetching HTML for key: %s from %s", key, url)

        self._KeyHTMLDict[key] = self.session.get(
            url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT
        )
        logger.debug("Cached HTML for key: %s", key)
        return self._KeyHTMLDict[key]

    def _get_episode_html(self, season: int, episode: int, key: str):
        """Get cached HTML for episode.

        Args:
            season: Season number (validated to be positive)
            episode: Episode number (validated to be positive)
            key: Series identifier (will be URL-encoded for safety)

        Returns:
            Cached or fetched HTML response

        Raises:
            ValueError: If season or episode are invalid
        """
        # Validate season and episode numbers
        if season < 1 or season > 999:
            logger.error("Invalid season number: %s", season)
            raise ValueError(f"Invalid season number: {season}")
        if episode < 1 or episode > 9999:
            logger.error("Invalid episode number: %s", episode)
            raise ValueError(f"Invalid episode number: {episode}")

        # Entries are stored per (key, season, episode); the lookup must
        # use the same tuple or the cache can never hit.
        cache_key = (key, season, episode)
        if cache_key in self._EpisodeHTMLDict:
            logger.debug("Using cached HTML for S%02dE%03d (%s)",
                         season, episode, key)
            return self._EpisodeHTMLDict[cache_key]

        # Sanitize key parameter for URL
        safe_key = quote(key, safe='')
        link = (
            f"{self.ANIWORLD_TO}/anime/stream/{safe_key}/"
            f"staffel-{season}/episode-{episode}"
        )
        logger.debug("Fetching episode HTML from: %s", link)
        response = self.session.get(link, timeout=self.DEFAULT_REQUEST_TIMEOUT)
        self._EpisodeHTMLDict[cache_key] = response
        logger.debug("Cached episode HTML for S%02dE%03d (%s)",
                     season, episode, key)
        return response

    def _get_provider_from_html(
        self,
        season: int,
        episode: int,
        key: str
    ) -> dict:
        """Parse HTML content to extract streaming providers.

        Returns a dictionary with provider names as keys and language
        key-to-redirect URL mappings as values.

        Example:
            {
                'VOE': {1: 'https://aniworld.to/redirect/1766412',
                        2: 'https://aniworld.to/redirect/1766405'},
            }
        """
        logger.debug("Extracting providers from HTML for S%02dE%03d (%s)",
                     season, episode, key)
        soup = BeautifulSoup(
            _decode_html_content(self._get_episode_html(season, episode, key).content),
            'html.parser'
        )
        providers: dict[str, dict[int, str]] = {}

        episode_links = soup.find_all(
            'li', class_=lambda x: x and x.startswith('episodeLink')
        )

        if not episode_links:
            logger.warning("No episode links found for S%02dE%03d (%s)",
                           season, episode, key)
            return providers

        for link in episode_links:
            provider_name_tag = link.find('h4')
            provider_name = (
                provider_name_tag.text.strip()
                if provider_name_tag else None
            )

            redirect_link_tag = link.find('a', class_='watchEpisode')
            redirect_link = (
                redirect_link_tag.get('href')
                if redirect_link_tag else None
            )

            lang_key = link.get('data-lang-key')
            lang_key = (
                int(lang_key)
                if lang_key and lang_key.isdigit() else None
            )

            if provider_name and redirect_link and lang_key:
                if provider_name not in providers:
                    providers[provider_name] = {}
                providers[provider_name][lang_key] = (
                    f"{self.ANIWORLD_TO}{redirect_link}"
                )
                logger.debug("Found provider: %s, lang_key: %s",
                             provider_name, lang_key)

        logger.debug("Total providers found: %s", len(providers))
        return providers

    def _get_redirect_link(
        self,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub"
    ):
        """Get redirect link for episode in specified language.

        Returns:
            ``(redirect_url, provider_name)`` for the first provider that
            offers the language, or None when there is none.
        """
        logger.debug("Getting redirect link for S%02dE%03d (%s) in %s",
                     season, episode, key, language)
        language_code = self._get_language_key(language)

        if self.is_language(season, episode, key, language):
            for (provider_name, lang_dict) in (
                self._get_provider_from_html(
                    season, episode, key
                ).items()
            ):
                if language_code in lang_dict:
                    logger.debug("Found redirect link with provider: %s",
                                 provider_name)
                    return (lang_dict[language_code], provider_name)

        logger.warning("No redirect link found for S%02dE%03d (%s) in %s",
                       season, episode, key, language)
        return None

    def _get_embeded_link(
        self,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub"
    ):
        """Get embedded link from redirect link.

        Returns:
            The final URL after following the redirect, or None when no
            redirect link is available for the episode/language.
        """
        logger.debug("Getting embedded link for S%02dE%03d (%s) in %s",
                     season, episode, key, language)
        redirect = self._get_redirect_link(season, episode, key, language)
        if redirect is None:
            # _get_redirect_link already logged the miss; unpacking None
            # here would raise TypeError instead of reporting "not found".
            return None
        redirect_link, provider_name = redirect
        logger.debug("Redirect link: %s, provider: %s",
                     redirect_link, provider_name)

        embeded_link = self.session.get(
            redirect_link,
            timeout=self.DEFAULT_REQUEST_TIMEOUT,
            headers={'User-Agent': self.RANDOM_USER_AGENT}
        ).url
        logger.debug("Embedded link: %s", embeded_link)
        return embeded_link

    def _get_direct_link_from_provider(
        self,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub"
    ):
        """Get direct download link from streaming provider.

        Always delegates to the "VOE" extractor, regardless of which
        provider the redirect link belongs to.
        """
        logger.debug("Getting direct link from provider for S%02dE%03d (%s) in %s",
                     season, episode, key, language)
        embeded_link = self._get_embeded_link(
            season, episode, key, language
        )
        if embeded_link is None:
            logger.error("No embedded link found for S%02dE%03d (%s)",
                         season, episode, key)
            return None

        logger.debug("Using VOE provider to extract direct link")
        return self.Providers.GetProvider(
            "VOE"
        ).get_link(embeded_link, self.DEFAULT_REQUEST_TIMEOUT)

    def get_season_episode_count(self, slug: str) -> dict:
        """Get episode count for each season.

        Args:
            slug: Series identifier (will be URL-encoded for safety)

        Returns:
            Dictionary mapping season numbers to episode counts
        """
        logger.info("Getting season and episode count for slug: %s", slug)
        # Sanitize slug parameter for URL
        safe_slug = quote(slug, safe='')
        base_url = f"{self.ANIWORLD_TO}/anime/stream/{safe_slug}/"
        logger.debug("Base URL: %s", base_url)

        # NOTE(review): these requests bypass self.session (and therefore
        # its retry adapter), unlike the rest of the class — confirm
        # whether that is intentional.
        response = requests.get(base_url, timeout=self.DEFAULT_REQUEST_TIMEOUT)
        soup = BeautifulSoup(_decode_html_content(response.content), 'html.parser')

        season_meta = soup.find('meta', itemprop='numberOfSeasons')
        number_of_seasons = int(season_meta['content']) if season_meta else 0
        logger.info("Found %s seasons for '%s'", number_of_seasons, slug)

        episode_counts = {}

        for season in range(1, number_of_seasons + 1):
            season_url = f"{base_url}staffel-{season}"
            logger.debug("Fetching episodes for season %s from: %s",
                         season, season_url)
            response = requests.get(
                season_url,
                timeout=self.DEFAULT_REQUEST_TIMEOUT,
            )
            soup = BeautifulSoup(_decode_html_content(response.content), 'html.parser')

            # Count distinct episode URLs; the same link can appear several
            # times on the season page.
            episode_links = soup.find_all('a', href=True)
            unique_links = set(
                link['href']
                for link in episode_links
                if f"staffel-{season}/episode-" in link['href']
            )

            episode_counts[season] = len(unique_links)
            logger.debug("Season %s has %s episodes",
                         season, episode_counts[season])

        logger.info("Episode count retrieval complete for '%s': %s",
                    slug, episode_counts)
        return episode_counts