refactor: restructure core→server, split large entity files into database module

- Move src/core/ → src/server/
- Split SerieList.py (531 lines) and series.py (414 lines) into src/server/database/
- Add database/models.py for SQLAlchemy models
- Update all test imports to reflect new structure
- Remove deprecated test files (test_serie_class.py, test_serie_folder_with_year.py)
This commit is contained in:
2026-06-04 21:11:53 +02:00
parent 09d454d4c0
commit 5526ab884a
76 changed files with 1186 additions and 3574 deletions

870
src/server/SerieScanner.py Normal file
View File

@@ -0,0 +1,870 @@
"""
SerieScanner - Scans directories for anime series and missing episodes.
This module provides functionality to scan anime directories, identify
missing episodes, and report progress through callback interfaces.
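Note:
    SerieScanner.scan() looks series up in the database and persists the
    refreshed missing-episode data itself. Results are additionally kept
    in SerieScanner.keyDict for callers such as the service layer
    (AnimeService).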
"""
from __future__ import annotations
import asyncio
import logging
import os
import re
import traceback
import uuid
from typing import Callable, Iterable, Iterator, Optional
from events import Events
from src.config.settings import settings
from src.server.database.models import AnimeSeries
from src.server.exceptions.exceptions.Exceptions import MatchNotFoundError
from src.server.providers.base_provider import Loader
from src.server.database.connection import get_sync_session
from src.server.database.service import AnimeSeriesService, EpisodeService
# General-purpose logger for this module.
logger = logging.getLogger(__name__)
# Dedicated "error" logger; scan() writes per-folder failures with tracebacks to it.
error_logger = logging.getLogger("error")
# Dedicated "series.nokey" logger; scan() logs each processed series to it.
no_key_found_logger = logging.getLogger("series.nokey")
class SerieScanner:
    """
    Scans directories for anime series and identifies missing episodes.

    Supports progress, warning, error and completion callbacks for
    real-time scanning updates.

    Note:
        scan() looks each folder up in the database and persists the
        refreshed missing-episode data via _persist_serie_to_db(). Scan
        results are additionally stored in keyDict (keyed by series key)
        and can be retrieved after scanning.

    Example:
        # Synchronous context (CLI):
        scanner = SerieScanner("/path/to/anime", loader)
        scanner.scan()  # asyncio.run() used internally when no event loop

        # Asynchronous context (server/scheduler):
        # scan() detects a running event loop and uses create_task()
        # internally, so no special handling is needed by the caller.

        # Results are in scanner.keyDict
    """
def __init__(
    self,
    basePath: str,
    loader: Loader,
) -> None:
    """
    Set up a scanner rooted at ``basePath``.

    Args:
        basePath: Base directory containing anime series
        loader: Loader instance for fetching series information

    Raises:
        ValueError: If basePath is empty/blank, does not exist, or is
            not a directory
    """
    # Reject empty or whitespace-only paths before touching the filesystem.
    if not (basePath and basePath.strip()):
        raise ValueError("Base path cannot be empty")

    # Everything below works on the absolute form of the path.
    resolved = os.path.abspath(basePath)
    if not os.path.exists(resolved):
        raise ValueError(f"Base path does not exist: {resolved}")
    if not os.path.isdir(resolved):
        raise ValueError(f"Base path is not a directory: {resolved}")

    self.directory: str = resolved
    self.keyDict: dict[str, AnimeSeries] = {}
    self.loader: Loader = loader
    self._current_operation_id: Optional[str] = None

    # Each event slot is a plain list of handlers (see _safe_call_event).
    self.events = Events()
    for slot in ("on_progress", "on_error", "on_warning", "on_completion"):
        setattr(self.events, slot, [])

    logger.info("Initialized SerieScanner with base path: %s", resolved)
def _safe_call_event(self, event_handler, data: dict) -> None:
    """Call every handler of an event, isolating handler failures.

    Each handler is invoked inside its own ``try`` so that one failing
    handler is logged and does not prevent the remaining handlers from
    being notified. Nothing is raised to the caller.

    Args:
        event_handler: Collection of handlers (e.g., self.events.on_progress);
            a falsy value means there is nothing to call
        data: Data dictionary to pass to each handler
    """
    if not event_handler:
        return
    try:
        # Snapshot the handlers so one that (un)subscribes while being
        # notified cannot cause another handler to be skipped.
        handlers = list(event_handler)
    except Exception as e:
        logger.error("Error calling event handler: %s", e, exc_info=True)
        return
    for handler in handlers:
        try:
            handler(data)
        except Exception as e:
            logger.error("Error calling event handler: %s", e, exc_info=True)
def subscribe_on_progress(self, handler):
    """
    Register a progress handler; registering it again is a no-op.

    Args:
        handler: Callable invoked with the progress data dictionary
    """
    listeners = self.events.on_progress
    if handler in listeners:
        return
    listeners.append(handler)
def unsubscribe_on_progress(self, handler):
    """
    Remove a progress handler; unknown handlers are ignored.

    Args:
        handler: Callable to remove
    """
    listeners = self.events.on_progress
    if handler not in listeners:
        return
    listeners.remove(handler)
def _extract_year_from_folder_name(self, folder_name: str) -> int | None:
    """Return the year embedded in a folder name, if any.

    The first "(YYYY)" group found anywhere in the name is used (it is
    typically at the end). Only years from 1900 to 2100 inclusive are
    accepted.

    Args:
        folder_name: The folder name to check

    Returns:
        int or None: Year if found and plausible, None otherwise

    Example:
        >>> _extract_year_from_folder_name("Dororo (2025)")
        2025
        >>> _extract_year_from_folder_name("Dororo")
        None
    """
    if not folder_name:
        return None

    found = re.search(r'\((\d{4})\)', folder_name)
    if found is None:
        return None

    try:
        year = int(found.group(1))
    except ValueError:
        return None

    # Anything outside this window is treated as "not a year".
    if year < 1900 or year > 2100:
        return None

    logger.debug(
        "Extracted year from folder name: %s -> %d",
        folder_name,
        year
    )
    return year
def subscribe_on_error(self, handler):
    """
    Register an error handler; registering it again is a no-op.

    Args:
        handler: Callable invoked with the error data dictionary
    """
    listeners = self.events.on_error
    if handler in listeners:
        return
    listeners.append(handler)
def unsubscribe_on_error(self, handler):
    """
    Remove an error handler; unknown handlers are ignored.

    Args:
        handler: Callable to remove
    """
    listeners = self.events.on_error
    if handler not in listeners:
        return
    listeners.remove(handler)
def subscribe_on_warning(self, handler):
    """
    Register a warning handler; registering it again is a no-op.

    Args:
        handler: Callable invoked with the warning data dictionary
    """
    listeners = self.events.on_warning
    if handler in listeners:
        return
    listeners.append(handler)
def unsubscribe_on_warning(self, handler):
    """
    Remove a warning handler; unknown handlers are ignored.

    Args:
        handler: Callable to remove
    """
    listeners = self.events.on_warning
    if handler not in listeners:
        return
    listeners.remove(handler)
def subscribe_on_completion(self, handler):
    """
    Register a completion handler; registering it again is a no-op.

    Args:
        handler: Callable invoked with the completion data dictionary
    """
    listeners = self.events.on_completion
    if handler in listeners:
        return
    listeners.append(handler)
def unsubscribe_on_completion(self, handler):
    """
    Remove a completion handler; unknown handlers are ignored.

    Args:
        handler: Callable to remove
    """
    listeners = self.events.on_completion
    if handler not in listeners:
        return
    listeners.remove(handler)
def reinit(self) -> None:
    """Drop previous scan results by binding a fresh, empty keyDict.

    The dictionary (keyed by anime.key) is replaced rather than cleared,
    so a reference to the previous dictionary keeps its contents.
    """
    fresh: dict[str, AnimeSeries] = {}
    self.keyDict = fresh
async def _persist_serie_to_db(self, anime: AnimeSeries) -> None:
    """Persist a scanned series to the database (create or update).

    Best effort: every failure is logged and swallowed, so callers never
    see an exception from this coroutine.

    If a row with ``anime.key`` exists, its name/folder/year are updated
    and its episodes are synced from ``anime.episodeDict``. Otherwise the
    series is created and one episode row is created per entry of
    ``anime.episodeDict`` -- the same source the update path uses, so
    both paths store the scan result.

    Args:
        anime: AnimeSeries model to persist
    """
    try:
        # Imported locally, as in the original code.
        # NOTE(review): presumably to avoid an import cycle -- confirm.
        from src.server.database.connection import get_async_session_factory

        # NOTE(review): the return value is used as a session below
        # (commit/rollback/close) despite the "factory" name -- confirm.
        db = get_async_session_factory()
        try:
            existing = await AnimeSeriesService.get_by_key(db, anime.key)
            if existing:
                await AnimeSeriesService.update(
                    db, existing.id,
                    name=anime.name,
                    folder=anime.folder,
                    year=anime.year
                )
                await self._sync_episodes_to_db(
                    db, existing.id, anime.episodeDict
                )
            else:
                db_anime = await AnimeSeriesService.create(
                    db=db,
                    key=anime.key,
                    name=anime.name,
                    site=anime.site,
                    folder=anime.folder,
                    year=anime.year
                )
                for season, ep_numbers in anime.episodeDict.items():
                    # dict.fromkeys drops repeated numbers, keeping order.
                    for ep_num in dict.fromkeys(ep_numbers):
                        await EpisodeService.create(
                            db=db,
                            series_id=db_anime.id,
                            season=season,
                            episode_number=ep_num
                        )
            await db.commit()
            logger.debug(
                "Persisted anime '%s' (key=%s) to database",
                anime.name, anime.key
            )
        except Exception as e:
            # Log the real cause first so a failing rollback cannot hide
            # it. Not re-raised: the failure is already fully reported.
            logger.error(
                "Failed to persist anime '%s' to DB: %s",
                anime.key, e, exc_info=True
            )
            await db.rollback()
        finally:
            await db.close()
    except Exception as e:
        # Reached when the session cannot be obtained, or when rollback
        # or close themselves fail.
        logger.error(
            "Could not persist anime '%s' to DB (DB unavailable?): %s",
            anime.key, e
        )
async def _sync_episodes_to_db(
    self, db, series_id: int, episode_dict: dict[int, list[int]]
) -> None:
    """Sync a series' episode rows with a fresh missing-episode dict.

    Rows not listed in ``episode_dict`` are deleted unless they are
    flagged ``is_downloaded``; listed episodes without a row are created.
    Episode numbers repeated within ``episode_dict`` are created once.

    Args:
        db: Async database session
        series_id: Database ID of the series
        episode_dict: Dict mapping season -> list of episode numbers
    """
    existing_episodes = await EpisodeService.get_by_series(db, series_id)
    existing_map = {
        (ep.season, ep.episode_number): ep for ep in existing_episodes
    }

    # dict.fromkeys gives set-like membership while keeping first-seen
    # order and dropping duplicate (season, episode) pairs.
    wanted = dict.fromkeys(
        (season, ep_num)
        for season, eps in episode_dict.items()
        for ep_num in eps
    )

    for (season, ep_num), ep in existing_map.items():
        if (season, ep_num) in wanted:
            continue
        if ep.is_downloaded:
            logger.debug(
                "Preserving downloaded episode S%02dE%02d for series_id=%d",
                season, ep_num, series_id
            )
            continue
        await EpisodeService.delete_by_series(
            db, series_id, season, ep_num
        )

    for season, ep_num in wanted:
        if (season, ep_num) not in existing_map:
            await EpisodeService.create(
                db=db,
                series_id=series_id,
                season=season,
                episode_number=ep_num
            )
def get_total_to_scan(self) -> int:
    """Count the folders a full scan would visit.

    Returns:
        Number of entries produced by the folder enumeration; folders
        without any MP4 file are enumerated too and therefore counted
    """
    total = 0
    for _entry in self.__find_mp4_files():
        total += 1
    return total
def scan(self) -> None:
    """
    Scan the base directory for series and their missing episodes.

    For every enumerated folder the series is looked up in the database
    by folder name, the provider is asked which episodes are missing,
    the result is persisted to the database (best effort) and stored in
    ``self.keyDict`` under the series key. Progress, warning, error and
    completion handlers are notified along the way.

    Folders without a database entry are skipped. A folder whose key is
    already in ``keyDict`` is reported as a duplicate and skipped before
    any provider or database work, so it cannot overwrite the data of
    the folder that was stored first.

    Errors for a single folder are logged, reported through ``on_error``
    as recoverable, and the scan continues with the next folder.

    Raises:
        Exception: Re-raised when the scan fails outside the per-folder
            handling, after ``on_error`` and ``on_completion`` fired
    """
    # Generate unique operation ID
    self._current_operation_id = str(uuid.uuid4())
    logger.info("Starting scan for missing episodes")

    # Notify scan starting
    self._safe_call_event(
        self.events.on_progress,
        {
            "operation_id": self._current_operation_id,
            "phase": "STARTING",
            "current": 0,
            "total": 0,
            "percentage": 0.0,
            "message": "Initializing scan"
        }
    )

    try:
        # Get total items to process
        total_to_scan = self.get_total_to_scan()
        logger.info("Total folders to scan: %d", total_to_scan)

        result = self.__find_mp4_files()
        counter = 0
        for folder, mp4_files in result:
            try:
                counter += 1

                # Calculate progress
                if total_to_scan > 0:
                    percentage = (counter / total_to_scan) * 100
                else:
                    percentage = 0.0

                # Notify progress
                self._safe_call_event(
                    self.events.on_progress,
                    {
                        "operation_id": self._current_operation_id,
                        "phase": "IN_PROGRESS",
                        "current": counter,
                        "total": total_to_scan,
                        "percentage": percentage,
                        "message": f"Scanning: {folder}",
                        "details": f"Found {len(mp4_files)} episodes"
                    }
                )

                serie = self.__read_data_from_file(folder)
                if serie is None or not serie.key or not serie.key.strip():
                    logger.warning(
                        "No series found in DB for folder '%s', skipping",
                        folder,
                    )
                    continue

                # Series are stored by key (primary identifier), not by
                # folder. Check for a duplicate before doing any work so
                # that "skipping" really leaves the first entry untouched.
                existing = self.keyDict.get(serie.key)
                if existing is not None:
                    logger.warning(
                        "Duplicate series found with key '%s': "
                        "folder '%s' maps to same key as existing folder '%s'. "
                        "Skipping duplicate folder.",
                        serie.key,
                        folder,
                        existing.folder
                    )
                    self._safe_call_event(
                        self.events.on_warning,
                        {
                            "operation_id": self._current_operation_id,
                            "warning": "duplicate_key",
                            "message": f"Duplicate series skipped: '{folder}' maps to key '{serie.key}' already used by '{existing.folder}'",
                            "metadata": {
                                "key": serie.key,
                                "duplicate_folder": folder,
                                "existing_folder": existing.folder,
                            }
                        }
                    )
                    continue

                # Let the provider compare local files with the remote
                # episode counts, yielding missing episodes per season.
                missing_episodes, _site = (
                    self.__get_missing_episodes_and_season(
                        serie.key, mp4_files
                    )
                )
                serie.episodeDict = missing_episodes
                serie.folder = folder

                # Persist to database (async, best effort)
                try:
                    try:
                        asyncio.get_running_loop()
                    except RuntimeError:
                        # No running loop in this thread -- safe to use
                        # asyncio.run(), which blocks until persisted.
                        asyncio.run(self._persist_serie_to_db(serie))
                    else:
                        # Already in async context -- schedule as task.
                        # The event loop only keeps weak references to
                        # tasks, so hold a strong one until it is done.
                        task = asyncio.create_task(
                            self._persist_serie_to_db(serie)
                        )
                        pending = getattr(self, "_persist_tasks", None)
                        if pending is None:
                            pending = self._persist_tasks = set()
                        pending.add(task)
                        task.add_done_callback(pending.discard)
                except Exception as e:
                    logger.warning(
                        "DB persistence failed for '%s', "
                        "continuing without DB: %s",
                        serie.key, e
                    )

                self.keyDict[serie.key] = serie
                logger.debug(
                    "Stored series with key '%s' (folder: '%s')",
                    serie.key,
                    folder
                )
                no_key_found_logger.info(
                    "Saved Serie: '%s'", str(serie)
                )
            except Exception as e:
                # Log error and notify via callback, then carry on with
                # the next folder.
                error_msg = (
                    f"Folder: '{folder}' - "
                    f"Unexpected error: {e}"
                )
                error_logger.error(
                    "%s\n%s",
                    error_msg,
                    traceback.format_exc()
                )
                self._safe_call_event(
                    self.events.on_error,
                    {
                        "operation_id": self._current_operation_id,
                        "error": e,
                        "message": error_msg,
                        "recoverable": True,
                        "metadata": {"folder": folder, "key": None}
                    }
                )

        # Notify scan completion
        self._safe_call_event(
            self.events.on_completion,
            {
                "operation_id": self._current_operation_id,
                "success": True,
                "message": f"Scan completed. Processed {counter} folders.",
                "statistics": {
                    "total_folders": counter,
                    "series_found": len(self.keyDict)
                }
            }
        )
        logger.info(
            "Scan completed. Processed %d folders, found %d series",
            counter,
            len(self.keyDict)
        )
    except Exception as e:
        # Critical error - notify and re-raise
        error_msg = f"Critical scan error: {e}"
        logger.error("%s\n%s", error_msg, traceback.format_exc())
        self._safe_call_event(
            self.events.on_error,
            {
                "operation_id": self._current_operation_id,
                "error": e,
                "message": error_msg,
                "recoverable": False
            }
        )
        self._safe_call_event(
            self.events.on_completion,
            {
                "operation_id": self._current_operation_id,
                "success": False,
                "message": error_msg
            }
        )
        raise
def __find_mp4_files(self) -> Iterator[tuple[str, list[str]]]:
    """Yield ``(folder_name, mp4_paths)`` for each series directory.

    Only directories directly below ``self.directory`` that the settings
    do not ignore are visited; each one is walked recursively. A directory
    without ``.mp4`` files is still yielded, with an empty list.
    """
    logger.info("Scanning for .mp4 files")
    for entry in os.listdir(self.directory):
        series_dir = os.path.join(self.directory, entry)
        if not os.path.isdir(series_dir):
            continue
        if settings.should_ignore_folder(entry):
            logger.debug("Skipping ignored folder: %s", entry)
            continue
        collected: list[str] = [
            os.path.join(parent, filename)
            for parent, _, filenames in os.walk(series_dir)
            for filename in filenames
            if filename.endswith(".mp4")
        ]
        yield entry, collected
def __read_data_from_file(self, folder_name: str) -> Optional[AnimeSeries]:
    """Look up the AnimeSeries stored for a folder.

    Despite the name, nothing is read from a file: the database is
    queried by folder name and there is no file fallback.

    Args:
        folder_name: Filesystem folder name

    Returns:
        Whatever the database lookup returns for the folder, or None
        when the lookup (or closing the session) raises
    """
    try:
        db_session = get_sync_session()
        try:
            return AnimeSeriesService.get_by_folder_sync(
                db_session, folder_name
            )
        finally:
            # Always release the session, even when the query fails.
            db_session.close()
    except Exception as lookup_error:
        logger.warning(
            "DB lookup failed for folder '%s': %s",
            folder_name,
            lookup_error
        )
    return None
def __get_episode_and_season(self, filename: str) -> tuple[int, int]:
    """Parse the season and episode numbers out of a filename.

    The first ``S<digits>E<digits>`` occurrence in the given string is
    used (case-sensitive).

    Args:
        filename: Filename to parse

    Returns:
        Tuple of (season, episode) as integers

    Raises:
        MatchNotFoundError: If pattern not found
    """
    found = re.search(r'S(\d+)E(\d+)', filename)
    if found is None:
        logger.error(
            "Failed to find season/episode pattern in '%s'",
            filename
        )
        raise MatchNotFoundError(
            "Season and episode pattern not found in the filename."
        )

    season_text, episode_text = found.groups()
    logger.debug(
        "Extracted season %s, episode %s from '%s'",
        season_text,
        episode_text,
        filename
    )
    return int(season_text), int(episode_text)
def __get_episodes_and_seasons(
    self,
    mp4_files: Iterable[str]
) -> dict[int, list[int]]:
    """Group the episodes found in the given MP4 files by season.

    Args:
        mp4_files: MP4 filenames to parse

    Returns:
        Dictionary mapping season to list of episode numbers, in the
        order the files were given
    """
    grouped: dict[int, list[int]] = {}
    for path in mp4_files:
        season, episode = self.__get_episode_and_season(path)
        grouped.setdefault(season, []).append(episode)
    return grouped
def __get_missing_episodes_and_season(
    self,
    key: str,
    mp4_files: Iterable[str]
) -> tuple[dict[int, list[int]], str]:
    """Work out which episodes of a series are missing locally.

    An episode counts as missing when it is within the provider's episode
    count for its season, no local file carries its number, and the
    loader's ``is_language`` check accepts it.

    Args:
        key: Series key
        mp4_files: MP4 filenames found for the series

    Returns:
        Tuple of (episodes_dict, site_name); seasons without missing
        episodes are omitted and the site name is always "aniworld.to"
    """
    # Maps season -> number of episodes the provider reports.
    expected_by_season = self.loader.get_season_episode_count(key)
    local_by_season = self.__get_episodes_and_seasons(mp4_files)

    missing_by_season: dict[int, list[int]] = {}
    for season, expected_count in expected_by_season.items():
        present = local_by_season.get(season, [])
        missing: list[int] = []
        for ep in range(1, expected_count + 1):
            if ep in present:
                continue
            # Only consulted for episodes that are absent locally.
            if self.loader.is_language(season, ep, key):
                missing.append(ep)
        if missing:
            missing_by_season[season] = missing
    return missing_by_season, "aniworld.to"
def scan_single_series(
    self,
    key: str,
    folder: str,
) -> dict[int, list[int]]:
    """
    Scan a single series for missing episodes.

    Performs a targeted scan for only the specified series, without
    triggering a full library rescan: the provider's episode counts are
    compared with the MP4 files found in the series folder. The result
    is merged into ``keyDict`` (without duplicating episode numbers that
    are already listed) or stored as a new entry. Progress, error and
    completion handlers are notified.

    Args:
        key: The unique provider key for the series
        folder: The filesystem folder name where the series is stored

    Returns:
        dict[int, list[int]]: Dictionary mapping season numbers to lists
            of missing episode numbers. Empty dict if no missing episodes
            or if the scan failed (failures are reported via events).

    Raises:
        ValueError: If key or folder is empty

    Example:
        >>> scanner = SerieScanner("/path/to/anime", loader)
        >>> missing = scanner.scan_single_series(
        ...     "attack-on-titan",
        ...     "Attack on Titan"
        ... )
        >>> print(missing)
        {1: [5, 6, 7], 2: [1, 2]}
    """
    if not key or not key.strip():
        raise ValueError("Series key cannot be empty")
    if not folder or not folder.strip():
        raise ValueError("Series folder cannot be empty")

    logger.info(
        "Starting targeted scan for series: %s (folder: %s)",
        key,
        folder
    )

    # Generate unique operation ID for this targeted scan
    operation_id = str(uuid.uuid4())

    # Notify scan starting
    self._safe_call_event(
        self.events.on_progress,
        {
            "operation_id": operation_id,
            "phase": "STARTING",
            "current": 0,
            "total": 1,
            "percentage": 0.0,
            "message": f"Scanning series: {folder}",
            "details": f"Key: {key}"
        }
    )

    try:
        folder_path = os.path.join(self.directory, folder)
        mp4_files: list[str] = []
        if not os.path.isdir(folder_path):
            logger.info(
                "Series folder does not exist yet: %s - "
                "will scan for available episodes from provider",
                folder_path
            )
        else:
            # Find existing MP4 files in the folder
            for root, _, files in os.walk(folder_path):
                for file in files:
                    if file.endswith(".mp4"):
                        mp4_files.append(os.path.join(root, file))
            logger.debug(
                "Found %d existing MP4 files in folder %s",
                len(mp4_files),
                folder
            )

        # Get missing episodes from provider
        missing_episodes, site = self.__get_missing_episodes_and_season(
            key, mp4_files
        )
        # Computed once; reused by every message below.
        missing_count = sum(len(eps) for eps in missing_episodes.values())

        # Update progress
        self._safe_call_event(
            self.events.on_progress,
            {
                "operation_id": operation_id,
                "phase": "IN_PROGRESS",
                "current": 1,
                "total": 1,
                "percentage": 100.0,
                "message": f"Scanned: {folder}",
                "details": f"Found {missing_count} missing episodes"
            }
        )

        # Create or update AnimeSeries in keyDict
        if key in self.keyDict:
            existing_ep_dict = self.keyDict[key].episodeDict
            # Merge, adding only numbers that are not listed yet so that
            # rescanning the same series does not duplicate entries.
            # NOTE(review): episodes that are no longer missing stay in
            # the merged dict -- confirm whether replacing is intended.
            for season, eps in missing_episodes.items():
                known = existing_ep_dict.setdefault(season, [])
                for ep in eps:
                    if ep not in known:
                        known.append(ep)
            logger.debug(
                "Updated existing series %s with %d missing episodes",
                key,
                missing_count
            )
        else:
            # Extract year from folder name if present, otherwise None
            year = self._extract_year_from_folder_name(folder)
            # Minimal new entry; the folder doubles as the name because
            # the real name is not known here.
            anime_series = AnimeSeries(
                key=key,
                name=folder,
                site=site,
                folder=folder,
                year=year
            )
            # Seed the episodeDict cache directly with the scan result.
            # Per-season lists are copied so the cache does not share
            # list objects with the dict returned to the caller.
            anime_series._episode_dict_cache = {
                season: list(eps)
                for season, eps in missing_episodes.items()
            }
            self.keyDict[key] = anime_series
            logger.debug(
                "Created new series entry for %s with %d missing episodes (year=%s)",
                key,
                missing_count,
                year
            )

        # Notify completion
        self._safe_call_event(
            self.events.on_completion,
            {
                "operation_id": operation_id,
                "success": True,
                "message": f"Scan completed for {folder}",
                "statistics": {
                    "missing_episodes": missing_count,
                    "seasons_with_missing": len(missing_episodes)
                }
            }
        )
        logger.info(
            "Targeted scan completed for %s: %d missing episodes across %d seasons",
            key,
            missing_count,
            len(missing_episodes)
        )
        return missing_episodes
    except Exception as e:
        error_msg = f"Failed to scan series {key}: {e}"
        logger.error(error_msg, exc_info=True)

        # Notify error
        self._safe_call_event(
            self.events.on_error,
            {
                "operation_id": operation_id,
                "error": e,
                "message": error_msg,
                "recoverable": True,
                "metadata": {"key": key, "folder": folder}
            }
        )
        # Notify completion with failure
        self._safe_call_event(
            self.events.on_completion,
            {
                "operation_id": operation_id,
                "success": False,
                "message": error_msg
            }
        )
        # Return empty dict on error (scan failed but not critical)
        return {}

733
src/server/SeriesApp.py Normal file
View File

@@ -0,0 +1,733 @@
"""
SeriesApp - Core application logic for anime series management.
This module provides the main application interface for searching,
downloading, and managing anime series with support for async callbacks,
progress reporting, and error handling.
Note:
This module is pure domain logic with no database dependencies.
Database operations are handled by the service layer (AnimeService).
"""
import asyncio
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Callable, Dict, List, Optional
from events import Events
from src.config.settings import settings
from src.server.database.SerieList import SerieList
from src.server.database.models import AnimeSeries
from src.server.providers.provider_factory import Loaders
from src.server.SerieScanner import SerieScanner
# Module-level logger used by SeriesApp.
logger = logging.getLogger(__name__)
class DownloadStatusEventArgs:
    """Payload handed to download_status handlers."""

    def __init__(
        self,
        serie_folder: str,
        season: int,
        episode: int,
        status: str,
        key: Optional[str] = None,
        progress: float = 0.0,
        message: Optional[str] = None,
        error: Optional[Exception] = None,
        eta: Optional[int] = None,
        mbper_sec: Optional[float] = None,
        item_id: Optional[str] = None,
    ):
        """
        Store the details of one download status change.

        Args:
            serie_folder: Serie folder name (metadata only, used for
                file paths)
            season: Season number
            episode: Episode number
            status: Status message (e.g., "started", "progress",
                "completed", "failed")
            key: Serie unique identifier (provider key, primary
                identifier)
            progress: Download progress, documented as 0.0 to 1.0.
                NOTE(review): SeriesApp.download passes a 0-100
                percentage for "progress" events -- confirm the scale.
            message: Optional status message
            error: Optional error if status is "failed"
            eta: Estimated time remaining in seconds
            mbper_sec: Download speed in MB/s
            item_id: Optional download queue item ID for tracking
        """
        # Which episode the event is about.
        self.serie_folder, self.key = serie_folder, key
        self.season, self.episode = season, episode
        # What happened to it.
        self.status, self.progress = status, progress
        self.message, self.error = message, error
        # Transfer metrics and queue tracking.
        self.eta, self.mbper_sec = eta, mbper_sec
        self.item_id = item_id
class ScanStatusEventArgs:
    """Payload handed to scan_status handlers."""

    def __init__(
        self,
        current: int,
        total: int,
        folder: str,
        status: str,
        key: Optional[str] = None,
        progress: float = 0.0,
        message: Optional[str] = None,
        error: Optional[Exception] = None,
    ):
        """
        Store the details of one scan status change.

        Args:
            current: Current item being scanned
            total: Total items to scan
            folder: Current folder being scanned (metadata only)
            status: Status message (e.g., "started", "progress",
                "completed", "failed", "cancelled")
            key: Serie unique identifier if applicable (provider key,
                primary identifier)
            progress: Scan progress (0.0 to 1.0)
            message: Optional status message
            error: Optional error if status is "failed"
        """
        # Position within the scan.
        self.current, self.total = current, total
        # What is being scanned.
        self.folder, self.key = folder, key
        # Outcome so far.
        self.status, self.progress = status, progress
        self.message, self.error = message, error
class SeriesApp:
    """
    Main application class for anime series management.

    Provides functionality for:
    - Searching anime series
    - Downloading episodes
    - Scanning directories for missing episodes
    - Managing series lists

    Supports async callbacks for progress reporting.

    Note:
        Series are loaded into this class by the service layer
        (AnimeService) via load_series_from_list(), and that layer is
        expected to persist the results this class returns.
        NOTE(review): the SerieScanner owned by this class writes scan
        results to the database itself, so the "no database dependencies"
        claim made elsewhere in this module no longer holds -- confirm.

    Events:
        download_status: Raised when download status changes.
            Handler signature: def handler(args: DownloadStatusEventArgs)
        scan_status: Raised when scan status changes.
            Handler signature: def handler(args: ScanStatusEventArgs)
    """
def __init__(
    self,
    directory_to_search: str,
):
    """
    Wire up the loader, scanner and in-memory series list.

    Args:
        directory_to_search: Base directory for anime series
    """
    self.directory_to_search = directory_to_search

    # Pool for the blocking loader/scanner calls that the async methods
    # dispatch through run_in_executor().
    self.executor = ThreadPoolExecutor(max_workers=3)

    # Container for the download_status / scan_status events.
    self._events = Events()

    provider_registry = Loaders()
    self.loaders = provider_registry
    self.loader = provider_registry.GetLoader(key="aniworld.to")
    self.serie_scanner = SerieScanner(directory_to_search, self.loader)

    # The list starts empty; the service layer fills it later through
    # load_series_from_list().
    self.list = SerieList(self.directory_to_search)
    self.series_list: List[Any] = []

    # NFO service removed - metadata handling moved to server layer
    self.nfo_service = None

    logger.info(
        "SeriesApp initialized for directory: %s",
        directory_to_search,
    )
@property
def download_status(self):
    """
    Event fired whenever the status of a download changes.

    Subscribe using:
        app.download_status += handler
    """
    events = self._events
    return events.download_status


@download_status.setter
def download_status(self, value):
    """Store the download_status event (needed for the ``+=`` syntax)."""
    setattr(self._events, "download_status", value)
@property
def scan_status(self):
    """
    Event fired whenever the status of a scan changes.

    Subscribe using:
        app.scan_status += handler
    """
    events = self._events
    return events.scan_status


@scan_status.setter
def scan_status(self, value):
    """Store the scan_status event (needed for the ``+=`` syntax)."""
    setattr(self._events, "scan_status", value)
def load_series_from_list(self, series: list) -> None:
    """
    Replace the in-memory series list with the given series.

    Called by the service layer after it loaded the series from the
    database. Existing entries are dropped, the given series are stored
    by their key, and ``series_list`` is refreshed with the series that
    still have missing episodes.

    Args:
        series: List of Serie objects to load
    """
    registry = self.list.keyDict
    registry.clear()
    for entry in series:
        registry[entry.key] = entry

    self.series_list = self.list.GetMissingEpisode()
    logger.debug(
        "Loaded %d series with %d having missing episodes",
        len(series),
        len(self.series_list)
    )
async def search(self, words: str) -> List[Dict[str, Any]]:
    """
    Search the provider for anime series (async).

    The loader's blocking search runs on the thread pool so the event
    loop stays responsive; exceptions it raises propagate to the caller.

    Args:
        words: Search query

    Returns:
        List of search results
    """
    logger.info("Searching for: %s", words)
    pending = asyncio.get_running_loop().run_in_executor(
        self.executor,
        self.loader.search,
        words
    )
    results = await pending
    logger.info("Found %d results", len(results))
    return results
async def download(
    self,
    serie_folder: str,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
    item_id: Optional[str] = None,
) -> bool:
    """
    Download an episode (async).

    Fires ``download_status`` events for "started", "progress",
    "completed", "failed" and "cancelled". The blocking loader call runs
    on the thread pool.

    Args:
        serie_folder: Serie folder name (metadata only, used for
            file path construction)
        season: Season number
        episode: Episode number
        key: Serie unique identifier (provider key, primary
            identifier for lookups)
        language: Language preference
        item_id: Optional download queue item ID for progress
            tracking

    Returns:
        True if download succeeded, False otherwise

    Raises:
        InterruptedError: Re-raised after the "cancelled" event when the
            download was cancelled

    Note:
        The 'key' parameter is the primary identifier for series
        lookups. The 'serie_folder' parameter is only used for
        filesystem operations.
    """
    def emit(status: str, **extra) -> None:
        """Fire download_status with the fields every event shares."""
        self._events.download_status(
            DownloadStatusEventArgs(
                serie_folder=serie_folder,
                key=key,
                season=season,
                episode=episode,
                status=status,
                item_id=item_id,
                **extra,
            )
        )

    logger.info(
        "Starting download: %s (key: %s) S%02dE%02d",
        serie_folder,
        key,
        season,
        episode
    )
    emit("started", message="Download started")

    # Create series folder if it doesn't exist
    folder_path = os.path.join(self.directory_to_search, serie_folder)
    if not os.path.exists(folder_path):
        try:
            os.makedirs(folder_path, exist_ok=True)
            logger.info(
                "Created series folder: %s (key: %s)",
                folder_path,
                key
            )
        except OSError as e:
            logger.error(
                "Failed to create series folder %s: %s",
                folder_path,
                str(e)
            )
            # Carry the exception like the generic failure path does.
            emit(
                "failed",
                error=e,
                message=f"Failed to create folder: {str(e)}",
            )
            return False

    try:
        def download_progress_handler(progress_info):
            """Translate loader progress into download_status events."""
            # Only these two statuses are logged, to avoid log spam; the
            # event below is fired for every progress callback.
            status = progress_info.get("status", "")
            if status in ("downloading", "finished"):
                logger.debug(
                    "download_progress_handler called with: %s", progress_info
                )

            downloaded = progress_info.get('downloaded_bytes', 0)
            total_bytes = (
                progress_info.get('total_bytes')
                or progress_info.get('total_bytes_estimate', 0)
            )
            speed = progress_info.get('speed', 0)  # bytes/sec
            eta = progress_info.get('eta')  # seconds
            mbper_sec = speed / (1024 * 1024) if speed else None

            # NOTE(review): this is a 0-100 percentage, while the
            # "completed" event below uses 1.0 and the event class
            # documents 0.0-1.0. Kept as-is because handlers may rely
            # on it -- confirm the intended scale.
            emit(
                "progress",
                message="Download progress",
                progress=(
                    (downloaded / total_bytes) * 100
                    if total_bytes else 0
                ),
                eta=eta,
                mbper_sec=mbper_sec,
            )

        # Subscribe to loader's download progress events
        self.loader.subscribe_download_progress(download_progress_handler)
        try:
            # Perform download in thread to avoid blocking event loop
            loop = asyncio.get_running_loop()
            download_success = await loop.run_in_executor(
                self.executor,
                self.loader.download,
                self.directory_to_search,
                serie_folder,
                season,
                episode,
                key,
                language
            )
        finally:
            # Always unsubscribe after download completes or fails
            self.loader.unsubscribe_download_progress(
                download_progress_handler
            )

        if download_success:
            logger.info(
                "Download completed: %s (key: %s) S%02dE%02d",
                serie_folder,
                key,
                season,
                episode
            )
            emit(
                "completed",
                progress=1.0,
                message="Download completed successfully",
            )
        else:
            logger.warning(
                "Download failed: %s (key: %s) S%02dE%02d",
                serie_folder,
                key,
                season,
                episode
            )
            emit("failed", message="Download failed")
        return download_success
    except InterruptedError:
        # Download was cancelled - propagate the cancellation
        logger.info(
            "Download cancelled: %s (key: %s) S%02dE%02d",
            serie_folder,
            key,
            season,
            episode,
        )
        emit("cancelled", message="Download cancelled by user")
        raise  # Re-raise to propagate cancellation
    except Exception as e:  # pylint: disable=broad-except
        logger.error(
            "Download error: %s (key: %s) S%02dE%02d - %s",
            serie_folder,
            key,
            season,
            episode,
            str(e),
            exc_info=True,
        )
        emit(
            "failed",
            error=e,
            message=f"Download error: {str(e)}",
        )
        return False
async def rescan(self) -> list:
    """
    Run a file-based rescan of the anime directory (async).

    The blocking scanner calls (counting folders, ``reinit`` and the scan
    itself) are dispatched to ``self.executor``. Progress is reported via
    ``scan_status`` events: ``started``, any ``progress`` events forwarded
    from the scanner, then ``completed``. On error a ``cancelled`` or
    ``failed`` event is fired before the exception is re-raised.

    Returns:
        List of the series objects found by the scanner (the values of
        ``serie_scanner.keyDict``).

    Raises:
        InterruptedError: The scan was cancelled.
        Exception: Any other error raised while scanning.

    Note:
        Nothing is written to the database here. The returned list is
        meant to be persisted by the caller (AnimeService).
    """
    def publish(**fields):
        """Fire one scan_status event built from the given fields."""
        self._events.scan_status(ScanStatusEventArgs(**fields))

    def on_scanner_progress(progress_data):
        """Translate a scanner progress dict into a scan_status event."""
        text = progress_data.get('message', '')
        publish(
            current=progress_data.get('current', 0),
            total=progress_data.get('total', total_to_scan),
            # The scanner prefixes the folder name with 'Scanning: '.
            folder=text.replace('Scanning: ', ''),
            status="progress",
            # Scanner reports 0-100, events carry 0.0-1.0.
            progress=progress_data.get('percentage', 0.0) / 100.0,
            message=text,
        )

    logger.info("Starting directory rescan")
    total_to_scan = 0
    try:
        logger.info("Getting total items to scan...")
        loop = asyncio.get_running_loop()
        total_to_scan = await loop.run_in_executor(
            self.executor, self.serie_scanner.get_total_to_scan
        )
        logger.info("Total folders to scan: %d", total_to_scan)

        logger.info(
            "Firing scan_status 'started' event, handler=%s",
            self._events.scan_status
        )
        publish(
            current=0,
            total=total_to_scan,
            folder="",
            status="started",
            progress=0.0,
            message="Scan started",
        )

        # Reset scanner state before the actual scan.
        await loop.run_in_executor(self.executor, self.serie_scanner.reinit)

        self.serie_scanner.subscribe_on_progress(on_scanner_progress)
        try:
            # Results end up in serie_scanner.keyDict.
            await loop.run_in_executor(
                self.executor, self.serie_scanner.scan
            )
        finally:
            # Unsubscribe whether the scan succeeded or raised.
            self.serie_scanner.unsubscribe_on_progress(on_scanner_progress)

        scanned_series = list(self.serie_scanner.keyDict.values())

        # Replace the in-memory list content with the fresh scan results.
        self.list.keyDict.clear()
        self.list.keyDict.update(
            (found.key, found) for found in scanned_series
        )
        self.series_list = self.list.GetMissingEpisode()
        logger.info("Directory rescan completed successfully")

        logger.info(
            "Firing scan_status 'completed' event, handler=%s",
            self._events.scan_status
        )
        publish(
            current=total_to_scan,
            total=total_to_scan,
            folder="",
            status="completed",
            progress=1.0,
            message=(
                f"Scan completed. Found {len(self.series_list)} "
                "series with missing episodes."
            ),
        )
        return scanned_series
    except InterruptedError:
        logger.warning("Scan cancelled by user")
        publish(
            current=0,
            total=total_to_scan,
            folder="",
            status="cancelled",
            message="Scan cancelled by user",
        )
        raise
    except Exception as e:
        logger.error("Scan error: %s", str(e), exc_info=True)
        publish(
            current=0,
            total=total_to_scan,
            folder="",
            status="failed",
            error=e,
            message=f"Scan error: {str(e)}",
        )
        raise
async def get_series_list(self) -> List[Any]:
    """
    Return the currently cached series list (async).

    No I/O is performed; the value of ``self.series_list`` is handed
    back as-is (the same list object, not a copy).

    Returns:
        List of series with missing episodes
    """
    cached_series = self.series_list
    return cached_series
async def refresh_series_list(self) -> None:
    """
    Rebuild the cached series list (async).

    Delegates entirely to ``self._init_list()`` and waits for it to
    finish; nothing is returned.
    """
    reload_coro = self._init_list()
    await reload_coro
def _get_serie_by_key(self, key: str) -> Optional[AnimeSeries]:
    """
    Look up a series by its unique provider key.

    Args:
        key: The unique provider identifier (e.g.,
            "attack-on-titan")

    Returns:
        Whatever ``self.list.get_by_key(key)`` returns: the matching
        AnimeSeries, or None when the key is unknown.

    Note:
        The lookup is key-based, not folder-based.
    """
    series_store = self.list
    return series_store.get_by_key(key)
def get_all_series_from_data_files(self) -> List[AnimeSeries]:
    """
    Collect all series known to a fresh SerieList for the anime directory.

    A throw-away ``SerieList`` is created for ``directory_to_search`` so
    the main instance's state is not touched, and its ``get_all()``
    result is returned. The method is synchronous; wrap it with
    ``asyncio.to_thread`` if it must run from async code.

    NOTE(review): this only finds data files if the SerieList
    constructor imported by this module loads them. Confirm against the
    SerieList implementation in use.

    Returns:
        List of AnimeSeries objects. An empty list is returned when
        constructing the SerieList raises OSError or ValueError.

    Example:
        series_app = SeriesApp("/path/to/anime")
        all_series = series_app.get_all_series_from_data_files()
        for anime in all_series:
            print(f"Found: {anime.name} (key={anime.key})")
    """
    logger.info(
        "Scanning for data files in directory: %s",
        self.directory_to_search
    )

    try:
        # Separate instance: must not interfere with self.list.
        scratch_list = SerieList(self.directory_to_search)
    except (OSError, ValueError) as e:
        logger.error(
            "Failed to scan directory for data files: %s",
            str(e),
            exc_info=True
        )
        return []
    else:
        found_series = scratch_list.get_all()

    logger.info(
        "Found %d series from data files in %s",
        len(found_series),
        self.directory_to_search
    )
    return found_series
def shutdown(self) -> None:
    """
    Stop the thread pool executor and wait for running work to finish.

    Call this once the instance is no longer needed. It is a no-op when
    the instance has no ``executor`` attribute.
    """
    if not hasattr(self, 'executor'):
        return
    self.executor.shutdown(wait=True)
    logger.info("ThreadPoolExecutor shut down successfully")

View File

@@ -8,8 +8,8 @@ from pydantic import BaseModel, Field
from sqlalchemy.ext.asyncio import AsyncSession
from src.config.settings import settings
from src.core.entities.series import Serie
from src.core.utils.key_utils import generate_key_from_folder, is_valid_key
from src.server.database.models import AnimeSeries
from src.server.utils.key_utils import generate_key_from_folder, is_valid_key
from src.server.database.service import AnimeSeriesService
from src.server.exceptions import (
BadRequestError,
@@ -896,18 +896,18 @@ async def add_series(
# Step D: Add to SerieList (in-memory only, no folder creation)
if series_app and hasattr(series_app, "list"):
serie = Serie(
from src.server.database.models import AnimeSeries
anime = AnimeSeries(
key=key,
name=name,
site="aniworld.to",
folder=folder,
episodeDict={},
year=year
)
# Add to in-memory cache without creating folder on disk
if hasattr(series_app.list, 'keyDict'):
series_app.list.keyDict[key] = serie
series_app.list.keyDict[key] = anime
logger.info(
"Added series to in-memory cache: %s (key=%s, folder=%s, year=%s)",
name,

View File

@@ -0,0 +1,8 @@
"""
Core module for AniWorld application.
Contains domain entities, interfaces, application services, and exceptions.
"""
from . import entities, exceptions, interfaces, providers
__all__ = ['entities', 'exceptions', 'interfaces', 'providers']

View File

@@ -0,0 +1,213 @@
"""NFO XML generator for Kodi/XBMC format.
This module provides functions to generate tvshow.nfo XML files from
TVShowNFO Pydantic models, adapted from the scraper project.
Example:
>>> from src.server.entities.nfo_models import TVShowNFO
>>> nfo = TVShowNFO(title="Test Show", year=2020, tmdbid=12345)
>>> xml_string = generate_tvshow_nfo(nfo)
"""
import logging
from typing import Optional
from lxml import etree
from src.config.settings import settings
from src.server.entities.nfo_models import TVShowNFO
logger = logging.getLogger(__name__)
def generate_tvshow_nfo(tvshow: TVShowNFO, pretty_print: bool = True) -> str:
    """Serialize a TVShowNFO model to Kodi/XBMC tvshow.nfo XML.

    Elements are written in a fixed order; empty values are skipped,
    except ``<plot>`` which is always emitted.

    Args:
        tvshow: TVShowNFO Pydantic model with metadata
        pretty_print: Whether to format XML with indentation

    Returns:
        XML string (with XML declaration) in tvshow.nfo format

    Example:
        >>> nfo = TVShowNFO(title="Attack on Titan", year=2013)
        >>> xml = generate_tvshow_nfo(nfo)
    """
    def text_if_truthy(value):
        """Return str(value) for truthy values, otherwise None."""
        return str(value) if value else None

    def add_all(parent, pairs):
        """Add each (tag, text) pair to parent via _add_element."""
        for tag_name, tag_text in pairs:
            _add_element(parent, tag_name, tag_text)

    root = etree.Element("tvshow")

    # Basic information
    add_all(root, (
        ("title", tvshow.title),
        ("originaltitle", tvshow.originaltitle),
        ("showtitle", tvshow.showtitle),
        ("sorttitle", tvshow.sorttitle),
        ("year", text_if_truthy(tvshow.year)),
    ))

    # <plot> is written even when empty so created and updated NFO files
    # carry the same set of tags.
    _add_element(root, "plot", tvshow.plot, always_write=True)
    add_all(root, (
        ("outline", tvshow.outline),
        ("tagline", tvshow.tagline),
        ("runtime", text_if_truthy(tvshow.runtime)),
    ))

    # Content rating: the FSK value wins when configured and present.
    use_fsk = getattr(settings, 'nfo_prefer_fsk_rating', True) and tvshow.fsk
    _add_element(root, "mpaa", tvshow.fsk if use_fsk else tvshow.mpaa)

    # Certification, status and dates
    add_all(root, (
        ("certification", tvshow.certification),
        ("premiered", tvshow.premiered),
        ("status", tvshow.status),
        ("dateadded", tvshow.dateadded),
    ))

    # Ratings
    if tvshow.ratings:
        ratings_node = etree.SubElement(root, "ratings")
        for entry in tvshow.ratings:
            rating_node = etree.SubElement(ratings_node, "rating")
            if entry.name:
                rating_node.set("name", entry.name)
            if entry.max_rating:
                rating_node.set("max", str(entry.max_rating))
            if entry.default:
                rating_node.set("default", "true")
            _add_element(rating_node, "value", str(entry.value))
            if entry.votes is not None:
                _add_element(rating_node, "votes", str(entry.votes))

    user_rating = tvshow.userrating
    _add_element(
        root,
        "userrating",
        None if user_rating is None else str(user_rating),
    )

    # IDs, followed by the legacy <id>/<imdb_id> tags kept for compatibility
    add_all(root, (
        ("tmdbid", text_if_truthy(tvshow.tmdbid)),
        ("imdbid", tvshow.imdbid),
        ("tvdbid", text_if_truthy(tvshow.tvdbid)),
        ("id", text_if_truthy(tvshow.tvdbid)),
        ("imdb_id", tvshow.imdbid),
    ))

    # Unique IDs
    for unique in tvshow.uniqueid:
        unique_node = etree.SubElement(root, "uniqueid")
        unique_node.set("type", unique.type)
        if unique.default:
            unique_node.set("default", "true")
        unique_node.text = unique.value

    # Multi-value fields: one element per list entry
    for tag_name, values in (
        ("genre", tvshow.genre),
        ("studio", tvshow.studio),
        ("country", tvshow.country),
        ("tag", tvshow.tag),
    ):
        for value in values:
            _add_element(root, tag_name, value)

    # Thumbnails (posters, logos)
    for image in tvshow.thumb:
        thumb_node = etree.SubElement(root, "thumb")
        if image.aspect:
            thumb_node.set("aspect", image.aspect)
        if image.season is not None:
            thumb_node.set("season", str(image.season))
        if image.type:
            thumb_node.set("type", image.type)
        thumb_node.text = str(image.url)

    # Fanart: all backdrops share a single <fanart> container
    if tvshow.fanart:
        fanart_node = etree.SubElement(root, "fanart")
        for backdrop in tvshow.fanart:
            etree.SubElement(fanart_node, "thumb").text = str(backdrop.url)

    # Named seasons
    for season in tvshow.namedseason:
        season_node = etree.SubElement(root, "namedseason")
        season_node.set("number", str(season.number))
        season_node.text = season.name

    # Actors
    for person in tvshow.actors:
        actor_node = etree.SubElement(root, "actor")
        add_all(actor_node, (
            ("name", person.name),
            ("role", person.role),
            ("thumb", text_if_truthy(person.thumb)),
            ("profile", text_if_truthy(person.profile)),
            ("tmdbid", text_if_truthy(person.tmdbid)),
        ))

    # Additional fields
    _add_element(root, "trailer", text_if_truthy(tvshow.trailer))
    _add_element(root, "watched", "true" if tvshow.watched else "false")
    if tvshow.playcount is not None:
        _add_element(root, "playcount", str(tvshow.playcount))

    # Serialize without a declaration, then prepend the fixed one below.
    body = etree.tostring(
        root,
        pretty_print=pretty_print,
        encoding="unicode",
        xml_declaration=False
    )
    header = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'
    return header + body
def _add_element(
    parent: etree.Element,
    tag: str,
    text: Optional[str],
    always_write: bool = False,
) -> Optional[etree.Element]:
    """Append a child element to *parent* when there is something to write.

    Args:
        parent: Parent XML element
        tag: Tag name for child element
        text: Text content; None and "" count as "no text"
        always_write: When True the element is created even without
            text (it then has no text content). Useful for tags like
            ``<plot>`` that should always be present.

    Returns:
        The created element, or None if nothing was added
    """
    has_text = text is not None and text != ""
    if not (has_text or always_write):
        return None

    child = etree.SubElement(parent, tag)
    if has_text:
        child.text = text
    return child
def validate_nfo_xml(xml_string: str) -> bool:
    """Check whether *xml_string* parses as XML.

    Only well-formedness is checked; a syntax error is logged.

    Args:
        xml_string: XML content to validate

    Returns:
        True if valid XML, False otherwise
    """
    payload = xml_string.encode('utf-8')
    try:
        etree.fromstring(payload)
    except etree.XMLSyntaxError as e:
        logger.error("Invalid NFO XML: %s", e)
        return False
    return True

View File

@@ -0,0 +1,234 @@
"""TMDB to NFO model mapper.
This module converts TMDB API data to TVShowNFO Pydantic models,
keeping the mapping logic separate from the service orchestration.
Example:
>>> model = tmdb_to_nfo_model(tmdb_data, content_ratings, get_image_url, "original")
"""
import logging
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional
from src.server.entities.nfo_models import (
ActorInfo,
ImageInfo,
NamedSeason,
RatingInfo,
TVShowNFO,
UniqueID,
)
logger = logging.getLogger(__name__)
def _extract_rating_by_country(
    content_ratings: Dict[str, Any],
    country_code: str,
) -> Optional[str]:
    """Extract content rating for a specific country from TMDB content ratings.

    Entries for the country that carry no rating are skipped, so a later
    entry for the same country can still supply one.

    Args:
        content_ratings: TMDB content ratings response dict with "results" list.
        country_code: ISO 3166-1 alpha-2 country code (e.g., "DE", "US").

    Returns:
        Raw rating string for the requested country, or None if not found.

    Example:
        >>> _extract_rating_by_country({"results": [{"iso_3166_1": "US", "rating": "TV-14"}]}, "US")
        'TV-14'
    """
    if not content_ratings:
        return None

    # "results" may be missing or None; treat both as "no ratings".
    for entry in content_ratings.get("results") or []:
        if entry.get("iso_3166_1") != country_code:
            continue
        rating = entry.get("rating")
        if rating:
            return rating
    return None


def _extract_fsk_rating(content_ratings: Dict[str, Any]) -> Optional[str]:
    """Extract German FSK rating from TMDB content ratings.

    Delegates to :func:`_extract_rating_by_country` and then normalises the
    raw TMDB string into the 'FSK XX' format expected by Kodi/Jellyfin.

    Args:
        content_ratings: TMDB content ratings response.

    Returns:
        Formatted FSK string (e.g., 'FSK 12') or None.
    """
    # Function-scope import: this module does not import ``re`` at file level.
    import re

    raw = _extract_rating_by_country(content_ratings, "DE")
    if raw is None:
        return None

    fsk_mapping: Dict[str, str] = {
        "0": "FSK 0",
        "6": "FSK 6",
        "12": "FSK 12",
        "16": "FSK 16",
        "18": "FSK 18",
    }

    if raw in fsk_mapping:
        return fsk_mapping[raw]

    # Match the first whole number. A substring test would turn "10" or
    # "FSK 10" into "FSK 0" and "160" into "FSK 16".
    number = re.search(r"[0-9]+", raw)
    if number:
        # int() drops leading zeros so "06" still maps to "FSK 6".
        age = str(int(number.group()))
        if age in fsk_mapping:
            return fsk_mapping[age]

    if raw.startswith("FSK"):
        return raw

    logger.debug("Unmapped German rating: %s", raw)
    return None
def tmdb_to_nfo_model(
    tmdb_data: Dict[str, Any],
    content_ratings: Optional[Dict[str, Any]],
    get_image_url: Callable[[str, str], str],
    image_size: str = "original",
) -> TVShowNFO:
    """Build a TVShowNFO model from TMDB TV show details.

    Every NFO field this mapper knows about is set explicitly, so a newly
    created file does not need a later repair pass.

    Args:
        tmdb_data: TMDB TV show details (with credits, external_ids, images
            appended via ``append_to_response``).
        content_ratings: TMDB content ratings response, or None.
        get_image_url: Callable ``(path, size) -> url`` for TMDB images.
        image_size: TMDB image size parameter (e.g., ``"original"``, ``"w500"``).

    Returns:
        TVShowNFO Pydantic model with all available fields populated.

    Raises:
        KeyError: If ``tmdb_data`` has no ``"name"`` entry.
    """
    show_name: str = tmdb_data["name"]
    overview = tmdb_data.get("overview") or None

    # --- Dates: the year is the first four characters of first_air_date ---
    air_date: Optional[str] = tmdb_data.get("first_air_date") or None
    release_year: Optional[int] = None
    if air_date:
        release_year = int(air_date[:4])

    # --- Ratings: a single TMDB rating, only when a vote average exists ---
    rating_entries: List[RatingInfo] = []
    if tmdb_data.get("vote_average"):
        rating_entries = [
            RatingInfo(
                name="themoviedb",
                value=float(tmdb_data["vote_average"]),
                votes=tmdb_data.get("vote_count", 0),
                max_rating=10,
                default=True,
            )
        ]

    # --- External IDs ---
    ext: Dict[str, Any] = tmdb_data.get("external_ids", {})
    imdb_id: Optional[str] = ext.get("imdb_id")
    tvdb_id: Optional[int] = ext.get("tvdb_id")

    # --- Images: poster and first logo become thumbs, backdrop is fanart ---
    posters_and_logos: List[ImageInfo] = []
    backdrops: List[ImageInfo] = []

    poster_path = tmdb_data.get("poster_path")
    if poster_path:
        posters_and_logos.append(
            ImageInfo(url=get_image_url(poster_path, image_size), aspect="poster")
        )

    backdrop_path = tmdb_data.get("backdrop_path")
    if backdrop_path:
        backdrops.append(ImageInfo(url=get_image_url(backdrop_path, image_size)))

    logo_entries: List[Dict[str, Any]] = tmdb_data.get("images", {}).get("logos", [])
    if logo_entries:
        posters_and_logos.append(
            ImageInfo(
                url=get_image_url(logo_entries[0]["file_path"], image_size),
                aspect="clearlogo",
            )
        )

    # --- Cast (top 10) ---
    cast_members = tmdb_data.get("credits", {}).get("cast", [])[:10]
    cast: List[ActorInfo] = [
        ActorInfo(
            name=person["name"],
            role=person.get("character"),
            thumb=(
                get_image_url(person["profile_path"], "h632")
                if person.get("profile_path")
                else None
            ),
            tmdbid=person["id"],
        )
        for person in cast_members
    ]

    # --- Named seasons: need a name and a season number (0 is allowed) ---
    season_names: List[NamedSeason] = [
        NamedSeason(number=entry.get("season_number"), name=entry.get("name"))
        for entry in tmdb_data.get("seasons", [])
        if entry.get("name") and entry.get("season_number") is not None
    ]

    # --- Unique IDs: only the TVDB id is flagged as default ---
    tmdb_id = tmdb_data.get("id")
    id_candidates = (
        ("tmdb", tmdb_id and str(tmdb_id), False),
        ("imdb", imdb_id, False),
        ("tvdb", tvdb_id and str(tvdb_id), True),
    )
    id_entries: List[UniqueID] = [
        UniqueID(type=source, value=value, default=is_default)
        for source, value, is_default in id_candidates
        if value
    ]

    # --- Content ratings ---
    fsk: Optional[str] = None
    mpaa: Optional[str] = None
    if content_ratings:
        fsk = _extract_fsk_rating(content_ratings)
        mpaa = _extract_rating_by_country(content_ratings, "US")

    # --- Country: origin_country codes, else production_countries names ---
    countries: List[str] = list(tmdb_data.get("origin_country", []))
    if not countries:
        countries = [c["name"] for c in tmdb_data.get("production_countries", [])]

    # --- Runtime: first entry of episode_run_time, if any ---
    run_times: List[int] = tmdb_data.get("episode_run_time", [])
    episode_runtime: Optional[int] = run_times[0] if run_times else None

    return TVShowNFO(
        title=show_name,
        originaltitle=tmdb_data.get("original_name") or show_name,
        showtitle=show_name,
        sorttitle=show_name,
        year=release_year,
        plot=overview,
        outline=overview,
        tagline=tmdb_data.get("tagline") or None,
        runtime=episode_runtime,
        premiered=air_date,
        status=tmdb_data.get("status"),
        genre=[g["name"] for g in tmdb_data.get("genres", [])],
        studio=[n["name"] for n in tmdb_data.get("networks", [])],
        country=countries,
        ratings=rating_entries,
        fsk=fsk,
        mpaa=mpaa,
        tmdbid=tmdb_data.get("id"),
        imdbid=imdb_id,
        tvdbid=tvdb_id,
        uniqueid=id_entries,
        thumb=posters_and_logos,
        fanart=backdrops,
        actors=cast,
        namedseason=season_names,
        watched=False,
        dateadded=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    )

View File

@@ -0,0 +1,288 @@
"""Utilities for loading and managing stored anime series metadata.
This module provides the SerieList class for managing collections of anime
series metadata loaded from the database.
Note:
This module is part of the server database layer. All persistence
is handled by the service layer.
"""
from __future__ import annotations
import logging
from typing import Dict, List, Optional
from src.server.database.models import AnimeSeries
logger = logging.getLogger(__name__)
class SerieList:
    """
    In-memory collection of anime series, keyed by provider key.

    Each series is stored under its unique ``key``. The ``folder`` is
    metadata only and is not used as the dictionary key.

    Example:
        # Load from database
        serie_list = SerieList("/path/to/anime")
        await serie_list.load_all_from_db()
        series = serie_list.get_all()

    Attributes:
        directory: Path to the anime directory
        keyDict: Dictionary mapping serie.key to AnimeSeries objects
    """

    def __init__(self, base_path: str) -> None:
        """Create an empty list bound to an anime directory.

        Args:
            base_path: Path to the anime directory
        """
        self.directory: str = base_path
        # serie.key -> AnimeSeries
        self.keyDict: Dict[str, AnimeSeries] = {}

    async def add_to_db(self, anime: AnimeSeries) -> bool:
        """Create the series folder and persist the series to the database.

        The folder ``<directory>/<anime.folder>`` is created first. If a
        series with the same key already exists in the database nothing
        is written and True is returned. Otherwise the series and its
        episodes are created and committed, and the series is added to
        ``keyDict``.

        Args:
            anime: The AnimeSeries instance to add

        Returns:
            True if successful (or already present), False otherwise
        """
        try:
            import os

            from src.server.database.connection import get_async_session_factory
            from src.server.database.service import AnimeSeriesService, EpisodeService

            folder_name = anime.folder
            os.makedirs(self.directory + "/" + folder_name, exist_ok=True)

            db = get_async_session_factory()()
            try:
                if await AnimeSeriesService.get_by_key(db, anime.key):
                    logger.debug(
                        "Series '%s' (key=%s) already exists in DB, skipping",
                        anime.name, anime.key
                    )
                    return True

                created = await AnimeSeriesService.create(
                    db=db,
                    key=anime.key,
                    name=anime.name,
                    site=anime.site,
                    folder=folder_name,
                    year=anime.year
                )
                for ep in anime.episodes:
                    await EpisodeService.create(
                        db=db,
                        series_id=created.id,
                        season=ep.season,
                        episode_number=ep.episode_number
                    )
                await db.commit()

                # Cache only after the commit succeeded.
                self.keyDict[anime.key] = anime
                logger.info(
                    "Persisted series '%s' to database",
                    anime.name
                )
                return True
            except Exception as e:
                await db.rollback()
                logger.error(
                    "Failed to persist series '%s' to DB: %s",
                    anime.key, e, exc_info=True
                )
                return False
            finally:
                await db.close()
        except Exception as e:
            # Reached when imports, folder creation or session setup fail.
            logger.error(
                "Could not add series '%s' to DB (DB unavailable?): %s",
                anime.key, e
            )
            return False

    def contains(self, key: str) -> bool:
        """
        Tell whether a series with the given key is cached.

        Args:
            key: The unique provider identifier for the series

        Returns:
            True if the series exists in the collection
        """
        known_keys = self.keyDict
        return key in known_keys

    def GetMissingEpisode(self) -> List[AnimeSeries]:
        """Return the cached series whose ``episodeDict`` is non-empty."""
        with_missing: List[AnimeSeries] = []
        for entry in self.keyDict.values():
            if entry.episodeDict:
                with_missing.append(entry)
        return with_missing

    def get_missing_episodes(self) -> List[AnimeSeries]:
        """PEP8-friendly alias for :meth:`GetMissingEpisode`."""
        return self.GetMissingEpisode()

    def GetList(self) -> List[AnimeSeries]:
        """Return a new list with every cached series."""
        return [*self.keyDict.values()]

    def get_all(self) -> List[AnimeSeries]:
        """PEP8-friendly alias for :meth:`GetList`."""
        return self.GetList()

    def get_by_key(self, key: str) -> Optional[AnimeSeries]:
        """
        Look up a series by its unique provider key.

        Args:
            key: The unique provider identifier (e.g., "attack-on-titan")

        Returns:
            The AnimeSeries instance if found, None otherwise
        """
        return self.keyDict.get(key)

    def get_by_folder(self, folder: str) -> Optional[AnimeSeries]:
        """
        Look up a series by its folder name (linear search).

        .. deprecated:: 2.0.0
            Use :meth:`get_by_key` instead. Folder-based lookups will be
            removed in version 3.0.0. The `folder` field is metadata only
            and should not be used for identification.

        Every call emits a DeprecationWarning.

        Args:
            folder: The filesystem folder name (e.g., "Attack on Titan (2013)")

        Returns:
            The first AnimeSeries with a matching folder, None otherwise
        """
        import warnings
        warnings.warn(
            "get_by_folder() is deprecated and will be removed in v3.0.0. "
            "Use get_by_key() instead. The 'folder' field is metadata only.",
            DeprecationWarning,
            stacklevel=2
        )
        matches = (
            entry for entry in self.keyDict.values()
            if entry.folder == folder
        )
        return next(matches, None)

    async def load_all_from_db(self) -> int:
        """Load every series from the database into ``keyDict``.

        Series are fetched with their episodes. Existing cache entries
        are kept; entries with the same key are overwritten.

        Returns:
            int: Number of series loaded, or 0 when a RuntimeError
            signals that the database is not available.
        """
        from src.server.database.connection import get_async_session_factory
        from src.server.database.service import AnimeSeriesService

        try:
            db = get_async_session_factory()()
            try:
                rows = await AnimeSeriesService.get_all(
                    db, with_episodes=True
                )
                loaded = 0
                for loaded, row in enumerate(rows, start=1):
                    self.keyDict[row.key] = row

                logger.info(
                    "Loaded %d series from database into in-memory cache",
                    loaded
                )
                return loaded
            finally:
                await db.close()
        except RuntimeError:
            logger.warning(
                "Database not available, skipping DB load"
            )
            return 0

    async def _load_single_series_from_db(
        self,
        anime_folder: str
    ) -> Optional[AnimeSeries]:
        """Load one series from the database by folder name and cache it.

        Args:
            anime_folder: The filesystem folder name to look up

        Returns:
            AnimeSeries if found and loaded, None otherwise (also when a
            RuntimeError signals that the database is not available)
        """
        from src.server.database.connection import get_async_session_factory
        from src.server.database.service import AnimeSeriesService

        try:
            db = get_async_session_factory()()
            try:
                row = await AnimeSeriesService.get_by_folder(
                    db, anime_folder
                )
                if row:
                    self.keyDict[row.key] = row
                    logger.debug(
                        "Loaded series '%s' (key=%s) from DB",
                        row.name, row.key
                    )
                    return row

                logger.debug(
                    "Series with folder '%s' not found in DB",
                    anime_folder
                )
                return None
            finally:
                await db.close()
        except RuntimeError:
            logger.warning(
                "Database not available, cannot load series '%s'",
                anime_folder
            )
            return None

    def invalidate_cache(self) -> None:
        """Drop every entry from the in-memory cache.

        Use after database modifications so that the next load
        repopulates the cache from the database.
        """
        self.keyDict.clear()
        logger.debug("SerieList in-memory cache invalidated")

View File

@@ -48,6 +48,7 @@ from src.server.database.service import (
EpisodeService,
UserSessionService,
)
from src.server.database.SerieList import SerieList
from src.server.database.system_settings_service import SystemSettingsService
__all__ = [
@@ -79,4 +80,6 @@ __all__ = [
"DownloadQueueService",
"SystemSettingsService",
"UserSessionService",
# SerieList
"SerieList",
]

View File

@@ -190,6 +190,54 @@ class AnimeSeries(Base, TimestampMixin):
f"name='{self.name}')>"
)
@property
def episodeDict(self) -> dict[int, list[int]]:
    """Build episode dictionary from episodes relationship or private cache.

    A non-None ``_episode_dict_cache`` (set through the ``episodeDict``
    setter, e.g. for objects that are not backed by DB rows) takes
    precedence over the ``episodes`` relationship.

    Returns:
        Dictionary mapping season numbers to lists of episode numbers
    """
    cached = getattr(self, '_episode_dict_cache', None)
    if cached is not None:
        return cached

    episode_dict: dict[int, list[int]] = {}
    for ep in self.episodes or ():
        # Only a missing value gets the fallback. ``or`` would also
        # rewrite a legitimate season 0 to season 1.
        season = 1 if ep.season is None else ep.season
        number = 0 if ep.episode_number is None else ep.episode_number
        episode_dict.setdefault(season, []).append(number)
    return episode_dict

@episodeDict.setter
def episodeDict(self, value) -> None:
    """Store an explicit episode dictionary in the private cache.

    Makes ``AnimeSeries(episodeDict=...)`` and ``obj.episodeDict = ...``
    work. Assigning None clears the cache so the getter falls back to
    the ``episodes`` relationship again.

    Args:
        value: Mapping of season number to list of episode numbers,
            or None to clear the cache.
    """
    self._episode_dict_cache = value
@property
def name_with_year(self) -> str:
    """Return the series name with the year appended, if a year is set.

    Any trailing "(YYYY)" groups already present in the name are
    stripped first, so the year is never duplicated.

    Returns:
        Name in format "Name (Year)" if year is available, else just name
    """
    base_name = self.name or ''
    if not self.year:
        return base_name

    import re
    without_year = re.sub(r'(\s*\(\d{4}\))+\s*$', '', base_name).strip()
    return f"{without_year} ({self.year})"
@property
def sanitized_folder(self) -> str:
    """Return a filesystem-safe folder name for this series.

    The first non-empty value of ``name_with_year``, ``folder`` and
    ``key`` is sanitized. If that raises ValueError, the sanitized
    ``key`` is used instead.

    Returns:
        Sanitized folder name based on display name with year
    """
    from src.server.utils.filesystem import sanitize_folder_name

    candidate = self.name_with_year or self.folder or self.key
    try:
        safe_name = sanitize_folder_name(candidate)
    except ValueError:
        safe_name = sanitize_folder_name(self.key)
    return safe_name
class Episode(Base, TimestampMixin):
"""SQLAlchemy model for anime episodes.

View File

@@ -0,0 +1,335 @@
"""Pydantic models for NFO metadata based on Kodi/XBMC standard.
This module provides data models for tvshow.nfo files that are compatible
with media center applications like Kodi, Plex, and Jellyfin.
Example:
>>> nfo = TVShowNFO(
... title="Attack on Titan",
... year=2013,
... tmdbid=1429
... )
>>> nfo.premiered = "2013-04-07"
"""
from datetime import datetime
from typing import List, Optional
from pydantic import BaseModel, Field, HttpUrl, field_validator
class RatingInfo(BaseModel):
    """A single rating of a show from one source.

    Attributes:
        name: Source of the rating (e.g., 'themoviedb', 'imdb')
        value: Rating value, 0 to 10
        votes: Number of votes
        max_rating: Maximum possible rating (default: 10)
        default: Whether this is the default rating to display
    """

    name: str = Field(default=..., description="Rating source name")
    value: float = Field(default=..., ge=0, description="Rating value")
    votes: Optional[int] = Field(
        default=None, ge=0, description="Number of votes"
    )
    max_rating: int = Field(
        default=10, ge=1, description="Maximum rating value"
    )
    default: bool = Field(
        default=False, description="Is this the default rating"
    )

    @field_validator('value')
    @classmethod
    def validate_value(cls, v: float, info) -> float:
        """Reject rating values above the fixed upper bound of 10."""
        # max_rating is declared after value and is therefore not yet
        # validated here, so a fixed bound is used instead.
        too_large = v > 10
        if too_large:
            raise ValueError("Rating value cannot exceed 10")
        return v
class ActorInfo(BaseModel):
    """One cast member of a show.

    Attributes:
        name: Actor's name (required)
        role: Character name/role
        thumb: URL to actor's photo
        profile: URL to actor's profile page
        tmdbid: TMDB ID for the actor
    """

    name: str = Field(default=..., description="Actor's name")
    role: Optional[str] = Field(default=None, description="Character role")
    thumb: Optional[HttpUrl] = Field(
        default=None, description="Actor photo URL"
    )
    profile: Optional[HttpUrl] = Field(
        default=None, description="Actor profile URL"
    )
    tmdbid: Optional[int] = Field(default=None, description="TMDB actor ID")
class ImageInfo(BaseModel):
    """An artwork reference (poster, fanart or logo).

    Attributes:
        url: URL to the image (required)
        aspect: Image aspect/type (e.g., 'poster', 'clearlogo', 'logo')
        season: Season number for season-specific images (>= -1)
        type: Image type (e.g., 'season')
    """

    url: HttpUrl = Field(default=..., description="Image URL")
    aspect: Optional[str] = Field(
        default=None,
        description="Image aspect (poster, clearlogo, logo)",
    )
    season: Optional[int] = Field(
        default=None, ge=-1, description="Season number"
    )
    type: Optional[str] = Field(default=None, description="Image type")
class NamedSeason(BaseModel):
    """Display name for one season of a show.

    Attributes:
        number: Season number (0 or greater)
        name: Season name/title
    """

    number: int = Field(default=..., ge=0, description="Season number")
    name: str = Field(default=..., description="Season name")
class UniqueID(BaseModel):
    """Identifier of a show at one metadata source.

    Attributes:
        type: ID source type (tmdb, imdb, tvdb)
        value: The ID value, as a string
        default: Whether this is the default ID
    """

    type: str = Field(default=..., description="ID type (tmdb, imdb, tvdb)")
    value: str = Field(default=..., description="ID value")
    default: bool = Field(default=False, description="Is default ID")
class TVShowNFO(BaseModel):
    """Main tvshow.nfo structure following Kodi/XBMC standard.

    This model represents the complete metadata for a TV show that can be
    serialized to XML for use with media center applications.

    Attributes:
        title: Main title of the show (required, non-empty)
        originaltitle: Original title (defaults to ``title`` when omitted)
        showtitle: Show title (defaults to ``title`` when omitted)
        sorttitle: Title used for sorting
        year: Release year (1900-2100)
        plot: Full plot description
        outline: Short plot summary
        tagline: Show tagline/slogan
        runtime: Episode runtime in minutes
        mpaa: Content rating (e.g., TV-14, TV-MA)
        fsk: German FSK rating (e.g., 'FSK 12', 'FSK 16')
        certification: Additional certification info
        premiered: Premiere date (YYYY-MM-DD format)
        status: Show status (e.g., 'Continuing', 'Ended')
        dateadded: Date when added to library (YYYY-MM-DD HH:MM:SS)
        studio: List of production studios
        genre: List of genres
        country: List of countries
        tag: List of tags/keywords
        tmdbid: TMDB ID
        imdbid: IMDB ID ('tt' followed by digits)
        tvdbid: TVDB ID
        uniqueid: List of unique IDs
        ratings: List of ratings from various sources
        userrating: User's personal rating (0-10)
        watched: Whether the show has been watched
        playcount: Number of times watched
        thumb: List of thumbnail/poster images
        fanart: List of fanart/backdrop images
        actors: List of cast members
        namedseason: List of named seasons
        trailer: Trailer URL
    """

    # Required fields
    title: str = Field(..., description="Show title", min_length=1)

    # Basic information (optional)
    originaltitle: Optional[str] = Field(None, description="Original title")
    showtitle: Optional[str] = Field(None, description="Show title")
    sorttitle: Optional[str] = Field(None, description="Sort title")
    year: Optional[int] = Field(
        None,
        ge=1900,
        le=2100,
        description="Release year"
    )

    # Plot and description
    plot: Optional[str] = Field(None, description="Full plot description")
    outline: Optional[str] = Field(None, description="Short plot summary")
    tagline: Optional[str] = Field(None, description="Show tagline")

    # Technical details
    runtime: Optional[int] = Field(
        None,
        ge=0,
        description="Episode runtime in minutes"
    )
    mpaa: Optional[str] = Field(None, description="Content rating")
    fsk: Optional[str] = Field(
        None,
        description="German FSK rating (e.g., 'FSK 12', 'FSK 16')"
    )
    certification: Optional[str] = Field(
        None,
        description="Certification info"
    )

    # Status and dates
    premiered: Optional[str] = Field(
        None,
        description="Premiere date (YYYY-MM-DD)"
    )
    status: Optional[str] = Field(None, description="Show status")
    dateadded: Optional[str] = Field(
        None,
        description="Date added to library"
    )

    # Multi-value fields
    studio: List[str] = Field(
        default_factory=list,
        description="Production studios"
    )
    genre: List[str] = Field(
        default_factory=list,
        description="Genres"
    )
    country: List[str] = Field(
        default_factory=list,
        description="Countries"
    )
    tag: List[str] = Field(
        default_factory=list,
        description="Tags/keywords"
    )

    # IDs
    tmdbid: Optional[int] = Field(None, description="TMDB ID")
    imdbid: Optional[str] = Field(None, description="IMDB ID")
    tvdbid: Optional[int] = Field(None, description="TVDB ID")
    uniqueid: List[UniqueID] = Field(
        default_factory=list,
        description="Unique IDs"
    )

    # Ratings and viewing info
    ratings: List[RatingInfo] = Field(
        default_factory=list,
        description="Ratings"
    )
    userrating: Optional[float] = Field(
        None,
        ge=0,
        le=10,
        description="User rating"
    )
    watched: bool = Field(False, description="Watched status")
    playcount: Optional[int] = Field(
        None,
        ge=0,
        description="Play count"
    )

    # Media
    thumb: List[ImageInfo] = Field(
        default_factory=list,
        description="Thumbnail images"
    )
    fanart: List[ImageInfo] = Field(
        default_factory=list,
        description="Fanart images"
    )

    # Cast and crew
    actors: List[ActorInfo] = Field(
        default_factory=list,
        description="Cast members"
    )

    # Seasons
    namedseason: List[NamedSeason] = Field(
        default_factory=list,
        description="Named seasons"
    )

    # Additional
    trailer: Optional[HttpUrl] = Field(None, description="Trailer URL")

    @field_validator('premiered')
    @classmethod
    def validate_premiered_date(cls, v: Optional[str]) -> Optional[str]:
        """Validate premiered date format (YYYY-MM-DD).

        Args:
            v: Raw premiered value, or None when not provided.

        Returns:
            The value unchanged when it is None or a valid calendar date.

        Raises:
            ValueError: If the value is not a strict YYYY-MM-DD date.
        """
        if v is None:
            return v
        # strptime alone is too lenient: it accepts a space-padded day
        # ("2020-01- 1") and non-ASCII decimal digits. Require plain ASCII
        # digits in every numeric position before parsing.
        digits = v[:4] + v[5:7] + v[8:]
        well_formed = (
            len(v) == 10
            and v[4] == '-'
            and v[7] == '-'
            and digits.isascii()
            and digits.isdigit()
        )
        if not well_formed:
            raise ValueError(
                "Premiered date must be in YYYY-MM-DD format"
            )
        try:
            # Rejects impossible dates such as 2021-02-30.
            datetime.strptime(v, '%Y-%m-%d')
        except ValueError as exc:
            raise ValueError(
                "Premiered date must be in YYYY-MM-DD format"
            ) from exc
        return v

    @field_validator('dateadded')
    @classmethod
    def validate_dateadded(cls, v: Optional[str]) -> Optional[str]:
        """Validate dateadded format (YYYY-MM-DD HH:MM:SS).

        Args:
            v: Raw dateadded value, or None when not provided.

        Returns:
            The value unchanged when it is None or a valid timestamp.

        Raises:
            ValueError: If the value is not a strict
                YYYY-MM-DD HH:MM:SS timestamp.
        """
        if v is None:
            return v
        # Same leniency concern as for the premiered date: insist on ASCII
        # digits at every numeric position before handing off to strptime.
        digits = (
            v[:4] + v[5:7] + v[8:10] + v[11:13] + v[14:16] + v[17:]
        )
        well_formed = (
            len(v) == 19
            and v[4] == '-'
            and v[7] == '-'
            and v[10] == ' '
            and v[13] == ':'
            and v[16] == ':'
            and digits.isascii()
            and digits.isdigit()
        )
        if not well_formed:
            raise ValueError(
                "Dateadded must be in YYYY-MM-DD HH:MM:SS format"
            )
        try:
            datetime.strptime(v, '%Y-%m-%d %H:%M:%S')
        except ValueError as exc:
            raise ValueError(
                "Dateadded must be in YYYY-MM-DD HH:MM:SS format"
            ) from exc
        return v

    @field_validator('imdbid')
    @classmethod
    def validate_imdbid(cls, v: Optional[str]) -> Optional[str]:
        """Validate IMDB ID format ('tt' followed by ASCII digits).

        Args:
            v: Raw IMDB ID, or None when not provided.

        Returns:
            The value unchanged when it is None or well-formed.

        Raises:
            ValueError: If the prefix is not 'tt' or the remainder is not
                a non-empty run of digits.
        """
        if v is None:
            return v
        if not v.startswith('tt'):
            raise ValueError("IMDB ID must start with 'tt'")
        suffix = v[2:]
        # str.isdigit() alone also accepts non-ASCII digit characters.
        if not (suffix.isascii() and suffix.isdigit()):
            raise ValueError("IMDB ID must be 'tt' followed by digits")
        return v

    def model_post_init(self, __context) -> None:
        """Fill title-derived defaults after initialization.

        ``showtitle`` and ``originaltitle`` fall back to ``title`` when
        they were not provided.
        """
        # Set showtitle to title if not provided
        if self.showtitle is None:
            self.showtitle = self.title
        # Set originaltitle to title if not provided
        if self.originaltitle is None:
            self.originaltitle = self.title

200
src/server/error_handler.py Normal file
View File

@@ -0,0 +1,200 @@
"""
Error handling and recovery strategies for core providers.
This module provides custom exceptions and decorators for handling
errors in provider operations with automatic retry mechanisms.
"""
import functools
import logging
from typing import Any, Callable, Optional, TypeVar
# Module logger, named after this module.
logger = logging.getLogger(__name__)
# Type variable bound to any callable; used by with_error_recovery so the
# decorator is annotated as returning the same callable type it receives.
F = TypeVar("F", bound=Callable[..., Any])
class RetryableError(Exception):
    """Signals a failure after which the operation may safely be retried."""
class NonRetryableError(Exception):
    """Signals a failure after which the operation must not be retried."""
class NetworkError(Exception):
    """Raised for failures that are network-related."""
class DownloadError(Exception):
    """Raised for failures that are download-related."""
class RecoveryStrategies:
    """Retry helpers that re-run a callable with exponential backoff."""

    def __init__(
        self,
        max_retries: int = 3,
        base_delay: float = 1.0,
        max_delay: float = 60.0,
        exponential_base: float = 2.0,
    ) -> None:
        """Initialize recovery strategies.

        Args:
            max_retries: Maximum number of attempts made per call.
            base_delay: Initial delay between retries in seconds.
            max_delay: Maximum delay between retries in seconds.
            exponential_base: Base for exponential backoff multiplier.
        """
        self.max_retries = max_retries
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.exponential_base = exponential_base

    def _calculate_delay(self, attempt: int) -> float:
        """Calculate delay for given retry attempt using exponential backoff.

        Args:
            attempt: Zero-based retry attempt number.

        Returns:
            Delay in seconds before next retry, capped at ``max_delay``.
        """
        delay = self.base_delay * (self.exponential_base ** attempt)
        return min(delay, self.max_delay)

    def _pause_before_retry(
        self, attempt: int, label: str, exc: Exception
    ) -> None:
        """Log the failure and sleep, unless this was the final attempt.

        Args:
            attempt: Zero-based index of the attempt that just failed.
            label: Failure category used in the log line (e.g. "Network").
            exc: The exception raised by the failed attempt.
        """
        if attempt >= self.max_retries - 1:
            # Final attempt: the caller re-raises, so do not wait.
            return
        import time  # kept function-local, as in the original module

        delay = self._calculate_delay(attempt)
        logger.warning(
            "%s error on attempt %d/%d, retrying in %.1fs: %s",
            label, attempt + 1, self.max_retries, delay, exc
        )
        time.sleep(delay)

    def handle_network_failure(
        self,
        func: Callable, *args: Any, **kwargs: Any
    ) -> Any:
        """Call ``func`` and retry on network errors with backoff.

        Args:
            func: Callable to invoke.
            *args: Positional arguments forwarded to ``func``.
            **kwargs: Keyword arguments forwarded to ``func``.

        Returns:
            Whatever ``func`` returns on its first successful attempt.

        Raises:
            NetworkError, ConnectionError, TimeoutError: The last such
                error once all attempts are exhausted. A bare NetworkError
                is raised when no attempt was made (``max_retries`` <= 0).
        """
        last_error: Optional[Exception] = None
        for attempt in range(self.max_retries):
            try:
                return func(*args, **kwargs)
            except (NetworkError, ConnectionError, TimeoutError) as exc:
                last_error = exc
                self._pause_before_retry(attempt, "Network", exc)
        if last_error is not None:
            raise last_error
        raise NetworkError("Network failure after retries")

    def handle_download_failure(
        self,
        func: Callable, *args: Any, **kwargs: Any
    ) -> Any:
        """Call ``func`` and retry on DownloadError with backoff.

        Args:
            func: Callable to invoke.
            *args: Positional arguments forwarded to ``func``.
            **kwargs: Keyword arguments forwarded to ``func``.

        Returns:
            Whatever ``func`` returns on its first successful attempt.

        Raises:
            DownloadError: The last download error once all attempts are
                exhausted, or a generic one when no attempt was made
                (``max_retries`` <= 0).
        """
        last_error: Optional[Exception] = None
        for attempt in range(self.max_retries):
            try:
                return func(*args, **kwargs)
            except DownloadError as exc:
                last_error = exc
                self._pause_before_retry(attempt, "Download", exc)
        if last_error is not None:
            raise last_error
        raise DownloadError("Download failed after retries")
class FileCorruptionDetector:
    """Detector for corrupted files."""

    @staticmethod
    def is_valid_video_file(
        filepath: str, min_size_bytes: int = 1024 * 1024
    ) -> bool:
        """Check whether a video file looks valid based on its size.

        This is a size heuristic only; file contents are not inspected.

        Args:
            filepath: Path of the file to check.
            min_size_bytes: Size the file must strictly exceed to count as
                valid. Defaults to 1 MiB.

        Returns:
            True if ``filepath`` is an existing regular file larger than
            ``min_size_bytes``; False otherwise, including when the check
            itself fails.
        """
        import os  # kept function-local, as in the original module

        try:
            # isfile() also rejects directories, which are never videos.
            if not os.path.isfile(filepath):
                return False
            return os.path.getsize(filepath) > min_size_bytes
        except (OSError, TypeError, ValueError) as e:
            # OSError: file vanished or is unreadable between the calls;
            # TypeError/ValueError: filepath is not a usable path value.
            logger.error("Error checking file validity: %s", e)
            return False
def with_error_recovery(
    max_retries: int = 3, context: str = ""
) -> Callable[[F], F]:
    """
    Decorator for adding error recovery to functions.

    The wrapped function is called up to ``max_retries`` times. A
    ``NonRetryableError`` propagates immediately; any other ``Exception``
    triggers another attempt (with no delay in between) until the attempts
    are used up, at which point the last exception is re-raised.

    Args:
        max_retries: Maximum number of attempts
        context: Context string for logging; when empty, the decorated
            function's qualified name is used instead

    Returns:
        Decorated function with retry logic

    Raises:
        RuntimeError: From the wrapper when no attempt was made at all
            (``max_retries`` <= 0).
    """
    def decorator(func: F) -> F:
        # An empty context would yield "Error in  (attempt ...)" log lines,
        # so fall back to something that identifies the function.
        label = context or getattr(func, "__qualname__", repr(func))

        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            last_error: Optional[Exception] = None
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except NonRetryableError:
                    raise
                except Exception as e:
                    last_error = e
                    if attempt < max_retries - 1:
                        logger.warning(
                            "Error in %s (attempt %d/%d): %s, retrying...",
                            label,
                            attempt + 1,
                            max_retries,
                            e,
                        )
                    else:
                        logger.error(
                            "Error in %s failed after %d attempts: %s",
                            label,
                            max_retries,
                            e,
                        )
            # Identity check: an exception type may define __bool__/__len__.
            if last_error is not None:
                raise last_error
            raise RuntimeError(
                f"Unexpected error in {label} after {max_retries} attempts"
            )
        return wrapper  # type: ignore
    return decorator
# Create module-level instances for use in provider code.
# RecoveryStrategies() is built with its constructor defaults
# (3 attempts, 1.0s base delay, 60.0s cap, backoff base 2.0).
recovery_strategies = RecoveryStrategies()
file_corruption_detector = FileCorruptionDetector()

View File

@@ -0,0 +1,7 @@
class NoKeyFoundException(Exception):
    """Raised when the key for an anime cannot be found."""
class MatchNotFoundError(Exception):
    """Exception raised when no match can be found.

    NOTE(review): the previous docstring was a verbatim copy of
    NoKeyFoundException's. What exactly is being matched (presumably a
    series lookup) should be confirmed against the raising code.
    """

View File

@@ -0,0 +1,3 @@
from src.server.exceptions.exceptions.Exceptions import MatchNotFoundError, NoKeyFoundException
__all__ = ["MatchNotFoundError", "NoKeyFoundException"]

View File

@@ -0,0 +1,367 @@
"""
Progress callback interfaces for core operations.
This module defines clean interfaces for progress reporting, error handling,
and completion notifications across all core operations (scanning,
downloading).
"""
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class OperationType(str, Enum):
    """Kinds of operation that can emit progress events.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    SCAN = "scan"
    DOWNLOAD = "download"
    SEARCH = "search"
    INITIALIZATION = "initialization"
class ProgressPhase(str, Enum):
    """Lifecycle stages an operation can report.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    STARTING = "starting"
    IN_PROGRESS = "in_progress"
    COMPLETING = "completing"
    COMPLETED = "completed"
    FAILED = "failed"
    CANCELLED = "cancelled"
@dataclass
class ProgressContext:
    """
    Snapshot of an operation's progress, handed to progress callbacks.

    Attributes:
        operation_type: Type of operation being performed
        operation_id: Unique identifier for this operation
        phase: Current phase of the operation
        current: Current progress value (e.g., files processed)
        total: Total progress value (e.g., total files)
        percentage: Completion percentage (0.0 to 100.0)
        message: Human-readable progress message
        details: Additional context-specific details
        key: Provider-assigned series identifier (None when not applicable)
        folder: Optional folder metadata for display purposes only
        metadata: Extra metadata for specialized use cases
    """

    operation_type: OperationType
    operation_id: str
    phase: ProgressPhase
    current: int
    total: int
    percentage: float
    message: str
    details: Optional[str] = None
    key: Optional[str] = None
    folder: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view suitable for serialization.

        Enum members are replaced by their string values and the
        percentage is rounded to two decimals. ``metadata`` is included
        as the same dict object, not a copy.
        """
        rounded_percentage = round(self.percentage, 2)
        return dict(
            operation_type=self.operation_type.value,
            operation_id=self.operation_id,
            phase=self.phase.value,
            current=self.current,
            total=self.total,
            percentage=rounded_percentage,
            message=self.message,
            details=self.details,
            key=self.key,
            folder=self.folder,
            metadata=self.metadata,
        )
@dataclass
class ErrorContext:
    """
    Description of a failure, handed to error callbacks.

    Attributes:
        operation_type: Type of operation that failed
        operation_id: Unique identifier for the operation
        error: The exception that occurred
        message: Human-readable error message
        recoverable: Whether the error is recoverable
        retry_count: Number of retry attempts made
        key: Provider-assigned series identifier (None when not applicable)
        folder: Optional folder metadata for display purposes only
        metadata: Additional error context
    """

    operation_type: OperationType
    operation_id: str
    error: Exception
    message: str
    recoverable: bool = False
    retry_count: int = 0
    key: Optional[str] = None
    folder: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view suitable for serialization.

        The exception object itself is not included; it is represented
        by its class name (``error_type``) and ``str()`` form
        (``error_message``).
        """
        error_type_name = type(self.error).__name__
        error_text = str(self.error)
        return dict(
            operation_type=self.operation_type.value,
            operation_id=self.operation_id,
            error_type=error_type_name,
            error_message=error_text,
            message=self.message,
            recoverable=self.recoverable,
            retry_count=self.retry_count,
            key=self.key,
            folder=self.folder,
            metadata=self.metadata,
        )
@dataclass
class CompletionContext:
    """
    Outcome of a finished operation, handed to completion callbacks.

    Attributes:
        operation_type: Type of operation that completed
        operation_id: Unique identifier for the operation
        success: Whether the operation completed successfully
        message: Human-readable completion message
        result_data: Result data from the operation
        statistics: Operation statistics (duration, items processed, etc.)
        key: Provider-assigned series identifier (None when not applicable)
        folder: Optional folder metadata for display purposes only
        metadata: Additional completion context
    """

    operation_type: OperationType
    operation_id: str
    success: bool
    message: str
    result_data: Optional[Any] = None
    statistics: Dict[str, Any] = field(default_factory=dict)
    key: Optional[str] = None
    folder: Optional[str] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view suitable for serialization.

        Note that ``result_data`` is not part of the returned dictionary.
        """
        return dict(
            operation_type=self.operation_type.value,
            operation_id=self.operation_id,
            success=self.success,
            message=self.message,
            statistics=self.statistics,
            key=self.key,
            folder=self.folder,
            metadata=self.metadata,
        )
class ProgressCallback(ABC):
    """
    Interface for receivers of progress updates.

    Subclass and implement ``on_progress`` to be notified as a core
    operation advances.
    """

    @abstractmethod
    def on_progress(self, context: ProgressContext) -> None:
        """
        Handle one progress update.

        Args:
            context: Complete progress context information
        """
class ErrorCallback(ABC):
    """
    Interface for receivers of error notifications.

    Subclass and implement ``on_error`` to be notified when a core
    operation fails.
    """

    @abstractmethod
    def on_error(self, context: ErrorContext) -> None:
        """
        Handle one error notification.

        Args:
            context: Complete error context information
        """
class CompletionCallback(ABC):
    """
    Interface for receivers of completion notifications.

    Subclass and implement ``on_completion`` to be notified when a core
    operation finishes.
    """

    @abstractmethod
    def on_completion(self, context: CompletionContext) -> None:
        """
        Handle one completion notification (success or failure).

        Args:
            context: Complete completion context information
        """
class CallbackManager:
    """
    Manages multiple callbacks for an operation.

    This class allows registering multiple progress, error, and completion
    callbacks and dispatching events to all registered callbacks. A failing
    callback is logged and does not prevent the remaining ones from running.
    """

    def __init__(self):
        """Initialize the callback manager with empty registries."""
        self._progress_callbacks: list[ProgressCallback] = []
        self._error_callbacks: list[ErrorCallback] = []
        self._completion_callbacks: list[CompletionCallback] = []

    @staticmethod
    def _add(registry: list, callback: Any) -> None:
        """Append ``callback`` to ``registry`` unless already present."""
        if callback not in registry:
            registry.append(callback)

    @staticmethod
    def _discard(registry: list, callback: Any) -> None:
        """Remove ``callback`` from ``registry`` if it is present."""
        if callback in registry:
            registry.remove(callback)

    @staticmethod
    def _dispatch(
        registry: list, handler_name: str, label: str, context: Any
    ) -> None:
        """Invoke ``handler_name(context)`` on every callback in ``registry``.

        Iterates over a snapshot so a callback may register or unregister
        callbacks (including itself) while being notified without causing
        other callbacks to be skipped.
        """
        for callback in tuple(registry):
            try:
                getattr(callback, handler_name)(context)
            except Exception as e:
                # Log but don't let callback errors break the operation
                logger.error(
                    "Error in %s callback %s: %s",
                    label,
                    callback,
                    e,
                    exc_info=True
                )

    def register_progress_callback(self, callback: ProgressCallback) -> None:
        """
        Register a progress callback (no-op if already registered).

        Args:
            callback: Progress callback to register
        """
        self._add(self._progress_callbacks, callback)

    def register_error_callback(self, callback: ErrorCallback) -> None:
        """
        Register an error callback (no-op if already registered).

        Args:
            callback: Error callback to register
        """
        self._add(self._error_callbacks, callback)

    def register_completion_callback(
        self,
        callback: CompletionCallback
    ) -> None:
        """
        Register a completion callback (no-op if already registered).

        Args:
            callback: Completion callback to register
        """
        self._add(self._completion_callbacks, callback)

    def unregister_progress_callback(self, callback: ProgressCallback) -> None:
        """
        Unregister a progress callback (no-op if not registered).

        Args:
            callback: Progress callback to unregister
        """
        self._discard(self._progress_callbacks, callback)

    def unregister_error_callback(self, callback: ErrorCallback) -> None:
        """
        Unregister an error callback (no-op if not registered).

        Args:
            callback: Error callback to unregister
        """
        self._discard(self._error_callbacks, callback)

    def unregister_completion_callback(
        self,
        callback: CompletionCallback
    ) -> None:
        """
        Unregister a completion callback (no-op if not registered).

        Args:
            callback: Completion callback to unregister
        """
        self._discard(self._completion_callbacks, callback)

    def notify_progress(self, context: ProgressContext) -> None:
        """
        Notify all registered progress callbacks.

        Args:
            context: Progress context to send
        """
        self._dispatch(
            self._progress_callbacks, "on_progress", "progress", context
        )

    def notify_error(self, context: ErrorContext) -> None:
        """
        Notify all registered error callbacks.

        Args:
            context: Error context to send
        """
        self._dispatch(self._error_callbacks, "on_error", "error", context)

    def notify_completion(self, context: CompletionContext) -> None:
        """
        Notify all registered completion callbacks.

        Args:
            context: Completion context to send
        """
        self._dispatch(
            self._completion_callbacks,
            "on_completion",
            "completion",
            context,
        )

    def clear_all_callbacks(self) -> None:
        """Clear all registered callbacks."""
        self._progress_callbacks.clear()
        self._error_callbacks.clear()
        self._completion_callbacks.clear()

View File

@@ -0,0 +1,11 @@
from ..providers.streaming.Provider import Provider
from ..providers.streaming.voe import VOE
class Providers:
    """Registry mapping provider keys to streaming provider instances."""

    def __init__(self):
        """Build the registry; it currently holds a single "VOE" entry."""
        self.dict = {"VOE": VOE()}

    def GetProvider(self, key: str) -> Provider:
        """Return the provider registered under ``key``.

        Args:
            key: Registry key, e.g. "VOE".

        Raises:
            KeyError: If no provider is registered under ``key``.
        """
        provider = self.dict[key]
        return provider

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,104 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, List
class Loader(ABC):
    """Contract that every anime data loader/provider must implement."""

    @abstractmethod
    def subscribe_download_progress(self, handler):
        """Attach ``handler`` to the download_progress event.

        Args:
            handler: Callable to be called with progress dict.
        """

    @abstractmethod
    def unsubscribe_download_progress(self, handler):
        """Detach ``handler`` from the download_progress event.

        Args:
            handler: Callable previously subscribed.
        """

    @abstractmethod
    def search(self, word: str) -> List[Dict[str, Any]]:
        """Look up anime series matching a search term.

        Args:
            word: Search term to look for

        Returns:
            List of found series as dictionaries containing series
            information
        """

    @abstractmethod
    def is_language(
        self,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub",
    ) -> bool:
        """Tell whether an episode is available in a given language.

        Args:
            season: Season number (1-indexed)
            episode: Episode number (1-indexed)
            key: Unique series identifier/key
            language: Language to check (default: German Dub)

        Returns:
            True if episode exists in specified language, False otherwise
        """

    @abstractmethod
    def download(
        self,
        base_directory: str,
        serie_folder: str,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub"
    ) -> bool:
        """Fetch one episode into the given directory.

        Args:
            base_directory: Base directory for downloads
            serie_folder: Series folder name within base directory
            season: Season number (0 for movies, 1+ for series)
            episode: Episode number within season
            key: Unique series identifier/key
            language: Language version to download (default: German Dub)

        Returns:
            True if download successful, False otherwise
        """

    @abstractmethod
    def get_site_key(self) -> str:
        """Return the site key/identifier of this provider.

        Returns:
            Site key string (e.g., 'aniworld.to', 'voe.com')
        """

    @abstractmethod
    def get_title(self, key: str) -> str:
        """Return the human-readable title of a series.

        Args:
            key: Unique series identifier/key

        Returns:
            Series title string
        """

    @abstractmethod
    def get_season_episode_count(self, slug: str) -> Dict[int, int]:
        """Return the number of episodes in each season of a series.

        Args:
            slug: Series slug/key identifier

        Returns:
            Dictionary mapping season number (int) to episode count (int)
        """

View File

@@ -0,0 +1,351 @@
"""Dynamic provider configuration management.
This module provides runtime configuration management for anime providers,
allowing dynamic updates without application restart.
"""
import json
import logging
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class ProviderSettings:
    """Configuration settings for a single provider."""

    name: str
    enabled: bool = True
    priority: int = 0
    timeout_seconds: int = 30
    max_retries: int = 3
    retry_delay_seconds: float = 1.0
    max_concurrent_downloads: int = 3
    bandwidth_limit_mbps: Optional[float] = None
    custom_headers: Optional[Dict[str, str]] = None
    custom_params: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert settings to dictionary, omitting fields that are None."""
        return {
            k: v for k, v in asdict(self).items() if v is not None
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ProviderSettings":
        """Create settings from dictionary, ignoring unknown keys.

        Args:
            data: Mapping of field names to values; must contain ``name``.

        Returns:
            A new ProviderSettings built from the recognised keys.

        Raises:
            TypeError: If the required ``name`` key is missing.
        """
        from dataclasses import fields

        # hasattr(cls, key) is the wrong filter here: fields without a
        # default (``name``) are not class attributes, so they would be
        # dropped, while method names such as "to_dict" would pass.
        known = {f.name for f in fields(cls) if f.init}
        return cls(**{k: v for k, v in data.items() if k in known})
class ProviderConfigManager:
    """Manages dynamic configuration for anime providers.

    Holds per-provider ``ProviderSettings`` together with a dictionary of
    global settings, and can save both to / load both from a JSON file.
    """

    def __init__(self, config_file: Optional[Path] = None):
        """Initialize provider configuration manager.

        Args:
            config_file: Path to configuration file (optional). If the
                file exists it is loaded immediately.
        """
        self._config_file = config_file
        self._provider_settings: Dict[str, ProviderSettings] = {}
        self._global_settings: Dict[str, Any] = (
            self._default_global_settings()
        )
        # Load configuration if file exists
        if config_file and config_file.exists():
            self.load_config()
        logger.info("Provider configuration manager initialized")

    @staticmethod
    def _default_global_settings() -> Dict[str, Any]:
        """Return a fresh copy of the built-in global settings."""
        return {
            "default_timeout": 30,
            "default_max_retries": 3,
            "default_retry_delay": 1.0,
            "enable_health_monitoring": True,
            "enable_failover": True,
        }

    @staticmethod
    def _setting_field_names() -> frozenset:
        """Return the names of the fields declared on ProviderSettings."""
        from dataclasses import fields

        return frozenset(f.name for f in fields(ProviderSettings))

    def get_provider_settings(
        self, provider_name: str
    ) -> Optional[ProviderSettings]:
        """Get settings for a specific provider.

        Args:
            provider_name: Name of the provider.

        Returns:
            Provider settings or None if not configured.
        """
        return self._provider_settings.get(provider_name)

    def set_provider_settings(
        self, provider_name: str, settings: ProviderSettings
    ) -> None:
        """Set settings for a specific provider.

        Args:
            provider_name: Name of the provider.
            settings: Provider settings to apply.
        """
        self._provider_settings[provider_name] = settings
        logger.info("Updated settings for provider: %s", provider_name)

    def update_provider_settings(
        self, provider_name: str, **kwargs
    ) -> bool:
        """Update specific provider settings, creating them if needed.

        Keyword arguments that are not ProviderSettings fields are ignored
        (with a warning) on both the create and the update path.

        Args:
            provider_name: Name of the provider.
            **kwargs: Settings to update.

        Returns:
            Always True: an unknown provider gets a new settings entry
            rather than being reported as not found.
        """
        known = self._setting_field_names()
        ignored = sorted(key for key in kwargs if key not in known)
        if ignored:
            logger.warning(
                "Ignoring unknown settings for provider %s: %s",
                provider_name,
                ignored,
            )

        if provider_name not in self._provider_settings:
            # Create new settings. ``name`` always comes from
            # provider_name so the entry matches its registry key.
            initial = {
                key: value
                for key, value in kwargs.items()
                if key in known and key != "name"
            }
            self._provider_settings[provider_name] = ProviderSettings(
                name=provider_name, **initial
            )
            logger.info(
                "Created new settings for provider: %s", provider_name
            )
            return True

        settings = self._provider_settings[provider_name]
        # Update only declared fields; a plain hasattr() check would also
        # allow overwriting methods such as to_dict.
        for key, value in kwargs.items():
            if key in known:
                setattr(settings, key, value)
        logger.info(
            "Updated settings for provider %s: %s", provider_name, kwargs
        )
        return True

    def get_all_provider_settings(self) -> Dict[str, ProviderSettings]:
        """Get settings for all configured providers.

        Returns:
            Shallow copy of the mapping from provider names to settings.
        """
        return self._provider_settings.copy()

    def get_enabled_providers(self) -> List[str]:
        """Get list of enabled providers.

        Returns:
            List of enabled provider names.
        """
        return [
            name
            for name, settings in self._provider_settings.items()
            if settings.enabled
        ]

    def enable_provider(self, provider_name: str) -> bool:
        """Enable a provider.

        Args:
            provider_name: Name of the provider.

        Returns:
            True if enabled, False if not found.
        """
        if provider_name in self._provider_settings:
            self._provider_settings[provider_name].enabled = True
            logger.info("Enabled provider: %s", provider_name)
            return True
        return False

    def disable_provider(self, provider_name: str) -> bool:
        """Disable a provider.

        Args:
            provider_name: Name of the provider.

        Returns:
            True if disabled, False if not found.
        """
        if provider_name in self._provider_settings:
            self._provider_settings[provider_name].enabled = False
            logger.info("Disabled provider: %s", provider_name)
            return True
        return False

    def set_provider_priority(
        self, provider_name: str, priority: int
    ) -> bool:
        """Set priority for a provider.

        Lower priority values = higher priority.

        Args:
            provider_name: Name of the provider.
            priority: Priority value (lower = higher priority).

        Returns:
            True if updated, False if not found.
        """
        if provider_name in self._provider_settings:
            self._provider_settings[provider_name].priority = priority
            logger.info(
                "Set priority for %s to %s", provider_name, priority
            )
            return True
        return False

    def get_providers_by_priority(self) -> List[str]:
        """Get providers sorted by priority.

        Returns:
            List of provider names sorted by priority (low to high).
        """
        sorted_providers = sorted(
            self._provider_settings.items(),
            key=lambda x: x[1].priority,
        )
        return [name for name, _ in sorted_providers]

    def get_global_setting(self, key: str) -> Optional[Any]:
        """Get a global setting value.

        Args:
            key: Setting key.

        Returns:
            Setting value or None if not found.
        """
        return self._global_settings.get(key)

    def set_global_setting(self, key: str, value: Any) -> None:
        """Set a global setting value.

        Args:
            key: Setting key.
            value: Setting value.
        """
        self._global_settings[key] = value
        logger.info("Updated global setting %s: %s", key, value)

    def get_all_global_settings(self) -> Dict[str, Any]:
        """Get all global settings.

        Returns:
            Shallow copy of the global settings dictionary.
        """
        return self._global_settings.copy()

    def load_config(self, file_path: Optional[Path] = None) -> bool:
        """Load configuration from file.

        Loaded provider entries and global values are merged into the
        current state. Nothing is changed when loading fails.

        Args:
            file_path: Path to configuration file (uses default if None).

        Returns:
            True if loaded successfully, False otherwise.
        """
        config_path = file_path or self._config_file
        if not config_path or not config_path.exists():
            logger.warning(
                "Configuration file not found: %s", config_path
            )
            return False

        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                raise ValueError(
                    "configuration root must be a JSON object"
                )
            # Parse everything before touching state, so that a malformed
            # entry cannot leave a half-applied configuration behind.
            loaded_providers = {
                name: ProviderSettings.from_dict(settings_data)
                for name, settings_data in data.get(
                    "providers", {}
                ).items()
            }
            loaded_globals = dict(data.get("global", {}))
        except Exception as e:
            # Boundary handler: this method reports failure via its return
            # value instead of raising.
            logger.error(
                "Failed to load configuration from %s: %s",
                config_path,
                e,
                exc_info=True,
            )
            return False

        self._provider_settings.update(loaded_providers)
        self._global_settings.update(loaded_globals)
        logger.info(
            "Loaded configuration from %s (%d providers)",
            config_path,
            len(self._provider_settings),
        )
        return True

    def save_config(self, file_path: Optional[Path] = None) -> bool:
        """Save configuration to file.

        Args:
            file_path: Path to save to (uses default if None).

        Returns:
            True if saved successfully, False otherwise.
        """
        config_path = file_path or self._config_file
        if not config_path:
            logger.error("No configuration file path specified")
            return False

        try:
            data = {
                "providers": {
                    name: settings.to_dict()
                    for name, settings in self._provider_settings.items()
                },
                "global": self._global_settings,
            }
            # Serialize before opening the file: a non-serializable value
            # then fails here instead of leaving a truncated file on disk.
            payload = json.dumps(data, indent=2)

            # Ensure parent directory exists
            config_path.parent.mkdir(parents=True, exist_ok=True)
            with open(config_path, "w", encoding="utf-8") as f:
                f.write(payload)
            logger.info("Saved configuration to %s", config_path)
            return True
        except Exception as e:
            # Boundary handler: failure is reported via the return value.
            logger.error(
                "Failed to save configuration to %s: %s",
                config_path,
                e,
                exc_info=True,
            )
            return False

    def reset_to_defaults(self) -> None:
        """Reset all settings to defaults.

        Removes every provider entry and restores the built-in global
        settings.
        """
        self._provider_settings.clear()
        self._global_settings = self._default_global_settings()
        logger.info("Reset configuration to defaults")
# Global configuration manager instance.
# Stays None until get_config_manager() creates it on first use.
_config_manager: Optional[ProviderConfigManager] = None
def get_config_manager(
    config_file: Optional[Path] = None,
) -> ProviderConfigManager:
    """Return the shared ProviderConfigManager, creating it on first use.

    Args:
        config_file: Configuration file path. Only the first call, which
            creates the instance, uses it; later calls ignore it.

    Returns:
        Global ProviderConfigManager instance.
    """
    global _config_manager
    if _config_manager is not None:
        return _config_manager
    _config_manager = ProviderConfigManager(config_file=config_file)
    return _config_manager

View File

@@ -0,0 +1,998 @@
"""
Enhanced AniWorld Loader with Error Handling and Recovery
This module extends the original AniWorldLoader with comprehensive
error handling, retry mechanisms, and recovery strategies.
"""
import html
import json
import logging
import os
import re
import shutil
from pathlib import Path
from typing import Any, Callable, Dict, Optional
from urllib.parse import quote
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from yt_dlp import YoutubeDL
from ...infrastructure.security.file_integrity import get_integrity_manager
from ..error_handler import (
DownloadError,
NetworkError,
NonRetryableError,
RetryableError,
file_corruption_detector,
recovery_strategies,
with_error_recovery,
)
from ..interfaces.providers import Providers
from .base_provider import Loader
from .provider_config import (
ANIWORLD_HEADERS,
DEFAULT_PROVIDERS,
INVALID_PATH_CHARS,
LULUVDO_USER_AGENT,
ProviderType,
)
def _cleanup_temp_file(
temp_path: str,
logger: Optional[logging.Logger] = None,
) -> None:
"""Remove a temp file and any associated yt-dlp partial files.
Args:
temp_path: Path to the primary temp file.
logger: Optional logger for diagnostic messages.
"""
_log = logger or logging.getLogger(__name__)
candidates = [temp_path]
# yt-dlp creates fragment files like <file>.part
candidates.extend(
str(p) for p in Path(temp_path).parent.glob(
Path(temp_path).name + ".*"
)
)
for path in candidates:
if os.path.exists(path):
try:
os.remove(path)
_log.debug(f"Removed temp file: {path}")
except OSError as exc:
_log.warning(f"Failed to remove temp file {path}: {exc}")
class EnhancedAniWorldLoader(Loader):
"""Aniworld provider with retry and recovery strategies.
Also exposes metrics hooks for download statistics.
"""
def __init__(self) -> None:
    """Initialise provider constants, the HTTP session, caches and counters.

    The download timeout is read from the ``DOWNLOAD_TIMEOUT`` environment
    variable. A missing, empty or non-numeric value falls back to 600
    instead of aborting construction with ``ValueError``.
    """
    super().__init__()
    self.logger = logging.getLogger(__name__)
    self.SUPPORTED_PROVIDERS = DEFAULT_PROVIDERS
    # local copy so modifications don't mutate shared constant
    self.AniworldHeaders = dict(ANIWORLD_HEADERS)
    self.INVALID_PATH_CHARS = INVALID_PATH_CHARS
    self.RANDOM_USER_AGENT = UserAgent().random
    self.LULUVDO_USER_AGENT = LULUVDO_USER_AGENT
    # Per-provider header lines, keyed by the provider enum's value.
    self.PROVIDER_HEADERS = {
        ProviderType.VIDMOLY.value: ['Referer: "https://vidmoly.to"'],
        ProviderType.DOODSTREAM.value: [
            'Referer: "https://dood.li/"',
            'Referer: "https://playmogo.com/"',
        ],
        ProviderType.VOE.value: [f'User-Agent: {self.RANDOM_USER_AGENT}'],
        ProviderType.LULUVDO.value: [
            f'User-Agent: {self.LULUVDO_USER_AGENT}',
            "Accept-Language: de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
            'Origin: "https://luluvdo.com"',
            'Referer: "https://luluvdo.com/"',
        ],
    }
    self.ANIWORLD_TO = "https://aniworld.to"
    self.DEFAULT_REQUEST_TIMEOUT = 30

    # Initialize session with enhanced retry configuration
    self.session = self._create_robust_session()

    # Cache dictionaries
    self._KeyHTMLDict = {}
    self._EpisodeHTMLDict = {}

    # Provider manager
    self.Providers = Providers()

    # Download statistics
    self.download_stats = {
        'total_downloads': 0,
        'successful_downloads': 0,
        'failed_downloads': 0,
        'retried_downloads': 0
    }

    # Read timeout from environment variable. ``int()`` raises on garbage
    # such as "10m", so the conversion is guarded and a bad value degrades
    # to the default rather than preventing the loader from being created.
    raw_timeout = os.getenv("DOWNLOAD_TIMEOUT") or "600"
    try:
        self.download_timeout = int(raw_timeout)
    except ValueError:
        self.logger.warning(
            "Invalid DOWNLOAD_TIMEOUT value %r; falling back to 600",
            raw_timeout,
        )
        self.download_timeout = 600

    # Setup logging
    self._setup_logging()
def _create_robust_session(self) -> requests.Session:
    """Build the ``requests`` session with retrying, pooled adapters.

    Returns:
        A session whose ``http://`` and ``https://`` adapters retry up to
        five times with backoff and whose default headers are the loader's
        ``AniworldHeaders``.
    """
    # Transient failures worth retrying: timeouts, rate limits, generic
    # server errors and the Cloudflare-origin 52x responses that AniWorld
    # occasionally emits under load. Permanent errors still fail fast.
    retryable_statuses = [
        408, 429,
        500, 502, 503, 504,
        520, 521, 522, 523, 524,
    ]
    retry_policy = Retry(
        total=5,
        backoff_factor=2,  # More aggressive backoff
        status_forcelist=retryable_statuses,
        allowed_methods=["GET", "POST", "HEAD"],
        raise_on_status=False,  # Handle status errors manually
    )
    pooled_adapter = HTTPAdapter(
        max_retries=retry_policy,
        pool_connections=10,
        pool_maxsize=20,
        pool_block=True
    )

    http_session = requests.Session()
    for scheme in ("https://", "http://"):
        http_session.mount(scheme, pooled_adapter)

    # Set default headers
    http_session.headers.update(self.AniworldHeaders)
    return http_session
def _setup_logging(self):
    """Create the file-backed loggers for failed downloads and missing keys.

    Log files are written to ``<four directories above this file>/logs`` so
    their location does not depend on the working directory. A file handler
    is only attached to a logger that has no handlers yet.
    """
    logs_dir = Path(__file__).parent.parent.parent.parent / "logs"
    logs_dir.mkdir(parents=True, exist_ok=True)
    line_format = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    def _attach_error_file(target: logging.Logger, file_name: str) -> None:
        """Give *target* an ERROR-level file handler unless it has handlers."""
        if not target.handlers:
            file_handler = logging.FileHandler(str(logs_dir / file_name))
            file_handler.setLevel(logging.ERROR)
            file_handler.setFormatter(line_format)
            target.addHandler(file_handler)
        target.setLevel(logging.ERROR)

    # Download error logger — records every failed download attempt
    self.download_error_logger = logging.getLogger("DownloadErrors")
    _attach_error_file(self.download_error_logger, "download_errors.log")

    # No-key logger — records episodes for which no stream key was found
    self.nokey_logger = logging.getLogger("NoKeyFound")
    _attach_error_file(self.nokey_logger, "no_key_found.log")
def ClearCache(self):
    """Empty both the series-page cache and the episode-page cache."""
    for cache in (self._KeyHTMLDict, self._EpisodeHTMLDict):
        cache.clear()
    self.logger.debug("Cache cleared")
def RemoveFromCache(self):
    """Empty the episode-page cache; the series-page cache is kept."""
    episode_cache = self._EpisodeHTMLDict
    episode_cache.clear()
    self.logger.debug("Episode cache cleared")
@with_error_recovery(max_retries=3, context="anime_search")
def Search(self, word: str) -> list:
    """Query AniWorld's series search endpoint for *word*.

    Args:
        word: Search term; it is URL-quoted before being sent.

    Returns:
        The parsed result list.

    Raises:
        ValueError: If *word* is empty or only whitespace.
        RetryableError: For any failure while fetching or parsing.
    """
    if not (word and word.strip()):
        raise ValueError("Search term cannot be empty")

    keyword = quote(word)
    search_url = f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={keyword}"
    try:
        results = self._fetch_anime_list_with_recovery(search_url)
    except Exception as e:
        # NOTE(review): this also re-labels NonRetryableError (404/403 from
        # the fetch helper) as retryable — confirm that is intended.
        self.logger.error("Search failed for term '%s': %s", word, e)
        raise RetryableError(f"Search failed: {e}") from e
    return results
def _fetch_anime_list_with_recovery(self, url: str) -> list:
    """GET *url* through the shared session and parse the result list.

    Raises:
        NonRetryableError: On HTTP 404 or 403.
        RetryableError: On any other non-OK status.
        NetworkError: When the request itself fails.
    """
    try:
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT
        )
        if response.ok:
            return self._parse_anime_response(response.text)

        status = response.status_code
        if status == 404:
            raise NonRetryableError(f"URL not found: {url}")
        if status == 403:
            raise NonRetryableError(f"Access forbidden: {url}")
        if status >= 500:
            # Log suspicious server errors for monitoring
            self.logger.warning(
                f"Server error {status} from {url} - will retry"
            )
            raise RetryableError(f"Server error {status}")
        raise RetryableError(f"HTTP error {status}")
    except (requests.RequestException, ConnectionError) as e:
        raise NetworkError(f"Network error during anime search: {e}") from e
def _parse_anime_response(self, response_text: str) -> list:
"""Parse anime search response with error handling."""
if not response_text or not response_text.strip():
raise ValueError("Empty response from server")
clean_text = response_text.strip()
# Quick fail for obviously non-JSON responses
if not (clean_text.startswith('[') or clean_text.startswith('{')):
# Check if it's HTML error page
if clean_text.lower().startswith('<!doctype') or \
clean_text.lower().startswith('<html'):
raise ValueError("Received HTML instead of JSON")
# If doesn't start with JSON markers, likely not JSON
self.logger.warning(
"Response doesn't start with JSON markers, "
"attempting parse anyway"
)
# Attempt increasingly permissive parsing strategies to cope with
# upstream anomalies such as HTML escaping, stray BOM markers, and
# injected control characters.
parsing_strategies = [
lambda text: json.loads(html.unescape(text)),
lambda text: json.loads(text.encode('utf-8').decode('utf-8-sig')),
lambda text: json.loads(re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text))
]
for i, strategy in enumerate(parsing_strategies):
try:
decoded_data = strategy(clean_text)
if isinstance(decoded_data, list):
msg = (
f"Successfully parsed anime response with "
f"strategy {i + 1}"
)
self.logger.debug(msg)
return decoded_data
else:
msg = (
f"Strategy {i + 1} returned non-list data: "
f"{type(decoded_data)}"
)
self.logger.warning(msg)
except json.JSONDecodeError as e:
msg = f"Parsing strategy {i + 1} failed: {e}"
self.logger.debug(msg)
continue
raise ValueError(
"Could not parse anime search response with any strategy"
)
def _GetLanguageKey(self, language: str) -> int:
"""Get numeric language code."""
language_map = {
"German Dub": 1,
"English Sub": 2,
"German Sub": 3,
}
return language_map.get(language, 0)
@with_error_recovery(max_retries=2, context="language_check")
def IsLanguage(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
) -> bool:
    """Tell whether the episode page offers *language*.

    The numeric ``data-lang-key`` values of the images inside the page's
    ``changeLanguageBox`` are compared with the code for *language*.

    Returns:
        True if the language is offered; False if it is not or the page
        has no language box.

    Raises:
        RetryableError: For every failure, including an unknown language
            label (the original error is chained).
    """
    try:
        requested_code = self._GetLanguageKey(language)
        if requested_code == 0:
            raise ValueError(f"Unknown language: {language}")

        page = self._GetEpisodeHTML(season, episode, key)
        soup = BeautifulSoup(page.content, "html.parser")
        lang_box = soup.find("div", class_="changeLanguageBox")
        if not lang_box:
            self.logger.debug(
                f"No language box found for {key} S{season}E{episode}"
            )
            return False

        raw_keys = (img.get("data-lang-key") for img in lang_box.find_all("img"))
        offered = [int(raw) for raw in raw_keys if raw and raw.isdigit()]
        is_available = requested_code in offered
        self.logger.debug(
            f"Language check for {key} S{season}E{episode}: "
            f"Requested={requested_code}, "
            f"Available={offered}, "
            f"Result={is_available}"
        )
        return is_available
    except Exception as e:
        self.logger.error(
            f"Language check failed for {key} S{season}E{episode}: {e}"
        )
        raise RetryableError(f"Language check failed: {e}") from e
def Download(
    self,
    baseDirectory: str,
    serieFolder: str,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
    progress_callback: Optional[Callable] = None,
) -> bool:
    """Download one episode into ``<base>/<folder>/Season <n>/``.

    An existing target file that passes the video and checksum checks is
    kept and counted as a success. Otherwise the episode is fetched into
    ``./Temp/`` and copied to its final location. Both caches are cleared
    when the method exits, whatever the outcome.

    Args:
        baseDirectory: Base download directory path.
        serieFolder: Folder name of the series below *baseDirectory*.
        season: Season number; 0 selects the ``Movie`` file-name scheme.
        episode: Episode number.
        key: Series identifier used for the provider requests.
        language: Audio language label (default: German Dub).
        progress_callback: Optional progress hook handed to the downloader.

    Returns:
        True if the file was downloaded or a valid copy already existed;
        False if every download attempt failed.

    Raises:
        DownloadError: For any exception raised while processing,
            including the parameter validation errors (chained).
    """
    # NOTE(review): season 0 is accepted here, but _GetEpisodeHTML rejects
    # seasons below 1 — confirm how movies are supposed to be fetched.
    self.download_stats["total_downloads"] += 1
    try:
        # Validate inputs
        if not (baseDirectory and serieFolder and key):
            raise ValueError("Missing required parameters for download")
        if season < 0 or episode < 0:
            raise ValueError("Season and episode must be non-negative")

        # File name: series title with path-hostile characters dropped
        sanitized_anime_title = "".join(
            ch
            for ch in self.GetTitle(key)
            if ch not in self.INVALID_PATH_CHARS
        ) or f"Unknown_{key}"
        episode_tag = (
            f"Movie {episode:02}"
            if season == 0
            else f"S{season:02}E{episode:03}"
        )
        output_file = (
            f"{sanitized_anime_title} - {episode_tag} - ({language}).mp4"
        )

        folder_path = os.path.join(
            baseDirectory, serieFolder, f"Season {season}"
        )
        output_path = os.path.join(folder_path, output_file)

        # Reuse an existing file only if it still looks intact
        if os.path.exists(output_path):
            is_valid = file_corruption_detector.is_valid_video_file(
                output_path
            )
            integrity_mgr = get_integrity_manager()
            stored_path = Path(output_path)
            checksum_valid = True
            if integrity_mgr.has_checksum(stored_path):
                checksum_valid = integrity_mgr.verify_checksum(stored_path)
                if not checksum_valid:
                    self.logger.warning(
                        f"Checksum verification failed for {output_file}"
                    )

            if is_valid and checksum_valid:
                self.logger.info(
                    f"File already exists and is valid: {output_file}"
                )
                self.download_stats["successful_downloads"] += 1
                return True

            self.logger.warning(
                f"Existing file appears corrupted, removing: {output_path}"
            )
            try:
                os.remove(output_path)
                # Remove checksum entry
                integrity_mgr.remove_checksum(stored_path)
            except OSError as e:
                self.logger.error(f"Failed to remove corrupted file: {e}")

        os.makedirs(folder_path, exist_ok=True)

        # Downloads land in a temp directory first
        temp_dir = "./Temp/"
        os.makedirs(temp_dir, exist_ok=True)
        temp_path = os.path.join(temp_dir, output_file)

        success = self._download_with_recovery(
            season,
            episode,
            key,
            language,
            temp_path,
            output_path,
            progress_callback,
        )

        if not success:
            self.download_stats["failed_downloads"] += 1
            self.download_error_logger.error(
                f"Download failed for {key} S{season}E{episode} "
                f"({language})"
            )
        else:
            self.download_stats["successful_downloads"] += 1
            self.logger.info(f"Successfully downloaded: {output_file}")
        return success
    except Exception as e:
        self.download_stats["failed_downloads"] += 1
        self.download_error_logger.error(
            f"Download error for {key} S{season}E{episode}: {e}",
            exc_info=True,
        )
        raise DownloadError(f"Download failed: {e}") from e
    finally:
        self.ClearCache()
def _download_with_recovery(
    self,
    season: int,
    episode: int,
    key: str,
    language: str,
    temp_path: str,
    output_path: str,
    progress_callback: Optional[Callable],
) -> bool:
    """Run one download attempt per entry of ``SUPPORTED_PROVIDERS``.

    Each attempt resolves a direct link, downloads it with yt-dlp into
    *temp_path*, validates the file, copies it to *output_path* and stores
    its checksum. An exception in an attempt is logged, counted in
    ``retried_downloads`` and followed by the next attempt.

    Returns:
        True after the first attempt that leaves a valid file at
        *output_path*; False if no attempt did.
    """
    # NOTE(review): provider_name only appears in log messages here; the
    # link lookup is called with the same arguments on every pass.
    for provider_name in self.SUPPORTED_PROVIDERS:
        try:
            self.logger.info(
                f"Attempting download with provider: {provider_name}"
            )

            # Get download link and headers for provider
            link, headers = recovery_strategies.handle_network_failure(
                self._get_direct_link_from_provider,
                season,
                episode,
                key,
                language,
            )
            if not link:
                self.logger.warning(
                    f"No download link found for provider: {provider_name}"
                )
                continue

            # Configure yt-dlp options
            ydl_opts = {
                "fragment_retries": float("inf"),
                "outtmpl": temp_path,
                "quiet": True,
                "no_warnings": True,
                "progress_with_newline": False,
                "nocheckcertificate": True,
                "socket_timeout": self.download_timeout,
                "http_chunk_size": 1024 * 1024,  # 1MB chunks
                "logger": self.logger,
                # Use ffmpeg for HLS streams and transport stream format
                "downloader": "ffmpeg",
                "hls_use_mpegts": True,
            }
            if headers:
                ydl_opts['http_headers'] = headers
            if progress_callback:
                ydl_opts['progress_hooks'] = [progress_callback]

            # Perform download with recovery
            success = recovery_strategies.handle_download_failure(
                self._perform_ytdl_download,
                temp_path,
                ydl_opts,
                link
            )
            if not (success and os.path.exists(temp_path)):
                continue

            if not file_corruption_detector.is_valid_video_file(temp_path):
                self.logger.warning(
                    f"Downloaded file failed validation: {temp_path}"
                )
                try:
                    os.remove(temp_path)
                except OSError as e:
                    self.logger.warning(f"Failed to remove temp file: {e}")
                continue

            # Copy to the final location. copyfile (not copy2) is used to
            # avoid metadata permission issues.
            shutil.copyfile(temp_path, output_path)

            # Calculate and store checksum for integrity
            integrity_mgr = get_integrity_manager()
            try:
                final_file = Path(output_path)
                checksum = integrity_mgr.store_checksum(final_file)
                self.logger.info(
                    f"Stored checksum for {final_file.name}: "
                    f"{checksum[:16]}..."
                )
            except Exception as e:
                self.logger.warning(f"Failed to store checksum: {e}")

            # Clean up temp file
            try:
                os.remove(temp_path)
            except Exception as e:
                self.logger.warning(f"Failed to remove temp file: {e}")
            return True
        except Exception as e:
            self.logger.warning("Provider %s failed: %s", provider_name, e)
            # Clean up any partial temp files left by this failed attempt
            _cleanup_temp_file(temp_path, self.logger)
            self.download_stats['retried_downloads'] += 1
            continue

    # All providers failed: make sure no temp remnants are left behind
    _cleanup_temp_file(temp_path, self.logger)
    return False
def _perform_ytdl_download(
    self, ydl_opts: Dict[str, Any], link: str
) -> bool:
    """Download *link* with yt-dlp configured by *ydl_opts*.

    Returns:
        True when yt-dlp finishes without raising.

    Raises:
        DownloadError: Wrapping any exception raised by yt-dlp.
    """
    try:
        with YoutubeDL(ydl_opts) as downloader:
            downloader.download([link])
    except Exception as e:
        self.logger.error("yt-dlp download failed: %s", e)
        raise DownloadError(f"Download failed: {e}") from e
    else:
        return True
@with_error_recovery(max_retries=2, context="get_title")
def GetTitle(self, key: str) -> str:
    """Read the series title from the series page.

    The title is the text of the ``span`` inside the ``h1`` of the
    ``series-title`` div.

    Returns:
        The stripped title, or ``Unknown_Title_<key>`` when that element
        chain is not present.

    Raises:
        RetryableError: If fetching or parsing the page fails.
    """
    try:
        soup = BeautifulSoup(self._GetKeyHTML(key).content, 'html.parser')
        title_div = soup.find('div', class_='series-title')
        heading = title_div.find('h1') if title_div else None
        span = heading.find('span') if heading else None
        if span:
            return span.text.strip()

        self.logger.warning("Could not extract title for key: %s", key)
        return f"Unknown_Title_{key}"
    except Exception as e:
        self.logger.error("Failed to get title for key %s: %s", key, e)
        raise RetryableError(f"Title extraction failed: {e}") from e
def GetSiteKey(self) -> str:
    """Return the fixed identifier of the site this loader serves."""
    site_key = "aniworld.to"
    return site_key
@with_error_recovery(max_retries=2, context="get_key_html")
def _GetKeyHTML(self, key: str):
    """Return the series page response for *key*, fetching it at most once.

    Successful responses are cached per key.

    Raises:
        NonRetryableError: On HTTP 404 (also written to the no-key log).
        RetryableError: On any other non-OK status.
    """
    try:
        return self._KeyHTMLDict[key]
    except KeyError:
        pass  # not cached yet, fetch below

    try:
        url = f"{self.ANIWORLD_TO}/anime/stream/{key}"
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT
        )
        if response.ok:
            self._KeyHTMLDict[key] = response
            return response

        if response.status_code == 404:
            msg = f"Anime key not found: {key}"
            self.nokey_logger.error(msg)
            raise NonRetryableError(msg)
        raise RetryableError(
            f"HTTP error {response.status_code} for key {key}"
        )
    except Exception as e:
        # Log here, let the caller / decorator decide what to do with it.
        self.logger.error(f"Failed to get HTML for key {key}: {e}")
        raise
@with_error_recovery(max_retries=2, context="get_episode_html")
def _GetEpisodeHTML(self, season: int, episode: int, key: str):
    """Return the episode page response, fetching it at most once.

    Successful responses are cached per ``(key, season, episode)``.

    Args:
        season: Season number (must be 1-999)
        episode: Episode number (must be 1-9999)
        key: Series identifier (must not be empty or blank)

    Returns:
        Cached or freshly fetched response.

    Raises:
        ValueError: If a parameter is out of range or the key is blank
        NonRetryableError: If the episode page returns 404
        RetryableError: On any other non-OK status
    """
    # Validate parameters
    if not key or not key.strip():
        raise ValueError("Series key cannot be empty")
    bounds = (("season", season, 999), ("episode", episode, 9999))
    for label, value, upper in bounds:
        if value < 1 or value > upper:
            raise ValueError(
                f"Invalid {label} number: {value} (must be 1-{upper})"
            )

    cache_key = (key, season, episode)
    try:
        return self._EpisodeHTMLDict[cache_key]
    except KeyError:
        pass  # not cached yet, fetch below

    try:
        url = (
            f"{self.ANIWORLD_TO}/anime/stream/{key}/"
            f"staffel-{season}/episode-{episode}"
        )
        response = recovery_strategies.handle_network_failure(
            self.session.get, url, timeout=self.DEFAULT_REQUEST_TIMEOUT
        )
        if response.ok:
            self._EpisodeHTMLDict[cache_key] = response
            return response

        if response.status_code == 404:
            raise NonRetryableError(
                f"Episode not found: {key} S{season}E{episode}"
            )
        raise RetryableError(
            f"HTTP error {response.status_code} for episode"
        )
    except Exception as e:
        self.logger.error(
            f"Failed to get episode HTML for {key} "
            f"S{season}E{episode}: {e}"
        )
        raise
def _get_provider_from_html(
    self, season: int, episode: int, key: str
) -> dict:
    """Collect the hoster redirect links listed on an episode page.

    Returns:
        ``{provider name: {language code: absolute redirect URL}}`` built
        from the ``li`` elements whose class starts with ``episodeLink``.
        Entries lacking a name, a link or a numeric language key are
        skipped; the dict is empty when the page lists no links.

    Raises:
        RetryableError: If fetching or parsing the page fails.
    """
    try:
        page = self._GetEpisodeHTML(season, episode, key)
        soup = BeautifulSoup(page.content, "html.parser")
        providers: dict[str, dict] = {}

        entries = soup.find_all(
            "li", class_=lambda x: x and x.startswith("episodeLink")
        )
        if not entries:
            self.logger.warning(
                f"No episode links found for {key} S{season}E{episode}"
            )
            return providers

        for entry in entries:
            name_tag = entry.find("h4")
            provider_name = name_tag.text.strip() if name_tag else None

            anchor = entry.find("a", class_="watchEpisode")
            redirect_link = anchor["href"] if anchor else None

            raw_lang = entry.get("data-lang-key")
            lang_key = (
                int(raw_lang) if raw_lang and raw_lang.isdigit() else None
            )

            if not (provider_name and redirect_link and lang_key):
                continue
            if provider_name not in providers:
                providers[provider_name] = {}
            providers[provider_name][lang_key] = (
                f"{self.ANIWORLD_TO}{redirect_link}"
            )

        self.logger.debug(
            f"Found {len(providers)} providers for "
            f"{key} S{season}E{episode}"
        )
        return providers
    except Exception as e:
        self.logger.error(f"Failed to parse providers from HTML: {e}")
        raise RetryableError(f"Provider parsing failed: {e}") from e
def _get_redirect_link(
self,
season: int,
episode: int,
key: str,
language: str = "German Dub",
):
"""Get redirect link for episode with error handling."""
languageCode = self._GetLanguageKey(language)
if not self.IsLanguage(season, episode, key, language):
err_msg = (
f"Language {language} not available for "
f"{key} S{season}E{episode}"
)
raise NonRetryableError(err_msg)
providers = self._get_provider_from_html(season, episode, key)
for provider_name, lang_dict in providers.items():
if languageCode in lang_dict:
return lang_dict[languageCode], provider_name
err_msg = (
f"No provider found for {language} in "
f"{key} S{season}E{episode}"
)
raise NonRetryableError(err_msg)
def _get_embeded_link(
    self,
    season: int,
    episode: int,
    key: str,
    language: str = "German Dub",
):
    """Request the episode's redirect link and return the response URL.

    Returns:
        ``response.url`` of the GET on the redirect link, sent with the
        loader's random User-Agent.

    Raises:
        Whatever the lookup or the request raises; the error is logged
        and re-raised unchanged.
    """
    try:
        # The provider name from the lookup is not needed here.
        redirect_link, _provider_name = self._get_redirect_link(
            season, episode, key, language
        )
        browser_headers = {"User-Agent": self.RANDOM_USER_AGENT}
        response = recovery_strategies.handle_network_failure(
            self.session.get,
            redirect_link,
            timeout=self.DEFAULT_REQUEST_TIMEOUT,
            headers=browser_headers,
        )
        return response.url
    except Exception as e:
        self.logger.error(f"Failed to get embedded link: {e}")
        raise
def _get_direct_link_from_provider(
self,
season: int,
episode: int,
key: str,
language: str = "German Dub",
):
"""Get direct download link from provider."""
try:
embedded_link = self._get_embeded_link(
season, episode, key, language
)
if not embedded_link:
raise NonRetryableError("No embedded link found")
# Use VOE provider as default (could be made configurable)
provider = self.Providers.GetProvider("VOE")
if not provider:
raise NonRetryableError("VOE provider not available")
return provider.get_link(
embedded_link, self.DEFAULT_REQUEST_TIMEOUT
)
except Exception as e:
error_msg = f"Failed to get direct link from provider: {e}"
self.logger.error(error_msg)
raise
@with_error_recovery(max_retries=2, context="get_season_episode_count")
def get_season_episode_count(self, slug: str) -> dict:
    """Count the episodes of every season of a series.

    The number of seasons comes from the ``numberOfSeasons`` meta tag of
    the series page. For each season, the distinct links containing
    ``staffel-<n>/episode-`` on the season page are counted.

    Args:
        slug: Series identifier used in the AniWorld URL.

    Returns:
        ``{season number: episode count}``; empty when the series page
        has no ``numberOfSeasons`` meta tag.

    Raises:
        RetryableError: If any request or parsing step fails.
    """
    try:
        # Use the loader's own session, as the other page fetches in this
        # class do, so the retry adapter and default headers apply here
        # too. A bare ``requests.get`` would bypass both.
        fetch = self.session.get
        base_url = f"{self.ANIWORLD_TO}/anime/stream/{slug}/"
        response = recovery_strategies.handle_network_failure(
            fetch,
            base_url,
            timeout=self.DEFAULT_REQUEST_TIMEOUT,
        )
        soup = BeautifulSoup(response.content, "html.parser")
        season_meta = soup.find("meta", itemprop="numberOfSeasons")
        number_of_seasons = (
            int(season_meta["content"]) if season_meta else 0
        )

        episode_counts = {}
        for season in range(1, number_of_seasons + 1):
            season_url = f"{base_url}staffel-{season}"
            season_response = recovery_strategies.handle_network_failure(
                fetch,
                season_url,
                timeout=self.DEFAULT_REQUEST_TIMEOUT,
            )
            season_soup = BeautifulSoup(
                season_response.content, "html.parser"
            )
            # The same episode can be linked several times on the page,
            # so count distinct hrefs.
            marker = f"staffel-{season}/episode-"
            unique_links = {
                link["href"]
                for link in season_soup.find_all("a", href=True)
                if marker in link["href"]
            }
            episode_counts[season] = len(unique_links)
        return episode_counts
    except Exception as e:
        self.logger.error("Failed to get episode counts for %s: %s", slug, e)
        raise RetryableError(f"Episode count retrieval failed: {e}") from e
def get_download_statistics(self) -> Dict[str, Any]:
    """Return a copy of the counters plus a ``success_rate`` percentage.

    ``success_rate`` is 0 while no download has been counted.
    """
    stats = dict(self.download_stats)
    total = stats['total_downloads']
    if total > 0:
        stats['success_rate'] = stats['successful_downloads'] / total * 100
    else:
        stats['success_rate'] = 0
    return stats
def reset_statistics(self):
    """Replace the download counters with a fresh all-zero dict."""
    counter_names = (
        'total_downloads',
        'successful_downloads',
        'failed_downloads',
        'retried_downloads',
    )
    self.download_stats = dict.fromkeys(counter_names, 0)
# Legacy class name kept for existing callers; it adds nothing on top of
# the enhanced loader.
class AniworldLoader(EnhancedAniWorldLoader):
    """Backward compatibility wrapper for the enhanced loader."""

View File

@@ -0,0 +1,325 @@
"""Provider failover system for automatic fallback on failures.
This module implements automatic failover between multiple providers,
ensuring high availability by switching to backup providers when the
primary fails.
"""
import asyncio
import logging
from typing import Any, Callable, Dict, List, Optional, TypeVar
from src.server.providers.health_monitor import get_health_monitor
from src.server.providers.provider_config import DEFAULT_PROVIDERS
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Generic type variable.
# NOTE(review): no use of T is visible in this module — confirm it is needed.
T = TypeVar("T")
class ProviderFailover:
"""Manages automatic failover between multiple providers."""
def __init__(
    self,
    providers: Optional[List[str]] = None,
    max_retries: int = 3,
    retry_delay: float = 1.0,
    enable_health_monitoring: bool = True,
):
    """Initialize provider failover manager.

    Args:
        providers: Provider names to use, in priority order. ``None`` or
            an empty sequence selects a copy of ``DEFAULT_PROVIDERS``.
            The sequence is copied, so later changes to the chain never
            touch the caller's object.
        max_retries: Maximum retry attempts per provider.
        retry_delay: Delay between retries in seconds.
        enable_health_monitoring: Whether to use health monitoring.
    """
    # The chain is edited in place by add_provider / remove_provider /
    # set_provider_priority, so it must be a list this instance owns.
    self._providers = (
        list(providers) if providers else DEFAULT_PROVIDERS.copy()
    )
    self._max_retries = max_retries
    self._retry_delay = retry_delay
    self._enable_health_monitoring = enable_health_monitoring

    # Current provider index (used when selection is by rotation)
    self._current_index = 0

    # Health monitor, only fetched when monitoring is enabled
    self._health_monitor = (
        get_health_monitor() if enable_health_monitoring else None
    )

    logger.info(
        f"Provider failover initialized with "
        f"{len(self._providers)} providers"
    )
def get_current_provider(self) -> str:
    """Get the current active provider.

    With health monitoring active, the monitor's best provider is used if
    it belongs to this chain; otherwise the entry at the rotation index
    is returned.

    Returns:
        Name of current provider.
    """
    monitor = self._health_monitor if self._enable_health_monitoring else None
    if monitor:
        # Try to get best available provider
        best = monitor.get_best_provider()
        if best and best in self._providers:
            return best

    # Fall back to round-robin selection
    chain = self._providers
    return chain[self._current_index % len(chain)]
def get_next_provider(self) -> Optional[str]:
    """Get the next provider in the failover chain.

    With health monitoring active, the result is the chain entry that
    follows the current provider among the providers the monitor reports
    as available; the rotation index is not changed. Without monitoring,
    the rotation index is advanced by one and that entry is returned.

    Returns:
        Name of next provider or None if none available.
    """
    if self._enable_health_monitoring and self._health_monitor:
        # Ask the monitor once instead of once per chain entry.
        healthy = self._health_monitor.get_available_providers()
        available = [p for p in self._providers if p in healthy]
        if not available:
            logger.warning("No available providers for failover")
            return None

        # Find next available provider
        current = self.get_current_provider()
        try:
            current_idx = available.index(current)
        except ValueError:
            # Current provider not in available list
            return available[0]
        return available[(current_idx + 1) % len(available)]

    # An empty chain has nothing to rotate to; report that as "none
    # available" rather than failing on the modulo below.
    if not self._providers:
        return None

    # Fall back to simple rotation
    self._current_index = (self._current_index + 1) % len(
        self._providers
    )
    return self._providers[self._current_index]
async def execute_with_failover(
    self,
    operation: Callable[[str], Any],
    operation_name: str = "operation",
    **kwargs,
) -> Any:
    """Execute an operation with automatic failover.

    Each selected provider gets up to ``max_retries`` attempts, separated
    by ``retry_delay`` seconds, before the next provider is selected.
    Every attempt is reported to the health monitor when one is attached.
    A provider is never tried twice within one call.

    Args:
        operation: Async callable invoked as
            ``operation(provider, **kwargs)``.
        operation_name: Name for logging purposes.
        **kwargs: Additional arguments to pass to operation.

    Returns:
        Result from the first successful attempt.

    Raises:
        Exception: If no provider succeeded; chained to the last error.
    """
    # Local import: this module's import block does not provide ``time``.
    import time

    providers_tried: List[str] = []
    last_error: Optional[Exception] = None
    provider: Optional[str] = (
        self.get_current_provider() if self._providers else None
    )

    # At most one round per configured provider.
    for _ in range(len(self._providers)):
        if provider is not None and provider in providers_tried:
            # The selector handed back a provider that already failed
            # (the health monitor may still rank it best). Move on to one
            # that has not been tried instead of wasting the round.
            provider = self._first_untried_provider(providers_tried)
        if provider is None:
            break
        providers_tried.append(provider)

        # Try operation with retries
        for retry in range(self._max_retries):
            # Taken before the try so the failure path always has a start
            # point; monotonic because only the duration matters.
            start_time = time.monotonic()
            try:
                logger.info(
                    f"Executing {operation_name} with provider "
                    f"{provider} (attempt {retry + 1}/{self._max_retries})"  # noqa: E501
                )
                result = await operation(provider, **kwargs)
                elapsed_ms = (time.monotonic() - start_time) * 1000

                # Record success
                if self._health_monitor:
                    self._health_monitor.record_request(
                        provider_name=provider,
                        success=True,
                        response_time_ms=elapsed_ms,
                    )
                logger.info(
                    f"{operation_name} succeeded with provider "
                    f"{provider} in {elapsed_ms:.2f}ms"
                )
                return result
            except Exception as e:
                last_error = e
                logger.warning(
                    f"{operation_name} failed with provider "
                    f"{provider} (attempt {retry + 1}): {e}"
                )

                # Record failure
                if self._health_monitor:
                    elapsed_ms = (time.monotonic() - start_time) * 1000
                    self._health_monitor.record_request(
                        provider_name=provider,
                        success=False,
                        response_time_ms=elapsed_ms,
                        error_message=str(e),
                    )

                # Retry with delay (no sleep after the final attempt)
                if retry < self._max_retries - 1:
                    await asyncio.sleep(self._retry_delay)

        # This provider is exhausted; use the selector's answer for the
        # next round. None means nothing is available any more.
        provider = self.get_next_provider()

    # All providers failed
    error_msg = (
        f"{operation_name} failed with all providers. "
        f"Tried: {', '.join(providers_tried)}"
    )
    logger.error(error_msg)
    raise Exception(error_msg) from last_error


def _first_untried_provider(self, tried: List[str]) -> Optional[str]:
    """Return the first chain entry not in *tried*, or None.

    When health monitoring is active, only providers the monitor reports
    as available are considered.
    """
    remaining = [p for p in self._providers if p not in tried]
    if self._enable_health_monitoring and self._health_monitor:
        healthy = self._health_monitor.get_available_providers()
        remaining = [p for p in remaining if p in healthy]
    return remaining[0] if remaining else None
def add_provider(self, provider_name: str) -> None:
    """Append a provider to the end of the failover chain.

    A name that is already in the chain is left where it is.

    Args:
        provider_name: Name of provider to add.
    """
    if provider_name in self._providers:
        return
    self._providers.append(provider_name)
    logger.info("Added provider to failover chain: %s", provider_name)
def remove_provider(self, provider_name: str) -> bool:
    """Take a provider out of the failover chain.

    Args:
        provider_name: Name of provider to remove.

    Returns:
        True if removed, False if not found.
    """
    if provider_name not in self._providers:
        return False
    self._providers.remove(provider_name)
    logger.info(
        f"Removed provider from failover chain: {provider_name}"
    )
    return True
def get_providers(self) -> List[str]:
    """Return the failover chain as a new list.

    Returns:
        Provider names in chain order; changing the returned list does
        not affect the chain.
    """
    snapshot = list(self._providers)
    return snapshot
def set_provider_priority(
    self, provider_name: str, priority_index: int
) -> bool:
    """Move a provider to a new position in the chain.

    Args:
        provider_name: Name of provider to prioritize.
        priority_index: New index position (0 = highest priority). An
            index past the end places the provider last.

    Returns:
        True if updated, False if provider not found.
    """
    chain = self._providers
    if provider_name not in chain:
        return False

    chain.remove(provider_name)
    # Clamp against the length after removal.
    target = min(priority_index, len(chain))
    chain.insert(target, provider_name)
    logger.info(
        f"Set provider {provider_name} priority to index {priority_index}"
    )
    return True
def get_failover_stats(self) -> Dict[str, Any]:
    """Report the failover configuration and current selection.

    Returns:
        Dictionary with the chain, the current provider and the retry
        settings. When a health monitor is attached it also contains the
        chain split into ``available_providers`` and
        ``unavailable_providers``.
    """
    stats = dict(
        total_providers=len(self._providers),
        providers=self._providers.copy(),
        current_provider=self.get_current_provider(),
        max_retries=self._max_retries,
        retry_delay=self._retry_delay,
        health_monitoring_enabled=self._enable_health_monitoring,
    )

    monitor = self._health_monitor
    if monitor:
        available = monitor.get_available_providers()
        reachable, unreachable = [], []
        for name in self._providers:
            bucket = reachable if name in available else unreachable
            bucket.append(name)
        stats["available_providers"] = reachable
        stats["unavailable_providers"] = unreachable

    return stats
# Process-wide failover instance; stays None until it is first requested
# or explicitly configured.
_failover: Optional[ProviderFailover] = None


def get_failover() -> ProviderFailover:
    """Return the global failover instance, creating it on first use.

    Returns:
        Global ProviderFailover instance (built with default settings if
        none existed yet).
    """
    global _failover
    if _failover is not None:
        return _failover
    _failover = ProviderFailover()
    return _failover
def configure_failover(
    providers: Optional[List[str]] = None,
    max_retries: int = 3,
    retry_delay: float = 1.0,
) -> ProviderFailover:
    """Replace the global failover instance with a newly configured one.

    Args:
        providers: List of provider names to use.
        max_retries: Maximum retry attempts per provider.
        retry_delay: Delay between retries in seconds.

    Returns:
        The new global ProviderFailover instance.
    """
    global _failover
    replacement = ProviderFailover(
        providers=providers,
        max_retries=max_retries,
        retry_delay=retry_delay,
    )
    _failover = replacement
    return replacement

View File

@@ -0,0 +1,416 @@
"""Provider health monitoring system for tracking availability and performance.
This module provides health monitoring capabilities for anime providers,
tracking metrics like availability, response times, success rates, and
bandwidth usage.
"""
import asyncio
import logging
from collections import defaultdict, deque
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Deque, Dict, List, Optional
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@dataclass
class ProviderHealthMetrics:
    """Health metrics for a single provider.

    Plain counters and timestamps plus two derived percentages
    (``success_rate`` and ``failure_rate``). ``to_dict`` gives a
    JSON-friendly snapshot.
    """

    provider_name: str
    is_available: bool = True
    last_check_time: Optional[datetime] = None
    total_requests: int = 0
    successful_requests: int = 0
    failed_requests: int = 0
    average_response_time_ms: float = 0.0
    last_error: Optional[str] = None
    last_error_time: Optional[datetime] = None
    consecutive_failures: int = 0
    total_bytes_downloaded: int = 0
    uptime_percentage: float = 100.0

    @property
    def success_rate(self) -> float:
        """Successful requests as a percentage of all requests.

        Returns 0.0 while no request has been recorded.
        """
        if self.total_requests == 0:
            return 0.0
        return (self.successful_requests / self.total_requests) * 100

    @property
    def failure_rate(self) -> float:
        """Complement of ``success_rate`` as a percentage.

        Returns 0.0 while no request has been recorded. Without that
        guard, the 0.0 success rate of an unused provider would be
        reported as a 100 % failure rate.
        """
        if self.total_requests == 0:
            return 0.0
        return 100.0 - self.success_rate

    def to_dict(self) -> Dict[str, Any]:
        """Convert metrics to a dictionary.

        Timestamps become ISO-8601 strings (or None) and the percentages
        and average response time are rounded to two decimals.
        """
        return {
            "provider_name": self.provider_name,
            "is_available": self.is_available,
            "last_check_time": (
                self.last_check_time.isoformat()
                if self.last_check_time
                else None
            ),
            "total_requests": self.total_requests,
            "successful_requests": self.successful_requests,
            "failed_requests": self.failed_requests,
            "success_rate": round(self.success_rate, 2),
            "average_response_time_ms": round(
                self.average_response_time_ms, 2
            ),
            "last_error": self.last_error,
            "last_error_time": (
                self.last_error_time.isoformat()
                if self.last_error_time
                else None
            ),
            "consecutive_failures": self.consecutive_failures,
            "total_bytes_downloaded": self.total_bytes_downloaded,
            "uptime_percentage": round(self.uptime_percentage, 2),
        }
@dataclass
class RequestMetric:
    """Record of one provider request, as stored in the request history."""

    # When the request was recorded.
    timestamp: datetime
    # Outcome of the request.
    success: bool
    # Measured duration in milliseconds.
    response_time_ms: float
    # Payload size; zero when not reported.
    bytes_transferred: int = 0
    # Failure description; None when not reported.
    error_message: Optional[str] = None
class ProviderHealthMonitor:
    """Monitors health and performance of anime providers.

    Aggregate metrics are kept per provider together with a bounded
    history of individual requests. An optional background task
    periodically recomputes each provider's uptime percentage from the
    last hour of that history.
    """

    def __init__(
        self,
        max_history_size: int = 1000,
        health_check_interval: int = 300,  # 5 minutes
        failure_threshold: int = 3,
    ):
        """Initialize provider health monitor.

        Args:
            max_history_size: Maximum number of request metrics to keep
                per provider.
            health_check_interval: Interval between health checks in
                seconds.
            failure_threshold: Number of consecutive failures before
                marking unavailable.
        """
        self._max_history_size = max_history_size
        self._health_check_interval = health_check_interval
        self._failure_threshold = failure_threshold

        # Provider metrics storage. History deques are created on first
        # access and discard their oldest entry once full.
        self._metrics: Dict[str, ProviderHealthMetrics] = {}
        self._request_history: Dict[str, Deque[RequestMetric]] = defaultdict(
            lambda: deque(maxlen=max_history_size)
        )

        # Health check task
        self._health_check_task: Optional[asyncio.Task] = None
        self._is_running = False

        logger.info("Provider health monitor initialized")

    def start_monitoring(self) -> None:
        """Start background health monitoring.

        The check loop is scheduled with ``asyncio.create_task`` and
        therefore needs a running event loop.

        Raises:
            RuntimeError: If no event loop is running. The monitor is
                left in the stopped state so a later call can succeed.
        """
        if self._is_running:
            logger.warning("Health monitoring already running")
            return

        loop_coro = self._health_check_loop()
        self._is_running = True
        try:
            self._health_check_task = asyncio.create_task(loop_coro)
        except RuntimeError:
            # No running loop: undo the state change and dispose of the
            # never-scheduled coroutine so it does not emit a
            # "never awaited" warning.
            self._is_running = False
            loop_coro.close()
            raise
        logger.info("Provider health monitoring started")

    async def stop_monitoring(self) -> None:
        """Stop background health monitoring.

        Cancels the background task, if any, and waits for it to finish.
        """
        self._is_running = False
        if self._health_check_task:
            self._health_check_task.cancel()
            try:
                await self._health_check_task
            except asyncio.CancelledError:
                pass
            self._health_check_task = None
        logger.info("Provider health monitoring stopped")

    async def _health_check_loop(self) -> None:
        """Run health checks repeatedly until monitoring is stopped."""
        while self._is_running:
            try:
                await self._perform_health_checks()
                await asyncio.sleep(self._health_check_interval)
            except asyncio.CancelledError:
                break
            except Exception as e:
                # Keep the loop alive; wait a full interval before retrying.
                logger.exception("Error in health check loop: %s", e)
                await asyncio.sleep(self._health_check_interval)

    async def _perform_health_checks(self) -> None:
        """Refresh check time and uptime for all registered providers.

        A failure while processing one provider is logged and does not
        prevent the remaining providers from being checked.
        """
        # Iterate over a snapshot so providers registered meanwhile do
        # not disturb the loop.
        for provider_name in list(self._metrics):
            try:
                metrics = self._metrics[provider_name]
                metrics.last_check_time = datetime.now()

                # Update uptime percentage based on recent history
                recent_metrics = self._get_recent_metrics(
                    provider_name, minutes=60
                )
                if recent_metrics:
                    successful = sum(1 for m in recent_metrics if m.success)
                    metrics.uptime_percentage = (
                        successful / len(recent_metrics)
                    ) * 100

                logger.debug(
                    "Health check for %s: available=%s, "
                    "success_rate=%.2f%%",
                    provider_name,
                    metrics.is_available,
                    metrics.success_rate,
                )
            except Exception as e:
                logger.error(
                    "Error checking health for %s: %s",
                    provider_name,
                    e,
                    exc_info=True,
                )

    def record_request(
        self,
        provider_name: str,
        success: bool,
        response_time_ms: float,
        bytes_transferred: int = 0,
        error_message: Optional[str] = None,
    ) -> None:
        """Record a provider request for health tracking.

        Unknown providers are registered on their first request.

        Args:
            provider_name: Name of the provider.
            success: Whether the request was successful.
            response_time_ms: Response time in milliseconds.
            bytes_transferred: Number of bytes transferred.
            error_message: Error message if request failed.
        """
        metrics = self._metrics.get(provider_name)
        if metrics is None:
            metrics = ProviderHealthMetrics(provider_name=provider_name)
            self._metrics[provider_name] = metrics

        now = datetime.now()

        # Update request counts
        metrics.total_requests += 1
        if success:
            metrics.successful_requests += 1
            metrics.consecutive_failures = 0
        else:
            metrics.failed_requests += 1
            metrics.consecutive_failures += 1
            metrics.last_error = error_message
            metrics.last_error_time = now

        # Update availability based on consecutive failures
        if metrics.consecutive_failures >= self._failure_threshold:
            if metrics.is_available:
                # Only warn on the available -> unavailable transition.
                logger.warning(
                    "Provider %s marked as unavailable after "
                    "%s consecutive failures",
                    provider_name,
                    metrics.consecutive_failures,
                )
            metrics.is_available = False
        else:
            metrics.is_available = True

        # Update the running average: rebuild the previous total from the
        # old average, add the new sample, divide by the new count.
        total_time = metrics.average_response_time_ms * (
            metrics.total_requests - 1
        )
        metrics.average_response_time_ms = (
            total_time + response_time_ms
        ) / metrics.total_requests

        # Update bytes transferred
        metrics.total_bytes_downloaded += bytes_transferred

        # Store request metric in history
        self._request_history[provider_name].append(
            RequestMetric(
                timestamp=now,
                success=success,
                response_time_ms=response_time_ms,
                bytes_transferred=bytes_transferred,
                error_message=error_message,
            )
        )

        logger.debug(
            "Recorded request for %s: success=%s, time=%.2fms",
            provider_name,
            success,
            response_time_ms,
        )

    def get_provider_metrics(
        self, provider_name: str
    ) -> Optional[ProviderHealthMetrics]:
        """Get health metrics for a specific provider.

        Args:
            provider_name: Name of the provider.

        Returns:
            Provider health metrics or None if not found.
        """
        return self._metrics.get(provider_name)

    def get_all_metrics(self) -> Dict[str, ProviderHealthMetrics]:
        """Get health metrics for all providers.

        Returns:
            Shallow copy of the mapping from provider names to their
            metrics; the metric objects themselves are shared.
        """
        return self._metrics.copy()

    def get_available_providers(self) -> List[str]:
        """Get list of currently available providers.

        Returns:
            List of available provider names.
        """
        return [
            name
            for name, metrics in self._metrics.items()
            if metrics.is_available
        ]

    def get_best_provider(self) -> Optional[str]:
        """Get the best performing available provider.

        Best is determined by:
        1. Availability
        2. Success rate
        3. Response time

        Returns:
            Name of best provider or None if none available.
        """
        available = [
            (name, metrics)
            for name, metrics in self._metrics.items()
            if metrics.is_available
        ]
        if not available:
            return None

        # Highest success rate first; ties go to the lower response time.
        best_provider, _ = min(
            available,
            key=lambda x: (-x[1].success_rate, x[1].average_response_time_ms),
        )
        logger.debug("Best provider selected: %s", best_provider)
        return best_provider

    def _get_recent_metrics(
        self, provider_name: str, minutes: int = 60
    ) -> List[RequestMetric]:
        """Get recent request metrics for a provider.

        Args:
            provider_name: Name of the provider.
            minutes: Number of minutes to look back.

        Returns:
            List of recent request metrics.
        """
        # Membership test first: indexing the defaultdict would create an
        # empty history for an unknown provider.
        if provider_name not in self._request_history:
            return []

        cutoff_time = datetime.now() - timedelta(minutes=minutes)
        return [
            metric
            for metric in self._request_history[provider_name]
            if metric.timestamp >= cutoff_time
        ]

    def reset_provider_metrics(self, provider_name: str) -> bool:
        """Reset metrics for a specific provider.

        Args:
            provider_name: Name of the provider.

        Returns:
            True if reset successful, False if provider not found.
        """
        if provider_name not in self._metrics:
            return False

        self._metrics[provider_name] = ProviderHealthMetrics(
            provider_name=provider_name
        )
        self._request_history[provider_name].clear()
        logger.info("Reset metrics for provider: %s", provider_name)
        return True

    def get_health_summary(self) -> Dict[str, Any]:
        """Get summary of overall provider health.

        Returns:
            Dictionary with health summary statistics. The same keys are
            present whether or not any provider is registered;
            ``providers`` is an empty mapping when there are none.
        """
        total_providers = len(self._metrics)
        if total_providers == 0:
            return {
                "total_providers": 0,
                "available_providers": 0,
                "availability_percentage": 0.0,
                "average_success_rate": 0.0,
                "average_response_time_ms": 0.0,
                "providers": {},
            }

        available_providers = len(self.get_available_providers())
        avg_success_rate = sum(
            m.success_rate for m in self._metrics.values()
        ) / total_providers
        avg_response_time = sum(
            m.average_response_time_ms for m in self._metrics.values()
        ) / total_providers

        return {
            "total_providers": total_providers,
            "available_providers": available_providers,
            "availability_percentage": (
                available_providers / total_providers
            )
            * 100,
            "average_success_rate": round(avg_success_rate, 2),
            "average_response_time_ms": round(avg_response_time, 2),
            "providers": {
                name: metrics.to_dict()
                for name, metrics in self._metrics.items()
            },
        }
# Process-wide health monitor singleton, built lazily on first access.
_health_monitor: Optional[ProviderHealthMonitor] = None


def get_health_monitor() -> ProviderHealthMonitor:
    """Return the shared ProviderHealthMonitor, creating it if needed.

    The instance is created with the monitor's default settings.

    Returns:
        Global ProviderHealthMonitor instance.
    """
    global _health_monitor
    if _health_monitor is not None:
        return _health_monitor
    _health_monitor = ProviderHealthMonitor()
    return _health_monitor

View File

@@ -0,0 +1,307 @@
"""Performance monitoring wrapper for anime providers.
This module provides a wrapper that adds automatic performance tracking
to any provider implementation.
"""
import logging
import time
from typing import Any, Callable, Dict, List, Optional
from src.server.providers.base_provider import Loader
from src.server.providers.health_monitor import get_health_monitor
logger = logging.getLogger(__name__)
class MonitoredProviderWrapper(Loader):
    """Loader that delegates to another provider and reports each call.

    Every delegated operation is timed and its outcome is forwarded to
    the global health monitor. Exceptions raised by the wrapped provider
    are recorded as failures and then re-raised unchanged.
    """

    def __init__(
        self,
        provider: Loader,
        enable_monitoring: bool = True,
    ):
        """Initialize monitored provider wrapper.

        Args:
            provider: Provider instance to wrap.
            enable_monitoring: Whether to enable performance monitoring.
        """
        self._provider = provider
        self._enable_monitoring = enable_monitoring
        if enable_monitoring:
            self._health_monitor = get_health_monitor()
        else:
            self._health_monitor = None

        logger.info(
            f"Monitoring wrapper initialized for provider: "
            f"{provider.get_site_key()}"
        )

    def _record_operation(
        self,
        operation_name: str,
        start_time: float,
        success: bool,
        bytes_transferred: int = 0,
        error_message: Optional[str] = None,
    ) -> None:
        """Report one finished operation to the health monitor.

        Does nothing when monitoring is disabled.

        Args:
            operation_name: Name of the operation.
            start_time: Operation start time (from time.time()).
            success: Whether operation succeeded.
            bytes_transferred: Number of bytes transferred.
            error_message: Error message if operation failed.
        """
        monitoring_active = self._enable_monitoring and self._health_monitor
        if not monitoring_active:
            return

        elapsed_ms = (time.time() - start_time) * 1000
        provider_name = self._provider.get_site_key()

        self._health_monitor.record_request(
            provider_name=provider_name,
            success=success,
            response_time_ms=elapsed_ms,
            bytes_transferred=bytes_transferred,
            error_message=error_message,
        )

        if not success:
            logger.warning(
                f"{operation_name} failed for {provider_name} "
                f"in {elapsed_ms:.2f}ms: {error_message}"
            )
            return
        logger.debug(
            f"{operation_name} succeeded for {provider_name} "
            f"in {elapsed_ms:.2f}ms"
        )

    def _run_monitored(
        self, operation_name: str, call: Callable[[], Any]
    ) -> Any:
        """Run ``call`` and record its duration and outcome.

        Args:
            operation_name: Name reported to the health monitor.
            call: Zero-argument callable performing the provider call.

        Returns:
            Whatever ``call`` returns.

        Raises:
            Exception: Anything raised inside the try block is recorded
                as a failure and re-raised.
        """
        started_at = time.time()
        try:
            outcome = call()
            self._record_operation(
                operation_name=operation_name,
                start_time=started_at,
                success=True,
            )
            return outcome
        except Exception as exc:
            self._record_operation(
                operation_name=operation_name,
                start_time=started_at,
                success=False,
                error_message=str(exc),
            )
            raise

    def search(self, word: str) -> List[Dict[str, Any]]:
        """Search for anime series by name (with monitoring).

        Args:
            word: Search term to look for.

        Returns:
            List of found series as dictionaries.
        """
        return self._run_monitored(
            "search", lambda: self._provider.search(word)
        )

    def is_language(
        self,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub",
    ) -> bool:
        """Check if episode exists in specified language (monitored).

        Args:
            season: Season number (1-indexed).
            episode: Episode number (1-indexed).
            key: Unique series identifier/key.
            language: Language to check (default: German Dub).

        Returns:
            True if episode exists in specified language.
        """
        return self._run_monitored(
            "is_language",
            lambda: self._provider.is_language(
                season, episode, key, language
            ),
        )

    def download(
        self,
        base_directory: str,
        serie_folder: str,
        season: int,
        episode: int,
        key: str,
        language: str = "German Dub",
        progress_callback: Optional[Callable[[str, Dict], None]] = None,
    ) -> bool:
        """Download episode to specified directory (with monitoring).

        The provider's own result is reported as the success flag, and
        the last "downloaded" value seen in a progress event is reported
        as the transferred byte count.

        Args:
            base_directory: Base directory for downloads.
            serie_folder: Series folder name.
            season: Season number.
            episode: Episode number.
            key: Unique series identifier/key.
            language: Language version to download.
            progress_callback: Optional callback for progress updates.

        Returns:
            True if download successful.
        """
        started_at = time.time()
        downloaded_bytes = 0

        # Intercept progress events only when there is a callback to
        # forward to and monitoring is switched on.
        effective_callback = progress_callback
        if progress_callback and self._enable_monitoring:

            def _track_progress(event_type: str, data: Dict) -> None:
                nonlocal downloaded_bytes
                if event_type == "progress" and "downloaded" in data:
                    downloaded_bytes = data.get("downloaded", 0)
                progress_callback(event_type, data)

            effective_callback = _track_progress

        try:
            outcome = self._provider.download(
                base_directory=base_directory,
                serie_folder=serie_folder,
                season=season,
                episode=episode,
                key=key,
                language=language,
                progress_callback=effective_callback,
            )
            self._record_operation(
                operation_name="download",
                start_time=started_at,
                success=outcome,
                bytes_transferred=downloaded_bytes,
            )
            return outcome
        except Exception as exc:
            self._record_operation(
                operation_name="download",
                start_time=started_at,
                success=False,
                bytes_transferred=downloaded_bytes,
                error_message=str(exc),
            )
            raise

    def get_site_key(self) -> str:
        """Get the site key/identifier for this provider.

        This call is passed through without being recorded.

        Returns:
            Site key string.
        """
        return self._provider.get_site_key()

    def get_title(self, key: str) -> str:
        """Get the human-readable title of a series.

        Args:
            key: Unique series identifier/key.

        Returns:
            Series title string.
        """
        return self._run_monitored(
            "get_title", lambda: self._provider.get_title(key)
        )

    def get_season_episode_count(self, slug: str) -> Dict[int, int]:
        """Get season and episode counts for a series.

        Args:
            slug: Series slug/key identifier.

        Returns:
            Dictionary mapping season number to episode count.
        """
        return self._run_monitored(
            "get_season_episode_count",
            lambda: self._provider.get_season_episode_count(slug),
        )

    @property
    def wrapped_provider(self) -> Loader:
        """Get the underlying provider instance.

        Returns:
            Wrapped provider instance.
        """
        return self._provider
def wrap_provider(
    provider: Loader,
    enable_monitoring: bool = True,
) -> Loader:
    """Wrap a provider with performance monitoring.

    A provider that is already a MonitoredProviderWrapper is returned
    as-is, so wrappers are never stacked; ``enable_monitoring`` is
    ignored in that case.

    Args:
        provider: Provider to wrap.
        enable_monitoring: Whether to enable monitoring.

    Returns:
        Monitored provider wrapper.
    """
    already_wrapped = isinstance(provider, MonitoredProviderWrapper)
    if not already_wrapped:
        return MonitoredProviderWrapper(
            provider=provider,
            enable_monitoring=enable_monitoring,
        )
    return provider

View File

@@ -0,0 +1,79 @@
"""Shared provider configuration constants for AniWorld providers.
Centralizes user-agent strings, provider lists and common headers so
multiple provider implementations can import a single source of truth.
"""
from enum import Enum
from typing import Dict, List
class ProviderType(str, Enum):
    """Supported video providers.

    Members are also ``str`` instances, so each one compares equal to
    its display name.
    """

    VOE = "VOE"
    DOODSTREAM = "Doodstream"
    VIDMOLY = "Vidmoly"
    VIDOZA = "Vidoza"
    SPEEDFILES = "SpeedFiles"
    STREAMTAPE = "Streamtape"
    LULUVDO = "Luluvdo"
# Provider display names, listed explicitly in the order they should be
# stored in the default list.
DEFAULT_PROVIDERS: List[str] = [
    provider.value
    for provider in (
        ProviderType.VOE,
        ProviderType.DOODSTREAM,
        ProviderType.VIDMOLY,
        ProviderType.VIDOZA,
        ProviderType.SPEEDFILES,
        ProviderType.STREAMTAPE,
        ProviderType.LULUVDO,
    )
]
# Browser-like request headers. The user-agent and sec-ch-ua values
# both identify as Chromium/Edge 136 on Windows.
ANIWORLD_HEADERS: Dict[str, str] = {
    "accept": ",".join(
        [
            "text/html",
            "application/xhtml+xml",
            "application/xml;q=0.9",
            "image/avif",
            "image/webp",
            "image/apng",
            "*/*;q=0.8",
        ]
    ),
    "accept-encoding": "gzip, deflate, br, zstd",
    "accept-language": ",".join(
        ["de", "de-DE;q=0.9", "en;q=0.8", "en-GB;q=0.7", "en-US;q=0.6"]
    ),
    "cache-control": "max-age=0",
    "priority": "u=0, i",
    "sec-ch-ua": ", ".join(
        [
            '"Chromium";v="136"',
            '"Microsoft Edge";v="136"',
            '"Not.A/Brand";v="99"',
        ]
    ),
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "none",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": " ".join(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0",
        ]
    ),
}

# One entry per character, in this exact order: < > : " / \ | ? * &
INVALID_PATH_CHARS: List[str] = list('<>:"/\\|?*&')

LULUVDO_USER_AGENT = " ".join(
    [
        "Mozilla/5.0 (Android 15; Mobile; rv:132.0)",
        "Gecko/132.0 Firefox/132.0",
    ]
)

# Default download timeout (seconds)
DEFAULT_DOWNLOAD_TIMEOUT = 600

View File

@@ -0,0 +1,56 @@
"""Provider factory for managing anime content providers.
This module provides a factory class for accessing different anime content
providers (loaders). The factory uses provider identifiers (keys) to return
the appropriate provider instance.
Note: The 'key' parameter in this factory refers to the provider identifier
(e.g., 'aniworld.to'), not to be confused with series keys used within
providers to identify specific anime series.
"""
from typing import Dict
from .aniworld_provider import AniworldLoader
from .base_provider import Loader
class Loaders:
    """Registry of anime content providers, looked up by provider key.

    Each registered provider implements the Loader interface for
    searching and downloading anime content.

    Attributes:
        dict: Dictionary mapping provider keys to provider instances.
            Provider keys are site identifiers (e.g., 'aniworld.to').
    """

    def __init__(self) -> None:
        """Create the registry and register the built-in providers.

        Currently supports:
            - 'aniworld.to': AniworldLoader for aniworld.to content
        """
        registry: Dict[str, Loader] = {}
        registry["aniworld.to"] = AniworldLoader()
        self.dict = registry

    def GetLoader(self, key: str) -> Loader:
        """Look up a provider instance by its provider key.

        Args:
            key: Provider identifier (e.g., 'aniworld.to').
                This is the site/provider key, not a series key.

        Returns:
            Loader instance for the specified provider.

        Raises:
            KeyError: If the provider key is not found in the registry.

        Note:
            The 'key' parameter here identifies the provider/site, while
            series-specific operations on the returned Loader use series
            keys to identify individual anime series.
        """
        providers = self.dict
        return providers[key]

View File

@@ -0,0 +1,27 @@
from abc import ABC, abstractmethod
from typing import Any
class Provider(ABC):
    """Interface every streaming provider must implement."""

    @abstractmethod
    def get_link(
        self, embedded_link: str, timeout: int
    ) -> tuple[str, dict[str, Any]]:
        """Resolve an embedded player link to a direct download link.

        Args:
            embedded_link: URL of the embedded player
            timeout: Request timeout in seconds

        Returns:
            Tuple of (direct_link: str, headers: dict)
            - direct_link: Direct URL to download resource
            - headers: Dictionary of HTTP headers to use for download

        Raises:
            NotImplementedError: When a subclass delegates to this base
                implementation instead of providing its own.
        """
        message = "Streaming providers must implement get_link"
        raise NotImplementedError(message)

View File

@@ -0,0 +1,139 @@
import base64
import json
import re
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from .Provider import Provider
# Precompiled patterns used while extracting the download link:
# - REDIRECT_PATTERN matches the first absolute http(s) URL, i.e. everything
#   up to the next quote or angle bracket. It is meant for pulling the
#   intermediate redirect URL out of the bootstrap script so the provider's
#   hand-off can be followed.
# - B64_PATTERN captures the single-quoted value assigned to ``var a168c``,
#   the base64 encoded payload that contains the ``source`` field once
#   decoded.
# - HLS_PATTERN captures, as named group ``hls``, the base64 encoded HLS
#   manifest used as a fallback when no direct MP4 link is present.
REDIRECT_PATTERN = re.compile(r"https?://[^'\"<>]+")
B64_PATTERN = re.compile(r"var a168c='([^']+)'")
HLS_PATTERN = re.compile(r"'hls': '(?P<hls>[^']+)'")
class VOE(Provider):
    """VOE video provider implementation."""

    def __init__(self):
        """Pick a random user agent and build the base download headers."""
        self.RANDOM_USER_AGENT = UserAgent().random
        self.Header = {"User-Agent": self.RANDOM_USER_AGENT}

    def get_link(
        self, embedded_link: str, timeout: int
    ) -> tuple[str, dict]:
        """Extract direct download link from VOE embedded player.

        The embedded page is fetched, the redirect URL it contains is
        followed, and the resulting page is searched for the video
        source with three strategies in order: the JSON script tag, the
        base64 encoded variable, and the base64 encoded HLS entry.

        Args:
            embedded_link: URL of the embedded VOE player
            timeout: Request timeout in seconds, applied to both HTTP
                requests. A falsy value falls back to 30 seconds.

        Returns:
            Tuple of (direct_link, headers)

        Raises:
            ValueError: If the embedded page contains no redirect URL or
                no download link could be extracted.
        """
        # Honour the caller's timeout; 30 seconds is only the fallback.
        request_timeout = timeout if timeout else 30
        request_headers = {"User-Agent": self.RANDOM_USER_AGENT}

        # Configure retries with backoff
        retries = Retry(
            total=5,  # Number of retries
            backoff_factor=1,  # Delay multiplier (1s, 2s, 4s, ...)
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=["GET"],
        )

        session = requests.Session()
        # Still exposed on the instance, as before.
        self.session = session
        try:
            session.mount("https://", HTTPAdapter(max_retries=retries))

            response = session.get(
                embedded_link,
                headers=request_headers,
                timeout=request_timeout,
            )

            redirect = REDIRECT_PATTERN.search(response.text)
            if not redirect:
                raise ValueError("No redirect found.")

            redirect_url = redirect.group(0)
            # "scheme://host/..." splits into [scheme:, "", host, ...].
            parts = redirect_url.strip().split("/")
            self.Header["Referer"] = f"{parts[0]}//{parts[2]}/"

            response = session.get(
                redirect_url,
                headers=request_headers,
                timeout=request_timeout,
            )
            html = response.content
        finally:
            # Release pooled connections even when a request fails.
            session.close()

        # Method 1: Extract from script tag
        extracted = self.extract_voe_from_script(html)
        if extracted:
            return extracted, self.Header

        # Method 2: Extract from base64 encoded variable
        htmlText = html.decode("utf-8")
        b64_match = B64_PATTERN.search(htmlText)
        if b64_match:
            # The decoded payload is stored reversed.
            decoded = base64.b64decode(b64_match.group(1)).decode()[::-1]
            source = json.loads(decoded).get("source")
            if source:
                return source, self.Header

        # Method 3: Extract HLS source
        hls_match = HLS_PATTERN.search(htmlText)
        if hls_match:
            decoded_hls = base64.b64decode(hls_match.group("hls")).decode()
            return decoded_hls, self.Header

        raise ValueError("Could not extract download link from VOE")

    def shift_letters(self, input_str: str) -> str:
        """Apply ROT13 shift to ASCII letters; other characters are kept."""

        def _rot13(c: str) -> str:
            code = ord(c)
            if 65 <= code <= 90:
                return chr((code - 65 + 13) % 26 + 65)
            if 97 <= code <= 122:
                return chr((code - 97 + 13) % 26 + 97)
            return c

        return "".join(_rot13(c) for c in input_str)

    def replace_junk(self, input_str: str) -> str:
        """Replace each junk character sequence with an underscore."""
        junk_parts = ["@$", "^^", "~@", "%?", "*~", "!!", "#&"]
        for part in junk_parts:
            input_str = input_str.replace(part, "_")
        return input_str

    def shift_back(self, s: str, n: int) -> str:
        """Shift every character's code point back by n positions."""
        return "".join(chr(ord(c) - n) for c in s)

    def decode_voe_string(self, encoded: str) -> dict:
        """Decode VOE-encoded string to extract video source.

        Steps: ROT13, strip junk sequences, base64 decode, shift each
        character back by 3, reverse, base64 decode again, parse as JSON.

        Args:
            encoded: Obfuscated payload taken from the page.

        Returns:
            The parsed JSON payload.
        """
        step1 = self.shift_letters(encoded)
        step2 = self.replace_junk(step1).replace("_", "")
        step3 = base64.b64decode(step2).decode()
        step4 = self.shift_back(step3, 3)
        step5 = base64.b64decode(step4[::-1]).decode()
        return json.loads(step5)

    def extract_voe_from_script(self, html: bytes) -> "str | None":
        """Extract download link from VOE script tag.

        Args:
            html: Raw page content.

        Returns:
            The ``source`` value of the decoded payload, or None when
            the page has no ``application/json`` script tag or its
            payload cannot be decoded. Returning None instead of raising
            lets get_link continue with its fallback methods.
        """
        soup = BeautifulSoup(html, "html.parser")
        script = soup.find("script", type="application/json")
        if script is None:
            return None
        try:
            # The payload is wrapped in two characters on each side.
            return self.decode_voe_string(script.text[2:-2])["source"]
        except (ValueError, KeyError, TypeError):
            # Covers bad base64/UTF-8/JSON (all ValueError subclasses),
            # a payload without "source", and a non-dict payload.
            return None

View File

@@ -8,7 +8,7 @@ from typing import Optional
import structlog
from src.core.SeriesApp import SeriesApp
from src.server.SeriesApp import SeriesApp
from src.server.services.progress_service import (
ProgressService,
ProgressType,
@@ -942,47 +942,16 @@ class AnimeService:
in-memory episodeDict, so downloaded episodes are not shown
as missing.
"""
from src.core.entities.series import Serie
from src.server.database.connection import get_db_session
from src.server.database.service import AnimeSeriesService
async with get_db_session() as db:
anime_series_list = await AnimeSeriesService.get_all(
db, with_episodes=True
)
# Convert to Serie objects
series_list = []
for anime_series in anime_series_list:
# Build episode_dict from episodes relationship
# Only include episodes that are NOT downloaded (is_downloaded=False)
# so the missing-episode list stays accurate
episode_dict: dict[int, list[int]] = {}
if anime_series.episodes:
for episode in anime_series.episodes:
# Skip downloaded episodes — they are not missing
if episode.is_downloaded:
continue
season = episode.season
if season not in episode_dict:
episode_dict[season] = []
episode_dict[season].append(episode.episode_number)
# Sort episode numbers
for season in episode_dict:
episode_dict[season].sort()
serie = Serie(
key=anime_series.key,
name=anime_series.name,
site=anime_series.site,
folder=anime_series.folder,
episodeDict=episode_dict,
year=anime_series.year
)
series_list.append(serie)
# Load into SeriesApp
self._app.load_series_from_list(series_list)
# Load AnimeSeries objects directly into SeriesApp
self._app.load_series_from_list(anime_series_list)
async def sync_episodes_to_db(self, series_key: str) -> int:
"""
@@ -1178,17 +1147,17 @@ class AnimeService:
async def add_series_to_db(
self,
serie,
anime,
db
):
"""
Add a series to the database if it doesn't already exist.
Uses serie.key for identification. Creates a new AnimeSeries
Uses anime.key for identification. Creates a new AnimeSeries
record in the database if it doesn't already exist.
Args:
serie: The Serie instance to add
anime: The AnimeSeries instance to add
db: Database session for async operations
Returns:
@@ -1197,41 +1166,40 @@ class AnimeService:
from src.server.database.service import AnimeSeriesService, EpisodeService
# Check if series already exists in DB
existing = await AnimeSeriesService.get_by_key(db, serie.key)
existing = await AnimeSeriesService.get_by_key(db, anime.key)
if existing:
logger.debug(
"Series already exists in database: %s (key=%s)",
serie.name,
serie.key
anime.name,
anime.key
)
return None
# Create new series in database
anime_series = await AnimeSeriesService.create(
db=db,
key=serie.key,
name=serie.name,
site=serie.site,
folder=serie.folder,
year=serie.year if hasattr(serie, 'year') else None,
key=anime.key,
name=anime.name,
site=anime.site,
folder=anime.folder,
year=anime.year if hasattr(anime, 'year') else None,
)
# Create Episode records for each episode in episodeDict
if serie.episodeDict:
for season, episode_numbers in serie.episodeDict.items():
for episode_number in episode_numbers:
await EpisodeService.create(
db=db,
series_id=anime_series.id,
season=season,
episode_number=episode_number,
)
# Create Episode records for each episode in episodes relationship
if anime.episodes:
for episode in anime.episodes:
await EpisodeService.create(
db=db,
series_id=anime_series.id,
season=episode.season,
episode_number=episode.episode_number,
)
logger.info(
"Added series to database: %s (key=%s, year=%s)",
serie.name,
serie.key,
serie.year if hasattr(serie, 'year') else None
anime.name,
anime.key,
anime.year if hasattr(anime, 'year') else None
)
return anime_series

View File

@@ -14,7 +14,7 @@ import structlog
from lxml import etree
from src.config.settings import settings as _settings
from src.core.utils.image_downloader import ImageDownloader
from src.server.utils.image_downloader import ImageDownloader
logger = structlog.get_logger(__name__)

View File

@@ -0,0 +1,424 @@
"""TMDB API client for fetching TV show metadata.
This module provides an async client for The Movie Database (TMDB) API,
adapted from the scraper project to fit the AniworldMain architecture.
Example:
>>> async with TMDBClient(api_key="your_key") as client:
... results = await client.search_tv_show("Attack on Titan")
... show_id = results["results"][0]["id"]
... details = await client.get_tv_show_details(show_id)
"""
import asyncio
import logging
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
import aiohttp
logger = logging.getLogger(__name__)
class TMDBAPIError(Exception):
    """Error raised when a TMDB API request fails."""
class TMDBClient:
"""Async TMDB API client for TV show metadata.
Attributes:
api_key: TMDB API key for authentication
base_url: Base URL for TMDB API
image_base_url: Base URL for TMDB images
max_connections: Maximum concurrent connections
session: aiohttp ClientSession for requests
"""
DEFAULT_BASE_URL = "https://api.themoviedb.org/3"
DEFAULT_IMAGE_BASE_URL = "https://image.tmdb.org/t/p"
NEGATIVE_CACHE_TTL = 86400 # 24 hours
def __init__(
self,
api_key: str,
base_url: str = DEFAULT_BASE_URL,
image_base_url: str = DEFAULT_IMAGE_BASE_URL,
max_connections: int = 10
):
"""Initialize TMDB client.
Args:
api_key: TMDB API key
base_url: TMDB API base URL
image_base_url: TMDB image base URL
max_connections: Maximum concurrent connections
"""
if not api_key:
raise ValueError("TMDB API key is required")
self.api_key = api_key
self.base_url = base_url.rstrip('/')
self.image_base_url = image_base_url.rstrip('/')
self.max_connections = max_connections
self.session: Optional[aiohttp.ClientSession] = None
self._cache: Dict[str, Any] = {}
self._negative_cache: Dict[str, float] = {} # query -> timestamp when cached
# TMDB allows ~40 req/s; use 30 concurrent + per-second throttle to stay safe
self._semaphore = asyncio.Semaphore(30)
self._rate_limit_lock = asyncio.Lock()
self._request_timestamps: List[float] = []
self._max_requests_per_second = 35 # Stay under TMDB's ~40/s limit
async def __aenter__(self):
"""Async context manager entry."""
await self._ensure_session()
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Async context manager exit."""
await self.close()
async def _ensure_session(self):
    """Create the aiohttp session if there is none or it was closed.

    The new session's connector is limited to ``max_connections``
    connections.
    """
    session_usable = self.session is not None and not self.session.closed
    if session_usable:
        return
    connector = aiohttp.TCPConnector(limit=self.max_connections)
    self.session = aiohttp.ClientSession(connector=connector)
async def _request(
    self,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 5
) -> Dict[str, Any]:
    """Make an async request to the TMDB API with caching, throttling and retries.

    The lookup order is: response cache, negative cache (empty search
    results, subject to ``NEGATIVE_CACHE_TTL``), then the network. Network
    calls are throttled by a one-second sliding window and limited by the
    client's semaphore.

    Args:
        endpoint: API endpoint (e.g., 'search/tv')
        params: Query parameters; the passed dict is not modified
        max_retries: Maximum number of attempts

    Returns:
        API response as dictionary

    Raises:
        TMDBAPIError: Immediately on HTTP 401/404 or a non-transient
            connection error, otherwise once all attempts are exhausted
    """
    await self._ensure_session()
    url = f"{self.base_url}/{endpoint}"
    # Work on a copy: injecting the API key into the caller's dict would
    # leak the credential into whatever the caller does with it afterwards.
    params = dict(params or {})
    params["api_key"] = self.api_key

    # Cache key for deduplication
    cache_key = f"{endpoint}:{str(sorted(params.items()))}"
    if cache_key in self._cache:
        logger.debug("Cache hit for %s", endpoint)
        return self._cache[cache_key]

    # Check negative cache (cached empty results); it shares the key format
    # of the response cache.
    negative_cache_key = cache_key
    if negative_cache_key in self._negative_cache:
        if time.monotonic() - self._negative_cache[negative_cache_key] < self.NEGATIVE_CACHE_TTL:
            logger.debug("Negative cache hit for %s (cached empty result)", endpoint)
            return {"results": []}
        else:
            # Expired negative cache entry
            del self._negative_cache[negative_cache_key]

    delay = 1
    last_error = None

    # Rate limiting: keep the number of requests started within any
    # one-second window below self._max_requests_per_second.
    async with self._rate_limit_lock:
        now = time.monotonic()
        # Remove timestamps older than 1 second
        self._request_timestamps = [
            ts for ts in self._request_timestamps if now - ts < 1.0
        ]
        if len(self._request_timestamps) >= self._max_requests_per_second:
            sleep_time = 1.0 - (now - self._request_timestamps[0])
            if sleep_time > 0:
                logger.debug("Rate throttling: waiting %.2fs", sleep_time)
                await asyncio.sleep(sleep_time)
        self._request_timestamps.append(time.monotonic())

    async with self._semaphore:
        for attempt in range(max_retries):
            try:
                # Re-ensure session before each attempt in case it was closed
                await self._ensure_session()
                if self.session is None:
                    raise TMDBAPIError("Session is not available")
                logger.debug("TMDB API request: %s (attempt %s)", endpoint, attempt + 1)
                async with self.session.get(url, params=params, timeout=aiohttp.ClientTimeout(total=60)) as resp:
                    if resp.status == 401:
                        raise TMDBAPIError("Invalid TMDB API key")
                    elif resp.status == 404:
                        raise TMDBAPIError(f"Resource not found: {endpoint}")
                    elif resp.status == 429:
                        # Rate limit - honour Retry-After when it is a number of
                        # seconds. The header may also be an HTTP-date, which
                        # int() cannot parse; fall back to the default wait then.
                        fallback_wait = max(delay * 2, 2)
                        try:
                            retry_after = int(resp.headers.get('Retry-After', fallback_wait))
                        except (TypeError, ValueError):
                            retry_after = fallback_wait
                        # Remember the cause so the final error is meaningful
                        # if every attempt ends up rate limited.
                        last_error = TMDBAPIError("Rate limited by TMDB (HTTP 429)")
                        logger.warning("Rate limited, waiting %ss", retry_after)
                        await asyncio.sleep(retry_after)
                        continue
                    resp.raise_for_status()
                    data = await resp.json()
                    # NOTE(review): empty search results are stored here as well
                    # as in the negative cache; this cache is checked first and
                    # has no expiry, so the negative-cache TTL only matters after
                    # clear_cache() - confirm this is intended.
                    self._cache[cache_key] = data
                    # Cache negative result if empty
                    if endpoint.startswith("search/") and not data.get("results"):
                        self._negative_cache[negative_cache_key] = time.monotonic()
                        logger.debug("Cached negative result for %s", endpoint)
                    return data
            except asyncio.TimeoutError as e:
                last_error = e
                if attempt < max_retries - 1:
                    logger.warning("Request timeout (attempt %s), retrying in %ss", attempt + 1, delay)
                    await asyncio.sleep(delay)
                    delay *= 2
                else:
                    logger.error("Request timed out after %s attempts", max_retries)
            except (aiohttp.ClientError, AttributeError) as e:
                last_error = e
                # If connector/session was closed, try to recreate it
                if "Connector is closed" in str(e) or isinstance(e, AttributeError):
                    logger.warning(
                        "Session issue detected, recreating session: %s",
                        e,
                        exc_info=True,
                    )
                    self.session = None
                    await self._ensure_session()
                # DNS / host-unreachable errors are not transient — abort immediately
                error_str = str(e)
                if "name resolution" in error_str.lower() or (
                    isinstance(e, aiohttp.ClientConnectorError) and
                    "Cannot connect to host" in error_str
                ):
                    logger.error("Non-transient connection error, aborting retries: %s", e)
                    raise TMDBAPIError(f"Request failed after {attempt + 1} attempts: {e}") from e
                if attempt < max_retries - 1:
                    logger.warning("Request failed (attempt %s): %s, retrying in %ss", attempt + 1, e, delay)
                    await asyncio.sleep(delay)
                    delay *= 2
                else:
                    logger.error("Request failed after %s attempts: %s", max_retries, e)
        raise TMDBAPIError(f"Request failed after {max_retries} attempts: {last_error}")
async def search_tv_show(
    self,
    query: str,
    language: str = "de-DE",
    page: int = 1
) -> Dict[str, Any]:
    """Look up TV shows by name via the ``search/tv`` endpoint.

    Args:
        query: Show name to search for
        language: Language code for the results (defaults to German)
        page: Result page to fetch

    Returns:
        The API response dictionary for the search

    Example:
        >>> results = await client.search_tv_show("Attack on Titan")
        >>> shows = results["results"]
    """
    search_params = {"query": query, "language": language, "page": page}
    response = await self._request("search/tv", search_params)
    return response
async def search_multi(
    self,
    query: str,
    language: str = "en-US",
    page: int = 1
) -> Dict[str, Any]:
    """Look up movies and TV shows by name via the ``search/multi`` endpoint.

    Multi search covers both media types, which helps for anime that TMDB
    lists as a movie rather than a TV show.

    Args:
        query: Title to search for
        language: Language code for the results (defaults to English)
        page: Result page to fetch

    Returns:
        The API response dictionary for the search

    Example:
        >>> results = await client.search_multi("Suzume no Tojimari")
        >>> shows = [r for r in results["results"] if r["media_type"] == "tv"]
    """
    search_params = {"query": query, "language": language, "page": page}
    response = await self._request("search/multi", search_params)
    return response
async def get_tv_show_details(
    self,
    tv_id: int,
    language: str = "de-DE",
    append_to_response: Optional[str] = None
) -> Dict[str, Any]:
    """Fetch the detail record of a TV show from the ``tv/{tv_id}`` endpoint.

    Args:
        tv_id: TMDB TV show ID
        language: Language code for the metadata
        append_to_response: Comma-separated extra sections to include
            (e.g., "credits,images"); omitted from the request when empty

    Returns:
        The API response dictionary for the show
    """
    # Only send append_to_response when the caller actually asked for extras.
    extras = {"append_to_response": append_to_response} if append_to_response else {}
    query_params = {"language": language, **extras}
    return await self._request(f"tv/{tv_id}", query_params)
async def get_tv_show_content_ratings(self, tv_id: int) -> Dict[str, Any]:
    """Fetch content ratings from the ``tv/{tv_id}/content_ratings`` endpoint.

    Args:
        tv_id: TMDB TV show ID

    Returns:
        The API response dictionary with the show's content ratings
    """
    endpoint = f"tv/{tv_id}/content_ratings"
    return await self._request(endpoint)
async def get_tv_show_external_ids(self, tv_id: int) -> Dict[str, Any]:
    """Fetch external identifiers from the ``tv/{tv_id}/external_ids`` endpoint.

    Args:
        tv_id: TMDB TV show ID

    Returns:
        The API response dictionary with external IDs (imdb_id, tvdb_id, etc.)
    """
    endpoint = f"tv/{tv_id}/external_ids"
    return await self._request(endpoint)
async def get_tv_show_images(
    self,
    tv_id: int,
    language: Optional[str] = None
) -> Dict[str, Any]:
    """Fetch image listings from the ``tv/{tv_id}/images`` endpoint.

    Args:
        tv_id: TMDB TV show ID
        language: Language filter for images; no filter is sent when
            this is None or empty

    Returns:
        The API response dictionary with the show's images
    """
    query_params = {"language": language} if language else {}
    return await self._request(f"tv/{tv_id}/images", query_params)
async def download_image(
    self,
    image_path: str,
    local_path: Path,
    size: str = "original"
) -> None:
    """Download an image from TMDB and store it at ``local_path``.

    Args:
        image_path: Image path from TMDB API (e.g., "/abc123.jpg")
        local_path: Local file path to save image; missing parent
            directories are created
        size: Image size (w500, original, etc.)

    Raises:
        TMDBAPIError: If the HTTP transfer fails (wraps aiohttp.ClientError)
    """
    await self._ensure_session()
    url = f"{self.image_base_url}/{size}{image_path}"
    try:
        logger.debug("Downloading image from %s", url)
        async with self.session.get(url, timeout=aiohttp.ClientTimeout(total=60)) as resp:
            resp.raise_for_status()
            # Fetch the complete payload before touching the filesystem, so a
            # transfer that fails midway never leaves an empty or truncated file.
            payload = await resp.read()
        # Ensure parent directory exists
        local_path.parent.mkdir(parents=True, exist_ok=True)
        # Write image data
        with open(local_path, "wb") as f:
            f.write(payload)
        logger.info("Downloaded image to %s", local_path)
    except aiohttp.ClientError as e:
        # Chain the cause so the original aiohttp error stays in the traceback.
        raise TMDBAPIError(f"Failed to download image: {e}") from e
def get_image_url(self, image_path: str, size: str = "original") -> str:
    """Build the full URL of a TMDB image.

    Args:
        image_path: Image path from TMDB API
        size: Image size (w500, original, etc.)

    Returns:
        ``image_base_url``, the size and the image path joined into one URL
    """
    sized_path = f"{size}{image_path}"
    return "/".join((self.image_base_url, sized_path))
async def close(self):
    """Close the aiohttp session, if one is open, and drop the reference."""
    session = self.session
    if not session or session.closed:
        # Nothing open: leave state untouched.
        return
    await session.close()
    self.session = None
    logger.debug("TMDB client session closed")
def __del__(self):
    """Log a warning when the client is garbage collected with an open session."""
    session = self.session
    if session is None or session.closed:
        return
    logger.warning(
        "TMDBClient: unclosed session detected. "
        "Use 'async with TMDBClient(...)' or call close() explicitly."
    )
def clear_cache(self):
    """Drop every entry from the response cache."""
    response_cache = self._cache
    response_cache.clear()
    logger.debug("TMDB client cache cleared")
def clear_negative_cache(self):
    """Drop every entry from the negative (empty-result) cache."""
    negative_cache = self._negative_cache
    negative_cache.clear()
    logger.debug("TMDB negative cache cleared")
def cleanup_expired_negative_cache(self) -> int:
    """Purge negative-cache entries that have reached ``NEGATIVE_CACHE_TTL``.

    Returns:
        Number of entries removed
    """
    current = time.monotonic()
    ttl = self.NEGATIVE_CACHE_TTL
    # Collect first, delete afterwards: the dict must not change size
    # while it is being iterated.
    stale = [
        cache_key
        for cache_key, stored_at in self._negative_cache.items()
        if current - stored_at >= ttl
    ]
    removed = len(stale)
    for cache_key in stale:
        del self._negative_cache[cache_key]
    if removed:
        logger.debug("Removed %d expired negative cache entries", removed)
    return removed

View File

@@ -20,7 +20,7 @@ except Exception: # pragma: no cover - optional dependency
AsyncSession = object
from src.config.settings import settings
from src.core.SeriesApp import SeriesApp
from src.server.SeriesApp import SeriesApp
from src.server.services.auth_service import AuthError, auth_service
logger = logging.getLogger(__name__)
@@ -58,16 +58,16 @@ _RATE_LIMIT_WINDOW_SECONDS = 60.0
def _make_db_lookup():
"""Build a synchronous ``(folder) -> Serie | None`` callable for SerieScanner.
"""Build a synchronous ``(folder) -> AnimeSeries | None`` callable for SerieScanner.
The returned function opens a short-lived sync DB session, queries for a
series whose ``folder`` column matches the given name, and converts the
ORM row to a ``Serie`` domain object. Returns ``None`` when the DB is not
yet initialised or no matching row is found.
series whose ``folder`` column matches the given name, and returns the
AnimeSeries ORM object. Returns ``None`` when the DB is not yet initialised
or no matching row is found.
"""
from src.core.entities.series import Serie
from src.server.database.models import AnimeSeries
def _lookup(folder: str) -> Optional["Serie"]:
def _lookup(folder: str) -> Optional["AnimeSeries"]:
try:
from src.server.database.connection import get_sync_session
from src.server.database.service import AnimeSeriesService
@@ -78,16 +78,7 @@ def _make_db_lookup():
finally:
db.close()
if row is None:
return None
return Serie(
key=row.key,
name=row.name or "",
site=row.site,
folder=row.folder,
episodeDict={},
year=row.year,
)
return row
except RuntimeError:
# DB not initialised yet (e.g. first boot before init_db())
return None

View File

@@ -0,0 +1,354 @@
"""Image downloader utility for NFO media files.
This module provides functions to download poster, logo, and fanart images
from TMDB and validate them.
Example:
>>> downloader = ImageDownloader()
>>> await downloader.download_poster(poster_url, Path("/path/to/series"))
"""
import asyncio
import logging
from pathlib import Path
from typing import Optional
import aiohttp
from PIL import Image
logger = logging.getLogger(__name__)
class ImageDownloadError(Exception):
    """Raised when an image cannot be downloaded or fails validation."""
class ImageDownloader:
    """Download images over HTTP and validate the files written to disk.

    Supports the async context manager protocol so the underlying aiohttp
    session is closed when the block exits.

    Attributes:
        max_retries: Maximum number of download attempts per image
        timeout: Total request timeout in seconds
        min_file_size: Minimum acceptable payload/file size in bytes
        retry_delay: Delay before the first retry in seconds; doubled
            after every failed attempt
        session: Lazily created aiohttp session (None while not open)

    Example:
        >>> async with ImageDownloader() as downloader:
        ...     await downloader.download_poster(url, path)
    """

    def __init__(
        self,
        max_retries: int = 3,
        timeout: int = 30,
        min_file_size: int = 1024,  # 1 KB
        retry_delay: float = 1.0
    ):
        """Initialize image downloader.

        Args:
            max_retries: Maximum number of download attempts
            timeout: Request timeout in seconds
            min_file_size: Minimum valid file size in bytes
            retry_delay: Initial delay between retries in seconds
        """
        self.max_retries = max_retries
        self.timeout = timeout
        self.min_file_size = min_file_size
        self.retry_delay = retry_delay
        self.session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self):
        """Enter async context manager and create session."""
        self._get_session()  # Ensure session is created
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Exit async context manager and cleanup resources.

        Returns False so exceptions raised inside the block propagate.
        """
        await self.close()
        return False

    async def close(self):
        """Close aiohttp session if open."""
        if self.session and not self.session.closed:
            await self.session.close()
            self.session = None

    def _new_session(self) -> "aiohttp.ClientSession":
        """Create a session whose total timeout is ``self.timeout`` seconds."""
        timeout = aiohttp.ClientTimeout(total=self.timeout)
        return aiohttp.ClientSession(timeout=timeout)

    def _get_session(self) -> "aiohttp.ClientSession":
        """Get or create aiohttp session.

        Returns:
            Active aiohttp session
        """
        # If no session, create one
        if self.session is None:
            self.session = self._new_session()
            return self.session
        # Only a real boolean True counts as "closed": mock sessions used in
        # tests expose a non-boolean ``closed`` attribute and are used as-is.
        try:
            is_closed = getattr(self.session, "closed", False) is True
        except (AttributeError, TypeError):
            # Mock session or unusual object, just use it as-is
            is_closed = False
        if is_closed:
            self.session = self._new_session()
        return self.session

    async def download_image(
        self,
        url: str,
        local_path: Path,
        skip_existing: bool = True,
        validate: bool = True
    ) -> bool:
        """Download an image from URL to local path.

        Args:
            url: Image URL
            local_path: Local file path to save image
            skip_existing: Skip download if a large-enough file already exists
            validate: Validate image after download

        Returns:
            True if the image is present after the call, False if the
            server answered 404 (not retried)

        Raises:
            ImageDownloadError: If download fails after retries
        """
        # An existing file below min_file_size is treated as broken and re-downloaded.
        if skip_existing and local_path.exists():
            if local_path.stat().st_size >= self.min_file_size:
                logger.debug("Image already exists: %s", local_path)
                return True

        # Ensure parent directory exists
        local_path.parent.mkdir(parents=True, exist_ok=True)

        delay = self.retry_delay
        last_error = None
        for attempt in range(self.max_retries):
            try:
                logger.debug(
                    "Downloading image from %s (attempt %d)",
                    url,
                    attempt + 1,
                )
                # Use persistent session
                session = self._get_session()
                async with session.get(url) as resp:
                    if resp.status == 404:
                        logger.warning("Image not found: %s", url)
                        return False
                    resp.raise_for_status()
                    # Download image data
                    data = await resp.read()
                    # Check file size
                    if len(data) < self.min_file_size:
                        raise ImageDownloadError(
                            f"Downloaded file too small: {len(data)} bytes"
                        )
                    # Write to file
                    with open(local_path, "wb") as f:
                        f.write(data)
                    # Validate image if requested; an invalid file is removed again
                    if validate and not self.validate_image(local_path):
                        local_path.unlink(missing_ok=True)
                        raise ImageDownloadError("Image validation failed")
                    logger.info("Downloaded image to %s", local_path)
                    return True
            except (
                aiohttp.ClientError,
                # A request timeout surfaces as asyncio.TimeoutError, which is
                # neither a ClientError nor (before Python 3.11) an OSError;
                # list it explicitly so timeouts are retried on every version.
                asyncio.TimeoutError,
                IOError,
                ImageDownloadError,
            ) as e:
                last_error = e
                if attempt < self.max_retries - 1:
                    logger.warning(
                        "Download failed (attempt %d): %s, retrying in %s",
                        attempt + 1,
                        e,
                        delay,
                    )
                    await asyncio.sleep(delay)
                    delay *= 2
                else:
                    logger.error(
                        "Download failed after %d attempts: %s",
                        self.max_retries,
                        e,
                    )
        raise ImageDownloadError(
            f"Failed to download image after {self.max_retries} attempts: {last_error}"
        ) from last_error

    async def _download_named(
        self,
        url: str,
        series_folder: Path,
        filename: str,
        skip_existing: bool,
        label: str
    ) -> bool:
        """Download ``url`` to ``series_folder / filename``; log and return False on failure."""
        local_path = series_folder / filename
        try:
            return await self.download_image(url, local_path, skip_existing)
        except ImageDownloadError as e:
            logger.warning("Failed to download %s: %s", label, e)
            return False

    async def download_poster(
        self,
        url: str,
        series_folder: Path,
        filename: str = "poster.jpg",
        skip_existing: bool = True
    ) -> bool:
        """Download poster image.

        Args:
            url: Poster URL
            series_folder: Series folder path
            filename: Output filename (default: poster.jpg)
            skip_existing: Skip if file exists

        Returns:
            True if successful, False if the download failed
        """
        return await self._download_named(
            url, series_folder, filename, skip_existing, "poster"
        )

    async def download_logo(
        self,
        url: str,
        series_folder: Path,
        filename: str = "logo.png",
        skip_existing: bool = True
    ) -> bool:
        """Download logo image.

        Args:
            url: Logo URL
            series_folder: Series folder path
            filename: Output filename (default: logo.png)
            skip_existing: Skip if file exists

        Returns:
            True if successful, False if the download failed
        """
        return await self._download_named(
            url, series_folder, filename, skip_existing, "logo"
        )

    async def download_fanart(
        self,
        url: str,
        series_folder: Path,
        filename: str = "fanart.jpg",
        skip_existing: bool = True
    ) -> bool:
        """Download fanart/backdrop image.

        Args:
            url: Fanart URL
            series_folder: Series folder path
            filename: Output filename (default: fanart.jpg)
            skip_existing: Skip if file exists

        Returns:
            True if successful, False if the download failed
        """
        return await self._download_named(
            url, series_folder, filename, skip_existing, "fanart"
        )

    def validate_image(self, image_path: Path) -> bool:
        """Validate that file is a valid image.

        Args:
            image_path: Path to image file

        Returns:
            True if valid image, False otherwise
        """
        try:
            with Image.open(image_path) as img:
                # Verify it's a valid image
                img.verify()
            # Check file size
            if image_path.stat().st_size < self.min_file_size:
                logger.warning("Image file too small: %s", image_path)
                return False
            return True
        except Exception as e:
            # Any failure to open or verify the file means "not a usable image".
            logger.warning("Image validation failed for %s: %s", image_path, e)
            return False

    async def download_all_media(
        self,
        series_folder: Path,
        poster_url: Optional[str] = None,
        logo_url: Optional[str] = None,
        fanart_url: Optional[str] = None,
        skip_existing: bool = True
    ) -> dict[str, Optional[bool]]:
        """Download all media files (poster, logo, fanart) concurrently.

        Args:
            series_folder: Series folder path
            poster_url: Poster URL (optional)
            logo_url: Logo URL (optional)
            fanart_url: Fanart URL (optional)
            skip_existing: Skip existing files

        Returns:
            Dictionary keyed by "poster", "logo" and "fanart": True/False
            for attempted downloads, None for types without a URL
        """
        results: dict[str, Optional[bool]] = {
            "poster": None,
            "logo": None,
            "fanart": None
        }
        tasks = []
        if poster_url:
            tasks.append(("poster", self.download_poster(
                poster_url, series_folder, skip_existing=skip_existing
            )))
        if logo_url:
            tasks.append(("logo", self.download_logo(
                logo_url, series_folder, skip_existing=skip_existing
            )))
        if fanart_url:
            tasks.append(("fanart", self.download_fanart(
                fanart_url, series_folder, skip_existing=skip_existing
            )))

        # Download concurrently
        if tasks:
            task_results = await asyncio.gather(
                *[task for _, task in tasks],
                return_exceptions=True
            )
            for (media_type, _), result in zip(tasks, task_results):
                # gather(return_exceptions=True) may also hand back
                # BaseException subclasses such as CancelledError.
                if isinstance(result, BaseException):
                    logger.error("Error downloading %s: %s", media_type, result)
                    results[media_type] = False
                else:
                    results[media_type] = result
        return results

View File

@@ -0,0 +1,248 @@
"""Utility functions for generating URL-safe keys from folder names.
This module provides key generation and normalization for anime series,
handling edge cases like non-Latin characters and special symbols.
"""
from __future__ import annotations
import re
import unicodedata
import uuid
from typing import Optional
# Syntactic shape of a URL-safe key: an ASCII letter or digit first, followed
# by any number of ASCII letters, digits, hyphens or underscores.
# The pattern alone accepts a single character; is_valid_key() additionally
# requires a length of at least 2.
VALID_KEY_PATTERN = re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$')
def normalize_key(key: str) -> str:
    """Reduce a key to lowercase ASCII letters, digits and single hyphens.

    Args:
        key: The key to normalize

    Returns:
        The normalized key, or an empty string for empty input
    """
    if not key:
        return ""
    # Rewrite rules, applied in order to the lowercased key.
    rewrite_rules = (
        (r'[\s_]+', '-'),      # whitespace/underscore runs become one hyphen
        (r'[^a-z0-9-]', ''),   # drop everything except a-z, 0-9 and hyphens
        (r'-+', '-'),          # collapse hyphen runs
    )
    result = key.lower()
    for pattern, replacement in rewrite_rules:
        result = re.sub(pattern, replacement, result)
    # Keys never start or end with a hyphen.
    return result.strip('-')
def is_valid_key(key: str) -> bool:
    """Check if a key is valid for URL-safe use.

    Args:
        key: The key to validate

    Returns:
        True if key is at least 2 characters long, starts with an ASCII
        letter or digit and otherwise contains only ASCII letters, digits,
        hyphens and underscores
    """
    if not key or not key.strip():
        return False
    if len(key) < 2:
        return False
    # fullmatch, not match: the pattern's trailing '$' also matches just
    # before a final newline, so match() would accept "abc\n" as valid.
    return VALID_KEY_PATTERN.fullmatch(key) is not None
def generate_key_from_folder(folder_name: str) -> str:
    """Generate a URL-safe key from a folder name.

    Strategies are tried in order until one yields a key accepted by
    is_valid_key():

    1. Lowercase the alphanumeric words of the name and join them with hyphens.
    2. Same, after dropping every character that is not an ASCII letter/digit.
    3. Same, after stripping combining marks from the NFD form
       (e.g. accented Latin letters lose their accent).
    4. A random ``series-<8 hex chars>`` key.

    Args:
        folder_name: The folder name to convert to a key

    Returns:
        A URL-safe key string. Never returns empty string. The random
        fallback differs between calls for the same folder name.

    Raises:
        ValueError: If folder_name is empty or only whitespace

    Examples:
        >>> generate_key_from_folder("Attack on Titan (2013)")
        'attack-on-titan-2013'
        >>> generate_key_from_folder("A Time Called You (2023)")
        'a-time-called-you-2023'
        >>> generate_key_from_folder("25-sai no Joshikousei (2018)")
        '25-sai-no-joshikousei-2018'
    """
    if not folder_name or not folder_name.strip():
        raise ValueError("Folder name cannot be empty")

    def _hyphenate(text: str) -> str:
        """Lowercase the whitespace-separated words of *text* and join them with hyphens."""
        return '-'.join(word.lower() for word in text.split())

    # Apostrophe-like characters are dropped without creating a word break,
    # e.g. "Hell's" -> "Hells". Written as escapes so the exact code points
    # survive editors and copy/paste:
    # ' (U+0027), U+2018, U+2019, U+02BC and ` (U+0060).
    apostrophes = frozenset("'\u2018\u2019\u02bc`")

    # Step 1: Unicode NFC normalization
    normalized = unicodedata.normalize('NFC', folder_name.strip())

    # Step 2: Keep alphanumeric characters of any script; every other
    # character (whitespace, punctuation, symbols) becomes a word break.
    # Apostrophes are checked first because U+02BC counts as a letter.
    parts = []
    for char in normalized:
        if char in apostrophes:
            continue
        if char.isalnum():
            parts.append(char)
        else:
            parts.append(' ')
    working = ''.join(parts)

    # Step 3: Hyphenated lowercase key; only accepted when it is pure ASCII
    key = _hyphenate(working)
    if key and is_valid_key(key):
        return key

    # Step 4: Retry with only ASCII letters and digits
    # NOTE(review): this runs before the accent-stripping step below, so a
    # name like "Pokémon" yields "pokmon" rather than "pokemon" - confirm
    # before reordering, since existing keys may depend on it.
    key = _hyphenate(re.sub(r'[^a-zA-Z0-9\s]', '', working))
    if key and is_valid_key(key):
        return key

    # Step 5: Strip combining marks from the NFD form, which turns some
    # accented characters into their ASCII base letters.
    nfd_form = unicodedata.normalize('NFD', folder_name)
    latinized = ''.join(
        char for char in nfd_form
        if unicodedata.category(char) != 'Mn'  # Strip combining marks
    )
    key = _hyphenate(re.sub(r'[^a-zA-Z0-9\s]', ' ', latinized))
    if key and is_valid_key(key):
        return key

    # Step 6: Absolute fallback - random key from the first 8 hex chars of a
    # UUID. Reaching this point means the name holds fewer than two ASCII
    # letters/digits, so any name-derived prefix would be non-ASCII and
    # break the URL-safe contract; none is added.
    uuid_key = uuid.uuid4().hex[:8]
    return f"series-{uuid_key}"
def validate_key_uniqueness(
    key: str,
    existing_keys: set[str],
) -> tuple[bool, str]:
    """Check that a key is well-formed and not already taken.

    Surrounding whitespace is ignored for all checks.

    Args:
        key: The key to validate
        existing_keys: Set of keys that already exist

    Returns:
        Tuple of (is_valid, error_message); the message is empty on success
    """
    candidate = key.strip() if key else ""
    if not candidate:
        return False, "Key cannot be empty"
    if len(candidate) < 2:
        return False, "Key must be at least 2 characters"
    if not is_valid_key(candidate):
        return False, "Key must be URL-safe (alphanumeric, hyphens, underscores only)"
    if candidate in existing_keys:
        return False, f"Key '{candidate}' is already in use"
    return True, ""
def sanitize_key_for_url(key: str) -> str:
    """Strip a key down to word characters and single hyphens.

    Args:
        key: The key to sanitize

    Returns:
        The sanitized key, or an empty string for empty input
    """
    if not key:
        return ""
    # Every space becomes a hyphen before anything is removed.
    hyphenated = '-'.join(key.split(' '))
    # Keep only word characters (alphanumerics, underscore) and hyphens.
    cleaned = re.sub(r'[^\w\-]', '', hyphenated)
    # Collapse hyphen runs, then trim hyphens at both ends.
    collapsed = re.sub(r'-+', '-', cleaned)
    return collapsed.strip('-')
def sanitize_url_for_logging(url: str, max_length: int = 100) -> str:
    """Sanitize a URL for safe logging by masking sensitive query parameters.

    The value of every query parameter whose name contains "key", "token",
    "secret", "password", "passwd", "auth" or "signature" (case-insensitive)
    is replaced by ``***``; the result is then truncated to ``max_length``
    characters, with "..." appended when something was cut off.

    Args:
        url: The URL to sanitize
        max_length: Maximum length of the returned URL string (excluding
            the "..." truncation marker)

    Returns:
        Sanitized URL safe for logging, or an empty string for empty input
    """
    if not url:
        return ""
    # Mask before truncating, so a credential can never survive partially
    # at the cut-off point.
    redacted = re.sub(
        r'(?i)([?&][^=&#]*(?:key|token|secret|password|passwd|auth|signature)[^=&#]*=)[^&#]*',
        r'\1***',
        url,
    )
    # Truncate if too long
    if len(redacted) > max_length:
        return redacted[:max_length] + "..."
    return redacted