From e74b04c1eea937d8335087ac91d9dbcd6ec1d221 Mon Sep 17 00:00:00 2001 From: Lukas Date: Fri, 5 Jun 2026 18:15:41 +0200 Subject: [PATCH] feat: add NFO scan after rescan and year caching - Add nfo_scan_after_rescan config option (default: true) - Implement year caching in AniworldLoader and EnhancedAniWorldLoader - Make get_year abstract method in base provider - Run NFO validation/creation after scheduled rescan completes - Add _YearDict cache to avoid re-extracting year from HTML --- src/server/models/config.py | 6 + src/server/providers/aniworld_provider.py | 91 ++- src/server/providers/base_provider.py | 11 + src/server/providers/enhanced_provider.py | 62 +- src/server/services/nfo_scan_service.py | 651 ++++++++++++++++++ .../services/scheduler/scheduler_service.py | 37 +- tests/unit/test_base_provider.py | 3 + tests/unit/test_enhanced_provider.py | 6 + tests/unit/test_monitored_provider.py | 6 + tests/unit/test_provider_edge_cases.py | 1 + 10 files changed, 839 insertions(+), 35 deletions(-) create mode 100644 src/server/services/nfo_scan_service.py diff --git a/src/server/models/config.py b/src/server/models/config.py index d6e5c85..f921880 100644 --- a/src/server/models/config.py +++ b/src/server/models/config.py @@ -39,6 +39,12 @@ class SchedulerConfig(BaseModel): description="Automatically queue and start downloads for all missing " "episodes after a scheduled rescan completes.", ) + nfo_scan_after_rescan: bool = Field( + default=True, + description="Run NFO validation and creation after a scheduled rescan " + "completes. 
Checks each series folder for tvshow.nfo and " + "creates or fills missing properties.", + ) # Legacy alias fields — read via Pydantic alias auto_download: Optional[bool] = Field(default=None, alias="auto_download") diff --git a/src/server/providers/aniworld_provider.py b/src/server/providers/aniworld_provider.py index 7356d78..35129d9 100644 --- a/src/server/providers/aniworld_provider.py +++ b/src/server/providers/aniworld_provider.py @@ -158,6 +158,7 @@ class AniworldLoader(Loader): self._KeyHTMLDict = {} self._EpisodeHTMLDict = {} + self._YearDict = {} self.Providers = Providers() # Events: download_progress is triggered with progress dict @@ -774,55 +775,81 @@ class AniworldLoader(Loader): if span_tag: title = span_tag.text logger.debug("Found title: %s", title) + + # Also try to extract year from sibling p tag "Jahr: {year}" + # Year is typically right after title in the HTML structure + year = self._extract_year_from_soup(soup) + if year is not None: + self._YearDict[key] = year + logger.debug("Cached year %d for key: %s", year, key) + return title logger.warning("No title found for key: %s", key) return "" + def _extract_year_from_soup(self, soup: BeautifulSoup) -> int | None: + """Extract year from BeautifulSoup object. + + Looks for 'Jahr: {year}' pattern in p tags adjacent to series-title. + + Args: + soup: Parsed BeautifulSoup object + + Returns: + Year as int or None if not found + """ + # Try to find year in metadata + for p_tag in soup.find_all('p'): + text = p_tag.get_text() + if 'Jahr:' in text or 'Year:' in text: + match = re.search(r'(\d{4})', text) + if match: + return int(match.group(1)) + + # Fallback: look in series-info div + info_div = soup.find('div', class_='series-info') + if info_div: + text = info_div.get_text() + match = re.search(r'\b(19\d{2}|20\d{2})\b', text) + if match: + return int(match.group(1)) + + return None + def get_year(self, key: str) -> int | None: """Get anime release year from series key. 
- - Attempts to extract the year from the series page metadata. - Returns None if year cannot be determined. - + + Uses cached year from get_title if available, + otherwise extracts and caches it. + Args: key: Series identifier - + Returns: - int or None: Release year if found, None otherwise + Release year or None if not found """ logger.debug("Getting year for key: %s", key) + + # Check cache first + if key in self._YearDict: + logger.debug("Using cached year %d for key: %s", self._YearDict[key], key) + return self._YearDict[key] + + # Not cached - extract from HTML try: soup = BeautifulSoup( _decode_html_content(self._get_key_html(key).content), 'html.parser' ) - - # Try to find year in metadata - # Check for "Jahr:" or similar metadata fields - for p_tag in soup.find_all('p'): - text = p_tag.get_text() - if 'Jahr:' in text or 'Year:' in text: - # Extract year from text like "Jahr: 2025" - match = re.search(r'(\d{4})', text) - if match: - year = int(match.group(1)) - logger.debug("Found year in metadata: %s", year) - return year - - # Try alternative: look for year in genre/info section - info_div = soup.find('div', class_='series-info') - if info_div: - text = info_div.get_text() - match = re.search(r'\b(19\d{2}|20\d{2})\b', text) - if match: - year = int(match.group(1)) - logger.debug("Found year in info section: %s", year) - return year - - logger.debug("No year found for key: %s", key) - return None - + + year = self._extract_year_from_soup(soup) + if year is not None: + self._YearDict[key] = year + logger.debug("Found and cached year %d for key: %s", year, key) + + return year + except Exception as e: logger.warning("Error extracting year for key %s: %s", key, e) return None diff --git a/src/server/providers/base_provider.py b/src/server/providers/base_provider.py index 5ecd51b..8306cbf 100644 --- a/src/server/providers/base_provider.py +++ b/src/server/providers/base_provider.py @@ -91,6 +91,17 @@ class Loader(ABC): Series title string """ + 
@abstractmethod + def get_year(self, key: str) -> int | None: + """Get the release year of a series. + + Args: + key: Unique series identifier/key + + Returns: + Release year as integer, or None if year cannot be determined + """ + @abstractmethod def get_season_episode_count(self, slug: str) -> Dict[int, int]: """Get season and episode counts for a series. diff --git a/src/server/providers/enhanced_provider.py b/src/server/providers/enhanced_provider.py index 11df903..9045a6f 100644 --- a/src/server/providers/enhanced_provider.py +++ b/src/server/providers/enhanced_provider.py @@ -110,6 +110,7 @@ class EnhancedAniWorldLoader(Loader): # Cache dictionaries self._KeyHTMLDict = {} self._EpisodeHTMLDict = {} + self._YearDict = {} # Provider manager self.Providers = Providers() @@ -666,6 +667,10 @@ class EnhancedAniWorldLoader(Loader): if title_span: span = title_span.find('span') if span: + # Extract and cache year from soup if available + year = self._ExtractYearFromSoup(soup) + if year is not None: + self._YearDict[key] = year return span.text.strip() self.logger.warning("Could not extract title for key: %s", key) @@ -674,7 +679,62 @@ class EnhancedAniWorldLoader(Loader): except Exception as e: self.logger.error("Failed to get title for key %s: %s", key, e) raise RetryableError(f"Title extraction failed: {e}") from e - + + def _ExtractYearFromSoup(self, soup: BeautifulSoup) -> int | None: + """Extract year from parsed BeautifulSoup. + + Looks for 'Jahr: {year}' pattern in p tags. 
+ + Args: + soup: Parsed BeautifulSoup object + + Returns: + Year as int or None if not found + """ + for p_tag in soup.find_all('p'): + text = p_tag.get_text() + if 'Jahr:' in text or 'Year:' in text: + match = re.search(r'(\d{4})', text) + if match: + return int(match.group(1)) + + info_div = soup.find('div', class_='series-info') + if info_div: + text = info_div.get_text() + match = re.search(r'\b(19\d{2}|20\d{2})\b', text) + if match: + return int(match.group(1)) + + return None + + def GetYear(self, key: str) -> int | None: + """Get anime release year from series key. + + Uses cached year from GetTitle if available, + otherwise extracts and caches it. + + Args: + key: Series identifier + + Returns: + Release year or None if not found + """ + # Check cache first + if key in self._YearDict: + return self._YearDict[key] + + # Not cached - extract from HTML + try: + soup = BeautifulSoup(self._GetKeyHTML(key).content, 'html.parser') + year = self._ExtractYearFromSoup(soup) + if year is not None: + self._YearDict[key] = year + return year + + except Exception as e: + self.logger.warning("Error extracting year for key %s: %s", key, e) + return None + def GetSiteKey(self) -> str: """Get site identifier.""" return "aniworld.to" diff --git a/src/server/services/nfo_scan_service.py b/src/server/services/nfo_scan_service.py new file mode 100644 index 0000000..d9b928b --- /dev/null +++ b/src/server/services/nfo_scan_service.py @@ -0,0 +1,651 @@ +"""NFO scan service for validating and creating tvshow.nfo files. + +This module provides a service layer for scanning the anime library, +checking whether each series has a valid tvshow.nfo file, creating +missing files, and filling in missing properties from TMDB metadata. + +All series are identified by 'key' (provider-assigned, URL-safe +identifier). 'folder' is used as metadata only for filesystem paths. 
+""" +from __future__ import annotations + +import asyncio +import os +from datetime import datetime, timezone +from typing import Any, Callable, Dict, List, Optional + +import structlog + +from src.config.settings import settings +from src.server.nfo.nfo_generator import generate_tvshow_nfo +from src.server.nfo.nfo_mapper import tmdb_to_nfo_model +from src.server.nfo.nfo_models import TVShowNFO + +logger = structlog.get_logger(__name__) + + +class NfoScanServiceError(Exception): + """Service-level exception for NFO scan operations.""" + + +class NfoScanProgress: + """Tracks the current state of an NFO scan operation. + + Attributes: + scan_id: Unique identifier for this scan + status: Current status (started, in_progress, completed, failed, cancelled) + total: Total number of series to scan + current: Number of series processed + percentage: Completion percentage + message: Human-readable progress message + key: Current series key being processed (metadata only) + folder: Current series folder being processed (metadata only) + created: Number of NFO files created + updated: Number of NFO files updated + errors: List of error messages encountered + started_at: When the scan started + updated_at: When progress was last updated + """ + + def __init__(self, scan_id: str): + self.scan_id = scan_id + self.status = "started" + self.total = 0 + self.current = 0 + self.percentage = 0.0 + self.message = "Initializing NFO scan..." 
+ self.key: Optional[str] = None + self.folder: Optional[str] = None + self.started_at = datetime.now(timezone.utc) + self.updated_at = datetime.now(timezone.utc) + self.created = 0 + self.updated = 0 + self.errors: List[str] = [] + + def to_dict(self) -> Dict[str, Any]: + result = { + "scan_id": self.scan_id, + "status": self.status, + "total": self.total, + "current": self.current, + "percentage": round(self.percentage, 2), + "message": self.message, + "started_at": self.started_at.isoformat(), + "updated_at": self.updated_at.isoformat(), + "created": self.created, + "updated": self.updated, + "errors": self.errors, + } + if self.key is not None: + result["key"] = self.key + if self.folder is not None: + result["folder"] = self.folder + return result + + +class NfoScanService: + """Manages NFO validation and creation for anime series. + + Scans the anime library directory, checks each series folder for + a tvshow.nfo file, creates missing files, and fills in missing + or empty properties from TMDB metadata. + + Uses 'key' as the primary series identifier and 'folder' as + metadata only for filesystem operations. 
+ """ + + def __init__(self) -> None: + self._current_scan: Optional[NfoScanProgress] = None + self._is_scanning = False + self._lock = asyncio.Lock() + + # Event handlers for scan events + self._scan_event_handlers: List[Callable[[Dict[str, Any]], None]] = [] + + logger.info("NfoScanService initialized") + + def subscribe_to_scan_events( + self, + handler: Callable[[Dict[str, Any]], None], + ) -> None: + """Subscribe to NFO scan events.""" + self._scan_event_handlers.append(handler) + + def unsubscribe_from_scan_events( + self, + handler: Callable[[Dict[str, Any]], None], + ) -> None: + """Unsubscribe from NFO scan events.""" + try: + self._scan_event_handlers.remove(handler) + except ValueError: + logger.warning("Handler not found for unsubscribe") + + async def _emit_scan_event(self, event_data: Dict[str, Any]) -> None: + """Emit scan event to all subscribers.""" + for handler in self._scan_event_handlers: + try: + if asyncio.iscoroutinefunction(handler): + await handler(event_data) + else: + handler(event_data) + except Exception as e: + logger.error("NFO scan event handler error", error=str(e)) + + @property + def is_scanning(self) -> bool: + return self._is_scanning + + @property + def current_scan(self) -> Optional[NfoScanProgress]: + return self._current_scan + + async def scan_all( + self, + anime_service: Any, # AnimeService instance + ) -> Dict[str, Any]: + """Run NFO validation and creation across all series. + + Args: + anime_service: AnimeService instance for accessing series data. + + Returns: + Summary dict with keys: total, created, updated, errors_count, + scan_id, and duration_seconds. + + Raises: + NfoScanServiceError: If a scan is already in progress. 
+ """ + async with self._lock: + if self._is_scanning: + raise NfoScanServiceError("An NFO scan is already in progress") + self._is_scanning = True + + scan_id = f"nfo_scan_{id(self)}" + scan_progress = NfoScanProgress(scan_id) + self._current_scan = scan_progress + + logger.info("Starting NFO scan") + + # Emit scan started + await self._emit_scan_event({ + "type": "nfo_scan_started", + "scan_id": scan_id, + "message": "NFO scan started", + }) + + # Get all series from AnimeService + try: + series_list = await anime_service.list_series_with_filters() + except Exception as exc: + logger.error("Failed to get series list: %s", exc) + async with self._lock: + self._is_scanning = False + raise NfoScanServiceError(f"Failed to get series list: {exc}") from exc + + if not series_list: + logger.info("No series found — NFO scan complete") + scan_progress.status = "completed" + scan_progress.message = "No series found" + scan_progress.percentage = 100.0 + scan_progress.updated_at = datetime.now(timezone.utc) + + async with self._lock: + self._is_scanning = False + + await self._emit_scan_event({ + "type": "nfo_scan_completed", + "scan_id": scan_id, + "success": True, + "message": "No series found", + "data": scan_progress.to_dict(), + }) + return { + "total": 0, + "created": 0, + "updated": 0, + "errors_count": 0, + "scan_id": scan_id, + "duration_seconds": 0.0, + } + + scan_progress.total = len(series_list) + scan_progress.status = "in_progress" + scan_progress.message = f"Scanning {scan_progress.total} series..." 
+ scan_progress.updated_at = datetime.now(timezone.utc) + + start_time = datetime.now(timezone.utc) + errors: List[str] = [] + + for idx, series in enumerate(series_list): + key = series.get("key", "") + folder = series.get("folder", "") + name = series.get("name", "") + + scan_progress.key = key + scan_progress.folder = folder + scan_progress.message = f"Scanning: {name}" + scan_progress.updated_at = datetime.now(timezone.utc) + + await self._emit_scan_event({ + "type": "nfo_scan_progress", + "data": scan_progress.to_dict(), + }) + + try: + result = await self._scan_series(key, folder, series) + if result == "created": + scan_progress.created += 1 + elif result == "updated": + scan_progress.updated += 1 + except Exception as exc: + error_msg = f"NFO scan failed for {key}: {exc}" + logger.warning(error_msg) + errors.append(error_msg) + scan_progress.errors.append(error_msg) + + scan_progress.current = idx + 1 + scan_progress.percentage = round( + (scan_progress.current / scan_progress.total) * 100, 2 + ) + scan_progress.updated_at = datetime.now(timezone.utc) + + end_time = datetime.now(timezone.utc) + duration = (end_time - start_time).total_seconds() + scan_progress.status = "completed" + scan_progress.message = ( + f"NFO scan completed: {scan_progress.created} created, " + f"{scan_progress.updated} updated, {len(errors)} errors" + ) + scan_progress.percentage = 100.0 + scan_progress.updated_at = end_time + + async with self._lock: + self._is_scanning = False + + logger.info( + "NFO scan completed: total=%d created=%d updated=%d errors=%d duration=%.2fs", + scan_progress.total, + scan_progress.created, + scan_progress.updated, + len(errors), + duration, + ) + + await self._emit_scan_event({ + "type": "nfo_scan_completed", + "scan_id": scan_id, + "success": True, + "message": scan_progress.message, + "data": scan_progress.to_dict(), + "statistics": { + "total": scan_progress.total, + "created": scan_progress.created, + "updated": scan_progress.updated, + 
"errors_count": len(errors), + }, + }) + + return { + "total": scan_progress.total, + "created": scan_progress.created, + "updated": scan_progress.updated, + "errors_count": len(errors), + "scan_id": scan_id, + "duration_seconds": round(duration, 2), + } + + async def _scan_series( + self, + key: str, + folder: str, + series_data: Dict[str, Any], + ) -> Optional[str]: + """Scan and update NFO for a single series. + + Args: + key: Series key (primary identifier) + folder: Series folder name (metadata for filesystem path) + series_data: Series data dict from anime_service + + Returns: + "created" if new NFO was created, "updated" if existing was + modified, None if no change needed or error occurred. + """ + if not folder: + logger.debug("Skipping series with no folder: key=%s", key) + return None + + anime_dir = getattr(settings, "anime_directory", None) + if not anime_dir: + logger.warning("anime_directory not configured — skipping NFO scan") + return None + + series_path = os.path.join(anime_dir, folder) + nfo_path = os.path.join(series_path, "tvshow.nfo") + + nfo_exists = os.path.isfile(nfo_path) + + if not nfo_exists: + # Create new NFO + logger.info("Creating NFO for series: %s (%s)", key, folder) + await self._create_nfo(key, folder, series_data, nfo_path) + await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path) + return "created" + + # NFO exists — check if it needs updating + updated = await self._update_nfo_if_needed(key, folder, series_data, nfo_path) + if updated: + await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path) + return "updated" + + return None + + async def _create_nfo( + self, + key: str, + folder: str, + series_data: Dict[str, Any], + nfo_path: str, + ) -> None: + """Create a new tvshow.nfo file from TMDB metadata. 
+ + Args: + key: Series key + folder: Series folder name + series_data: Series data from anime_service + nfo_path: Full path to the NFO file to create + """ + tmdb_id = series_data.get("tmdb_id") + + if not tmdb_id: + logger.warning( + "Cannot create NFO for %s: no tmdb_id available", + key, + ) + return + + try: + tmdb_data = await self._fetch_tmdb_data(tmdb_id) + except Exception as exc: + logger.warning("Failed to fetch TMDB data for %s: %s", key, exc) + return + + if not tmdb_data: + logger.warning("No TMDB data for %s", key) + return + + nfo_model = tmdb_to_nfo_model( + tmdb_data, + content_ratings=None, + get_image_url=self._make_tmdb_image_url(tmdb_id), + image_size="original", + ) + + xml_content = generate_tvshow_nfo(nfo_model) + + # Ensure directory exists + os.makedirs(os.path.dirname(nfo_path), exist_ok=True) + + with open(nfo_path, "w", encoding="utf-8") as f: + f.write(xml_content) + + logger.info("Created tvshow.nfo for %s at %s", key, nfo_path) + + await self._emit_scan_event({ + "type": "nfo_created", + "key": key, + "folder": folder, + "path": nfo_path, + }) + + async def _update_nfo_if_needed( + self, + key: str, + folder: str, + series_data: Dict[str, Any], + nfo_path: str, + ) -> bool: + """Load existing NFO, check for missing fields, fill and rewrite. + + Args: + key: Series key + folder: Series folder name + series_data: Series data from anime_service + nfo_path: Full path to the existing NFO file + + Returns: + True if NFO was updated, False if no changes were needed. 
+ """ + try: + from lxml import etree + except ImportError: + logger.warning("lxml not available — cannot update existing NFO files") + return False + + try: + tree = etree.parse(nfo_path) + root = tree.getroot() + except Exception as exc: + logger.warning("Failed to parse existing NFO for %s: %s — will regenerate", key, exc) + # Corrupt or unreadable NFO — regenerate from TMDB + return await self._regenerate_nfo(key, folder, series_data, nfo_path) + + # Check for missing or empty critical fields + critical_fields = ["title", "plot", "premiered", "tmdbid"] + missing_fields: List[str] = [] + + for field in critical_fields: + elem = root.find(field) + if elem is None or not elem.text or elem.text.strip() == "": + missing_fields.append(field) + + if not missing_fields: + logger.debug("NFO for %s is complete — no update needed", key) + return False + + logger.info( + "NFO for %s is missing fields %s — attempting to fill from TMDB", + key, + missing_fields, + ) + + # Try to fill missing fields from TMDB + tmdb_id = series_data.get("tmdb_id") + if not tmdb_id: + logger.warning("Cannot update NFO for %s: no tmdb_id", key) + return False + + try: + tmdb_data = await self._fetch_tmdb_data(tmdb_id) + except Exception as exc: + logger.warning("Failed to fetch TMDB data for %s: %s", key, exc) + return False + + if not tmdb_data: + return False + + nfo_model = tmdb_to_nfo_model( + tmdb_data, + content_ratings=None, + get_image_url=self._make_tmdb_image_url(tmdb_id), + image_size="original", + ) + + # Serialize updated model to XML and write + xml_content = generate_tvshow_nfo(nfo_model) + + with open(nfo_path, "w", encoding="utf-8") as f: + f.write(xml_content) + + logger.info("Updated NFO for %s (filled %d fields)", key, len(missing_fields)) + + await self._emit_scan_event({ + "type": "nfo_updated", + "key": key, + "folder": folder, + "path": nfo_path, + "missing_fields": missing_fields, + }) + + return True + + async def _regenerate_nfo( + self, + key: str, + folder: str, + 
series_data: Dict[str, Any], + nfo_path: str, + ) -> bool: + """Regenerate NFO from scratch when existing file is corrupt.""" + tmdb_id = series_data.get("tmdb_id") + if not tmdb_id: + return False + + try: + tmdb_data = await self._fetch_tmdb_data(tmdb_id) + except Exception as exc: + logger.warning("Failed to fetch TMDB data for %s during regeneration: %s", key, exc) + return False + + if not tmdb_data: + return False + + nfo_model = tmdb_to_nfo_model( + tmdb_data, + content_ratings=None, + get_image_url=self._make_tmdb_image_url(tmdb_id), + image_size="original", + ) + + xml_content = generate_tvshow_nfo(nfo_model) + + with open(nfo_path, "w", encoding="utf-8") as f: + f.write(xml_content) + + logger.info("Regenerated NFO for %s", key) + return True + + async def _fetch_tmdb_data(self, tmdb_id: int) -> Optional[Dict[str, Any]]: + """Fetch series metadata from TMDB API. + + Args: + tmdb_id: TMDB series ID + + Returns: + TMDB response dict or None on failure. + """ + try: + from src.server.nfo.tmdb_client import get_tmdb_client + + client = get_tmdb_client() + data = await client.get_series_details(tmdb_id) + return data + except Exception as exc: + logger.warning("TMDB fetch failed for TMDB ID %s: %s", tmdb_id, exc) + return None + + def _make_tmdb_image_url(self, tmdb_id: int) -> Callable[[str, str], str]: + """Create a get_image_url closure bound to a TMDB account.""" + from src.server.nfo.tmdb_client import get_tmdb_image_base_url + + base = get_tmdb_image_base_url(tmdb_id) + + def get_image_url(path: str, size: str = "original") -> str: + if not path: + return "" + return f"{base}{size}{path}" + + return get_image_url + + async def _update_series_nfo_flag( + self, + key: str, + has_nfo: bool, + nfo_path: str, + ) -> None: + """Update the has_nfo flag and nfo_path in the database. 
+ + Args: + key: Series key (primary identifier) + has_nfo: Whether the series now has an NFO file + nfo_path: Path to the NFO file + """ + try: + from src.server.database.connection import get_db_session + from src.server.database.service import AnimeSeriesService + + async with get_db_session() as db: + series = await AnimeSeriesService.get_by_key(db, key) + if series: + now = datetime.now(timezone.utc) + series.has_nfo = has_nfo + series.nfo_path = nfo_path + if series.nfo_created_at is None: + series.nfo_created_at = now + series.nfo_updated_at = now + await db.flush() + logger.debug("Updated NFO flag for series: %s", key) + except Exception as exc: + logger.warning("Failed to update NFO flag for %s: %s", key, exc) + + async def cancel_scan(self) -> bool: + """Cancel the current NFO scan if one is in progress. + + Returns: + True if scan was cancelled, False if no scan in progress. + """ + async with self._lock: + if not self._is_scanning: + return False + + self._is_scanning = False + + if self._current_scan: + self._current_scan.status = "cancelled" + self._current_scan.message = "NFO scan cancelled by user" + self._current_scan.updated_at = datetime.now(timezone.utc) + + if self._current_scan: + await self._emit_scan_event({ + "type": "nfo_scan_cancelled", + "scan_id": self._current_scan.scan_id, + "message": "NFO scan cancelled by user", + }) + + logger.info("NFO scan cancelled") + return True + + async def get_scan_status(self) -> Dict[str, Any]: + """Get the current NFO scan status. + + Returns: + Dict with is_scanning and current_scan data. 
+ """ + return { + "is_scanning": self._is_scanning, + "current_scan": ( + self._current_scan.to_dict() if self._current_scan else None + ), + } + + +# --------------------------------------------------------------------------- +# Module-level singleton +# --------------------------------------------------------------------------- + +_nfo_scan_service: Optional[NfoScanService] = None + + +def get_nfo_scan_service() -> NfoScanService: + """Return the singleton NfoScanService instance.""" + global _nfo_scan_service + if _nfo_scan_service is None: + _nfo_scan_service = NfoScanService() + return _nfo_scan_service + + +def reset_nfo_scan_service() -> None: + """Reset the singleton NfoScanService instance (for testing).""" + global _nfo_scan_service + _nfo_scan_service = None \ No newline at end of file diff --git a/src/server/services/scheduler/scheduler_service.py b/src/server/services/scheduler/scheduler_service.py index 592b2b9..241ed33 100644 --- a/src/server/services/scheduler/scheduler_service.py +++ b/src/server/services/scheduler/scheduler_service.py @@ -11,7 +11,7 @@ from __future__ import annotations import logging from datetime import datetime, timedelta, timezone -from typing import List, Optional +from typing import Any, Dict, List, Optional from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.triggers.cron import CronTrigger @@ -261,6 +261,9 @@ class SchedulerService: "auto_download_after_rescan": ( self._config.auto_download_after_rescan if self._config else False ), + "nfo_scan_after_rescan": ( + self._config.nfo_scan_after_rescan if self._config else True + ), "last_run": ( self._last_scan_time.isoformat() if self._last_scan_time @@ -375,7 +378,19 @@ class SchedulerService: # 1. Main library rescan await self._run_rescan() - # 2. Auto-download (if enabled) + # 2. 
NFO scan (if enabled) + if self._config and self._config.nfo_scan_after_rescan: + try: + nfo_result = await self._run_nfo_scan() + await self._broadcast("nfo_scan_completed", { + "created": nfo_result.get("created", 0), + "updated": nfo_result.get("updated", 0), + }) + except Exception as exc: + logger.error("NFO scan failed: %s", exc, exc_info=True) + await self._broadcast("nfo_scan_error", {"error": str(exc)}) + + # 3. Auto-download (if enabled) if self._config and self._config.auto_download_after_rescan: try: queued = await self._run_auto_download() @@ -419,6 +434,24 @@ class SchedulerService: await anime_service.rescan() logger.info("anime_service.rescan() completed") + async def _run_nfo_scan(self) -> Dict[str, Any]: + """Run NFO validation and creation across all series.""" + from src.server.services.nfo_scan_service import get_nfo_scan_service + from src.server.utils.dependencies import get_anime_service + + anime_service = get_anime_service() + nfo_scan_service = get_nfo_scan_service() + + logger.info("Starting NFO scan...") + result = await nfo_scan_service.scan_all(anime_service) + logger.info( + "NFO scan completed: created=%d updated=%d errors=%d", + result.get("created", 0), + result.get("updated", 0), + result.get("errors_count", 0), + ) + return result + async def _run_auto_download(self) -> int: """Queue and start downloads for all series with missing episodes.""" from src.server.models.download import EpisodeIdentifier diff --git a/tests/unit/test_base_provider.py b/tests/unit/test_base_provider.py index 2050fea..52da8f0 100644 --- a/tests/unit/test_base_provider.py +++ b/tests/unit/test_base_provider.py @@ -103,6 +103,9 @@ class ConcreteLoader(Loader): def get_title(self, key: str) -> str: return f"Title for {key}" + def get_year(self, key: str) -> int | None: + return 2024 + def get_season_episode_count(self, slug: str) -> Dict[int, int]: return {1: 12, 2: 24} diff --git a/tests/unit/test_enhanced_provider.py b/tests/unit/test_enhanced_provider.py 
index 1fe0483..8807bc1 100644 --- a/tests/unit/test_enhanced_provider.py +++ b/tests/unit/test_enhanced_provider.py @@ -45,6 +45,9 @@ class ConcreteEnhancedLoader(EnhancedAniWorldLoader): def get_title(self, key: str) -> str: return self.GetTitle(key) + def get_year(self, key: str) -> int | None: + return self.GetYear(key) + @pytest.fixture def enhanced_loader(): @@ -969,6 +972,7 @@ class TestHlsUrlDetection: def test_voe_hls_pattern_extracts_hls_url(self): """HLS_PATTERN should extract HLS URL from VOE embedded player HTML.""" import re + from src.server.providers.streaming.voe import HLS_PATTERN html_with_hls = """ @@ -984,6 +988,7 @@ class TestHlsUrlDetection: def test_voe_hls_pattern_returns_none_when_no_hls(self): """HLS_PATTERN should return None when no HLS URL in HTML.""" import re + from src.server.providers.streaming.voe import HLS_PATTERN html_no_hls = """ @@ -997,6 +1002,7 @@ class TestHlsUrlDetection: def test_hls_url_detection_in_provider_flow(self, enhanced_loader, tmp_path): """Provider should detect and handle HLS URLs from VOE extractor.""" import re + from src.server.providers.streaming.voe import HLS_PATTERN # Simulate VOE returning an HLS URL (base64 encoded .m3u8) diff --git a/tests/unit/test_monitored_provider.py b/tests/unit/test_monitored_provider.py index 502767e..16b48b3 100644 --- a/tests/unit/test_monitored_provider.py +++ b/tests/unit/test_monitored_provider.py @@ -53,6 +53,9 @@ class MockProvider(Loader): def get_title(self, key): return self._title + def get_year(self, key) -> int | None: + return 2024 + def get_season_episode_count(self, slug): return self._season_episodes @@ -66,6 +69,9 @@ class ConcreteMonitoredWrapper(MonitoredProviderWrapper): def unsubscribe_download_progress(self, handler): pass + def get_year(self, key: str) -> int | None: + return None + @pytest.fixture def mock_provider(): diff --git a/tests/unit/test_provider_edge_cases.py b/tests/unit/test_provider_edge_cases.py index 9d2c6af..499aeba 100644 --- 
a/tests/unit/test_provider_edge_cases.py +++ b/tests/unit/test_provider_edge_cases.py @@ -25,6 +25,7 @@ def _loader() -> AniworldLoader: loader = AniworldLoader.__new__(AniworldLoader) loader._KeyHTMLDict = {} loader._EpisodeHTMLDict = {} + loader._YearDict = {} loader.ANIWORLD_TO = "https://aniworld.to" loader.DEFAULT_REQUEST_TIMEOUT = 10 loader.session = MagicMock()