feat: add NFO scan after rescan and year caching

- Add nfo_scan_after_rescan config option (default: true)
- Implement year caching in AniworldLoader and EnhancedAniWorldLoader
- Make get_year an abstract method in the base provider
- Run NFO validation/creation after a scheduled rescan completes
- Add _YearDict cache to avoid re-extracting the year from HTML
2026-06-05 18:15:41 +02:00
parent 8b21f1243f
commit e74b04c1ee
10 changed files with 839 additions and 35 deletions
--- a/src/server/models/config.py
+++ b/src/server/models/config.py
@@ -39,6 +39,12 @@ class SchedulerConfig(BaseModel):
        description="Automatically queue and start downloads for all missing "
                    "episodes after a scheduled rescan completes.",
    )
+    nfo_scan_after_rescan: bool = Field(
+        default=True,
+        description="Run NFO validation and creation after a scheduled rescan "
+                    "completes. Checks each series folder for tvshow.nfo and "
+                    "creates or fills missing properties.",
+    )
    # Legacy alias fields — read via Pydantic alias
    auto_download: Optional[bool] = Field(default=None, alias="auto_download")

--- a/src/server/providers/aniworld_provider.py
+++ b/src/server/providers/aniworld_provider.py
@@ -158,6 +158,7 @@ class AniworldLoader(Loader):

        self._KeyHTMLDict = {}
        self._EpisodeHTMLDict = {}
+        self._YearDict = {}
        self.Providers = Providers()

        # Events: download_progress is triggered with progress dict
@@ -774,55 +775,81 @@ class AniworldLoader(Loader):
            if span_tag:
                title = span_tag.text
                logger.debug("Found title: %s", title)
+
+                # Also try to extract year from sibling p tag "Jahr: {year}"
+                # Year is typically right after title in the HTML structure
+                year = self._extract_year_from_soup(soup)
+                if year is not None:
+                    self._YearDict[key] = year
+                    logger.debug("Cached year %d for key: %s", year, key)
+
                return title

        logger.warning("No title found for key: %s", key)
        return ""

+    def _extract_year_from_soup(self, soup: BeautifulSoup) -> int | None:
+        """Pull a four-digit release year out of an already parsed series page.
+
+        Two places are tried in order: first every ``<p>`` whose text contains
+        the label ``Jahr:`` or ``Year:`` (the first run of four digits in that
+        paragraph wins), then the ``div.series-info`` block (the first
+        standalone 19xx/20xx number wins).
+
+        Args:
+            soup: Parsed BeautifulSoup document of the series page.
+
+        Returns:
+            The year as an int, or None when neither lookup matches.
+        """
+        # Labelled metadata paragraphs take precedence over the info block.
+        for paragraph in soup.find_all('p'):
+            paragraph_text = paragraph.get_text()
+            if not ('Jahr:' in paragraph_text or 'Year:' in paragraph_text):
+                continue
+            digits = re.search(r'(\d{4})', paragraph_text)
+            if digits:
+                return int(digits.group(1))
+
+        # Fallback: first plausible year mentioned in the series-info div.
+        series_info = soup.find('div', class_='series-info')
+        if not series_info:
+            return None
+
+        plausible_year = re.search(
+            r'\b(19\d{2}|20\d{2})\b', series_info.get_text()
+        )
+        return int(plausible_year.group(1)) if plausible_year else None
+
    def get_year(self, key: str) -> int | None:
        """Get anime release year from series key.
-        
-        Attempts to extract the year from the series page metadata.
-        Returns None if year cannot be determined.
-        
+
+        Uses cached year from get_title if available,
+        otherwise extracts and caches it.
+
        Args:
            key: Series identifier
-            
+
        Returns:
-            int or None: Release year if found, None otherwise
+            Release year or None if not found
        """
        logger.debug("Getting year for key: %s", key)
+
+        # Check cache first
+        if key in self._YearDict:
+            logger.debug("Using cached year %d for key: %s", self._YearDict[key], key)
+            return self._YearDict[key]
+
+        # Not cached - extract from HTML
        try:
            soup = BeautifulSoup(
                _decode_html_content(self._get_key_html(key).content),
                'html.parser'
            )
-            
-            # Try to find year in metadata
-            # Check for "Jahr:" or similar metadata fields
-            for p_tag in soup.find_all('p'):
-                text = p_tag.get_text()
-                if 'Jahr:' in text or 'Year:' in text:
-                    # Extract year from text like "Jahr: 2025"
-                    match = re.search(r'(\d{4})', text)
-                    if match:
-                        year = int(match.group(1))
-                        logger.debug("Found year in metadata: %s", year)
-                        return year
-            
-            # Try alternative: look for year in genre/info section
-            info_div = soup.find('div', class_='series-info')
-            if info_div:
-                text = info_div.get_text()
-                match = re.search(r'\b(19\d{2}|20\d{2})\b', text)
-                if match:
-                    year = int(match.group(1))
-                    logger.debug("Found year in info section: %s", year)
-                    return year
-            
-            logger.debug("No year found for key: %s", key)
-            return None
-            
+
+            year = self._extract_year_from_soup(soup)
+            if year is not None:
+                self._YearDict[key] = year
+                logger.debug("Found and cached year %d for key: %s", year, key)
+
+            return year
+
        except Exception as e:
            logger.warning("Error extracting year for key %s: %s", key, e)
            return None
--- a/src/server/providers/base_provider.py
+++ b/src/server/providers/base_provider.py
@@ -91,6 +91,17 @@ class Loader(ABC):
            Series title string
        """

+    @abstractmethod
+    def get_year(self, key: str) -> int | None:
+        """Return the release year of the series identified by ``key``.
+
+        Declared abstract, so every concrete loader has to provide its own
+        implementation under exactly this name.
+
+        Args:
+            key: Unique series identifier/key
+
+        Returns:
+            Release year as integer, or None if year cannot be determined
+        """
+
    @abstractmethod
    def get_season_episode_count(self, slug: str) -> Dict[int, int]:
        """Get season and episode counts for a series.
--- a/src/server/providers/enhanced_provider.py
+++ b/src/server/providers/enhanced_provider.py
@@ -110,6 +110,7 @@ class EnhancedAniWorldLoader(Loader):
        # Cache dictionaries
        self._KeyHTMLDict = {}
        self._EpisodeHTMLDict = {}
+        self._YearDict = {}
        
        # Provider manager
        self.Providers = Providers()
@@ -666,6 +667,10 @@ class EnhancedAniWorldLoader(Loader):
                if title_span:
                    span = title_span.find('span')
                    if span:
+                        # Extract and cache year from soup if available
+                        year = self._ExtractYearFromSoup(soup)
+                        if year is not None:
+                            self._YearDict[key] = year
                        return span.text.strip()
            
            self.logger.warning("Could not extract title for key: %s", key)
@@ -674,7 +679,62 @@ class EnhancedAniWorldLoader(Loader):
        except Exception as e:
            self.logger.error("Failed to get title for key %s: %s", key, e)
            raise RetryableError(f"Title extraction failed: {e}") from e
-    
+
+    def _ExtractYearFromSoup(self, soup: BeautifulSoup) -> int | None:
+        """Find the release year in a parsed series page.
+
+        The labelled ``Jahr:`` / ``Year:`` paragraphs are searched first; if
+        none of them yields four consecutive digits, the ``series-info`` div
+        is searched for a standalone 19xx/20xx number.
+
+        Args:
+            soup: Parsed BeautifulSoup object
+
+        Returns:
+            Year as int or None if not found
+        """
+        labels = ('Jahr:', 'Year:')
+        for candidate in soup.find_all('p'):
+            content = candidate.get_text()
+            if any(label in content for label in labels):
+                found = re.search(r'(\d{4})', content)
+                if found:
+                    return int(found.group(1))
+
+        info_block = soup.find('div', class_='series-info')
+        if info_block:
+            found = re.search(r'\b(19\d{2}|20\d{2})\b', info_block.get_text())
+            if found:
+                return int(found.group(1))
+
+        return None
+
+    def GetYear(self, key: str) -> int | None:
+        """Get anime release year from series key.
+
+        Returns the value stored in ``_YearDict`` when one exists; otherwise
+        the series page is fetched and parsed, and a successfully extracted
+        year is stored for later calls. A lookup that yields None is not
+        cached, so it is retried on the next call.
+
+        Args:
+            key: Series identifier
+
+        Returns:
+            Release year or None if not found
+        """
+        # Check cache first
+        try:
+            return self._YearDict[key]
+        except KeyError:
+            pass
+
+        # Not cached - extract from HTML. Best effort: any failure while
+        # fetching or parsing is logged and reported as "unknown year".
+        try:
+            soup = BeautifulSoup(self._GetKeyHTML(key).content, 'html.parser')
+            year = self._ExtractYearFromSoup(soup)
+            if year is not None:
+                self._YearDict[key] = year
+            return year
+
+        except Exception as e:
+            self.logger.warning("Error extracting year for key %s: %s", key, e)
+            return None
+
+    def get_year(self, key: str) -> int | None:
+        """Get anime release year from series key (snake_case entry point).
+
+        The ``Loader`` base class declares ``get_year`` as an abstract method;
+        without an implementation under that exact name this class could not
+        be instantiated. Delegates to ``GetYear``.
+
+        Args:
+            key: Series identifier
+
+        Returns:
+            Release year or None if not found
+        """
+        return self.GetYear(key)
+
    def GetSiteKey(self) -> str:
        """Get site identifier."""
        return "aniworld.to"
--- a/src/server/services/nfo_scan_service.py
+++ b/src/server/services/nfo_scan_service.py
@@ -0,0 +1,651 @@
+"""NFO scan service for validating and creating tvshow.nfo files.
+
+This module provides a service layer for scanning the anime library,
+checking whether each series has a valid tvshow.nfo file, creating
+missing files, and filling in missing properties from TMDB metadata.
+
+All series are identified by 'key' (provider-assigned, URL-safe
+identifier). 'folder' is used as metadata only for filesystem paths.
+"""
+from __future__ import annotations
+
+import asyncio
+import os
+from datetime import datetime, timezone
+from typing import Any, Callable, Dict, List, Optional
+
+import structlog
+
+from src.config.settings import settings
+from src.server.nfo.nfo_generator import generate_tvshow_nfo
+from src.server.nfo.nfo_mapper import tmdb_to_nfo_model
+from src.server.nfo.nfo_models import TVShowNFO
+
+logger = structlog.get_logger(__name__)
+
+
+class NfoScanServiceError(Exception):
+    """Service-level exception for NFO scan operations.
+
+    Within this module it is raised by ``NfoScanService.scan_all`` when a scan
+    is requested while another one is running, and when the series list
+    cannot be retrieved.
+    """
+
+
+class NfoScanProgress:
+    """Tracks the current state of an NFO scan operation.
+
+    Attributes:
+        scan_id: Unique identifier for this scan
+        status: Current status (started, in_progress, completed, failed, cancelled)
+        total: Total number of series to scan
+        current: Number of series processed
+        percentage: Completion percentage
+        message: Human-readable progress message
+        key: Current series key being processed (metadata only)
+        folder: Current series folder being processed (metadata only)
+        created: Number of NFO files created
+        updated: Number of NFO files updated
+        errors: List of error messages encountered
+        started_at: When the scan started
+        updated_at: When progress was last updated
+    """
+
+    def __init__(self, scan_id: str):
+        """Create a fresh record in the "started" state.
+
+        Args:
+            scan_id: Identifier reported in every serialized snapshot.
+        """
+        # A single timestamp so a new record reports started_at == updated_at.
+        now = datetime.now(timezone.utc)
+
+        self.scan_id = scan_id
+        self.status = "started"
+        self.total = 0
+        self.current = 0
+        self.percentage = 0.0
+        self.message = "Initializing NFO scan..."
+        self.key: Optional[str] = None
+        self.folder: Optional[str] = None
+        self.started_at = now
+        self.updated_at = now
+        self.created = 0
+        self.updated = 0
+        self.errors: List[str] = []
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a serializable snapshot of the current progress.
+
+        ``key`` and ``folder`` are only included once they have been set.
+        ``errors`` is copied so that a snapshot handed to an event subscriber
+        does not change when later errors are appended to this record.
+
+        Returns:
+            Dict with timestamps rendered as ISO 8601 strings and the
+            percentage rounded to two decimals.
+        """
+        result: Dict[str, Any] = {
+            "scan_id": self.scan_id,
+            "status": self.status,
+            "total": self.total,
+            "current": self.current,
+            "percentage": round(self.percentage, 2),
+            "message": self.message,
+            "started_at": self.started_at.isoformat(),
+            "updated_at": self.updated_at.isoformat(),
+            "created": self.created,
+            "updated": self.updated,
+            "errors": list(self.errors),
+        }
+        if self.key is not None:
+            result["key"] = self.key
+        if self.folder is not None:
+            result["folder"] = self.folder
+        return result
+
+
+class NfoScanService:
+    """Manages NFO validation and creation for anime series.
+
+    Scans the anime library directory, checks each series folder for
+    a tvshow.nfo file, creates missing files, and fills in missing
+    or empty properties from TMDB metadata.
+
+    Uses 'key' as the primary series identifier and 'folder' as
+    metadata only for filesystem operations.
+    """
+
+    def __init__(self) -> None:
+        """Create an idle service with no subscribers and no scan record."""
+        # Callables notified by _emit_scan_event with each event dict.
+        self._scan_event_handlers: List[Callable[[Dict[str, Any]], None]] = []
+
+        # The lock guards transitions of the _is_scanning flag.
+        self._lock = asyncio.Lock()
+        self._is_scanning = False
+
+        # Progress record of the most recent scan, if any has been started.
+        self._current_scan: Optional[NfoScanProgress] = None
+
+        logger.info("NfoScanService initialized")
+
+    def subscribe_to_scan_events(
+        self,
+        handler: Callable[[Dict[str, Any]], None],
+    ) -> None:
+        """Register ``handler`` to be called with every NFO scan event dict.
+
+        No de-duplication is done: registering the same handler twice means
+        it is notified twice.
+        """
+        self._scan_event_handlers += [handler]
+
+    def unsubscribe_from_scan_events(
+        self,
+        handler: Callable[[Dict[str, Any]], None],
+    ) -> None:
+        """Remove one registration of ``handler``.
+
+        An unknown handler is not an error; a warning is logged instead.
+        """
+        if handler not in self._scan_event_handlers:
+            logger.warning("Handler not found for unsubscribe")
+            return
+        self._scan_event_handlers.remove(handler)
+
+    async def _emit_scan_event(self, event_data: Dict[str, Any]) -> None:
+        """Deliver ``event_data`` to every subscribed handler.
+
+        Handlers are called one after another. If a call returns an awaitable
+        (an ``async def`` handler, or a plain callable wrapping one) it is
+        awaited before the next handler runs. An exception raised by a handler
+        is logged and does not prevent the remaining handlers from running.
+
+        Args:
+            event_data: Event payload passed unchanged to each handler.
+        """
+        import inspect  # local import: only this method needs it
+
+        # Iterate over a snapshot: a handler may subscribe or unsubscribe
+        # while it is being notified, and mutating a list during iteration
+        # silently skips entries.
+        for handler in tuple(self._scan_event_handlers):
+            try:
+                outcome = handler(event_data)
+                if inspect.isawaitable(outcome):
+                    await outcome
+            except Exception as e:
+                logger.error("NFO scan event handler error", error=str(e))
+
+    @property
+    def is_scanning(self) -> bool:
+        """Whether the in-progress flag is currently set."""
+        return self._is_scanning
+
+    @property
+    def current_scan(self) -> Optional[NfoScanProgress]:
+        """Progress record of the most recently started scan, or None."""
+        return self._current_scan
+
+    async def scan_all(
+        self,
+        anime_service: Any,  # AnimeService instance
+    ) -> Dict[str, Any]:
+        """Run NFO validation and creation across all series.
+
+        Every series returned by ``anime_service.list_series_with_filters()``
+        is passed to ``_scan_series``. A failure for one series is recorded in
+        the progress record and does not stop the run. The loop stops before
+        the next series once ``cancel_scan`` has marked this run as cancelled;
+        in that case no "nfo_scan_completed" event is emitted.
+
+        The in-progress flag is always released when this method exits, also
+        when an unexpected exception propagates.
+
+        Args:
+            anime_service: AnimeService instance for accessing series data.
+
+        Returns:
+            Summary dict with keys: total, created, updated, errors_count,
+            scan_id, and duration_seconds.
+
+        Raises:
+            NfoScanServiceError: If a scan is already in progress or the
+                series list cannot be retrieved.
+        """
+        import uuid  # local import: only needed to mint the scan id
+
+        async with self._lock:
+            if self._is_scanning:
+                raise NfoScanServiceError("An NFO scan is already in progress")
+            self._is_scanning = True
+            # id(self) is the same for every run of a long-lived service, so
+            # a random suffix is used to tell individual scans apart.
+            scan_id = f"nfo_scan_{uuid.uuid4().hex}"
+            scan_progress = NfoScanProgress(scan_id)
+            self._current_scan = scan_progress
+
+        try:
+            logger.info("Starting NFO scan")
+
+            # Emit scan started
+            await self._emit_scan_event({
+                "type": "nfo_scan_started",
+                "scan_id": scan_id,
+                "message": "NFO scan started",
+            })
+
+            # Get all series from AnimeService
+            try:
+                series_list = await anime_service.list_series_with_filters()
+            except Exception as exc:
+                logger.error("Failed to get series list: %s", exc)
+                scan_progress.status = "failed"
+                scan_progress.message = f"Failed to get series list: {exc}"
+                scan_progress.updated_at = datetime.now(timezone.utc)
+                raise NfoScanServiceError(
+                    f"Failed to get series list: {exc}"
+                ) from exc
+
+            if not series_list:
+                logger.info("No series found — NFO scan complete")
+                scan_progress.status = "completed"
+                scan_progress.message = "No series found"
+                scan_progress.percentage = 100.0
+                scan_progress.updated_at = datetime.now(timezone.utc)
+
+                # Release before notifying so handlers observe an idle service.
+                await self._release_scan_slot(scan_progress)
+
+                await self._emit_scan_event({
+                    "type": "nfo_scan_completed",
+                    "scan_id": scan_id,
+                    "success": True,
+                    "message": "No series found",
+                    "data": scan_progress.to_dict(),
+                })
+                return {
+                    "total": 0,
+                    "created": 0,
+                    "updated": 0,
+                    "errors_count": 0,
+                    "scan_id": scan_id,
+                    "duration_seconds": 0.0,
+                }
+
+            scan_progress.total = len(series_list)
+            scan_progress.status = "in_progress"
+            scan_progress.message = f"Scanning {scan_progress.total} series..."
+            scan_progress.updated_at = datetime.now(timezone.utc)
+
+            start_time = datetime.now(timezone.utc)
+
+            for idx, series in enumerate(series_list):
+                # cancel_scan() sets this status on the current progress
+                # record; honour it before starting work on another series.
+                if scan_progress.status == "cancelled":
+                    break
+
+                key = series.get("key", "")
+                folder = series.get("folder", "")
+                name = series.get("name", "")
+
+                scan_progress.key = key
+                scan_progress.folder = folder
+                scan_progress.message = f"Scanning: {name}"
+                scan_progress.updated_at = datetime.now(timezone.utc)
+
+                await self._emit_scan_event({
+                    "type": "nfo_scan_progress",
+                    "data": scan_progress.to_dict(),
+                })
+
+                try:
+                    result = await self._scan_series(key, folder, series)
+                    if result == "created":
+                        scan_progress.created += 1
+                    elif result == "updated":
+                        scan_progress.updated += 1
+                except Exception as exc:
+                    # One broken series must not abort the whole run.
+                    error_msg = f"NFO scan failed for {key}: {exc}"
+                    logger.warning(error_msg)
+                    scan_progress.errors.append(error_msg)
+
+                scan_progress.current = idx + 1
+                scan_progress.percentage = round(
+                    (scan_progress.current / scan_progress.total) * 100, 2
+                )
+                scan_progress.updated_at = datetime.now(timezone.utc)
+
+            end_time = datetime.now(timezone.utc)
+            duration = (end_time - start_time).total_seconds()
+            error_count = len(scan_progress.errors)
+
+            summary: Dict[str, Any] = {
+                "total": scan_progress.total,
+                "created": scan_progress.created,
+                "updated": scan_progress.updated,
+                "errors_count": error_count,
+                "scan_id": scan_id,
+                "duration_seconds": round(duration, 2),
+            }
+
+            if scan_progress.status == "cancelled":
+                # Keep the cancelled status and message; cancel_scan() has
+                # already emitted the terminal event for this run.
+                return summary
+
+            scan_progress.status = "completed"
+            scan_progress.message = (
+                f"NFO scan completed: {scan_progress.created} created, "
+                f"{scan_progress.updated} updated, {error_count} errors"
+            )
+            scan_progress.percentage = 100.0
+            scan_progress.updated_at = end_time
+
+            # Release before notifying so handlers observe an idle service.
+            await self._release_scan_slot(scan_progress)
+
+            logger.info(
+                "NFO scan completed: total=%d created=%d updated=%d errors=%d duration=%.2fs",
+                scan_progress.total,
+                scan_progress.created,
+                scan_progress.updated,
+                error_count,
+                duration,
+            )
+
+            await self._emit_scan_event({
+                "type": "nfo_scan_completed",
+                "scan_id": scan_id,
+                "success": True,
+                "message": scan_progress.message,
+                "data": scan_progress.to_dict(),
+                "statistics": {
+                    "total": scan_progress.total,
+                    "created": scan_progress.created,
+                    "updated": scan_progress.updated,
+                    "errors_count": error_count,
+                },
+            })
+
+            return summary
+        finally:
+            # Safety net for every exit path, including unexpected exceptions.
+            await self._release_scan_slot(scan_progress)
+
+    async def _release_scan_slot(self, scan_progress: NfoScanProgress) -> None:
+        """Clear the in-progress flag if ``scan_progress`` is still the current scan.
+
+        The identity check keeps a run that was cancelled and then superseded
+        by a newer scan from clearing the newer scan's flag.
+        """
+        async with self._lock:
+            if self._current_scan is scan_progress:
+                self._is_scanning = False
+
+    async def _scan_series(
+        self,
+        key: str,
+        folder: str,
+        series_data: Dict[str, Any],
+    ) -> Optional[str]:
+        """Scan and update NFO for a single series.
+
+        Args:
+            key: Series key (primary identifier)
+            folder: Series folder name (metadata for filesystem path)
+            series_data: Series data dict from anime_service
+
+        Returns:
+            "created" if a new tvshow.nfo now exists on disk, "updated" if an
+            existing one was rewritten, None if nothing was written (no
+            folder, no anime directory configured, NFO already complete, or
+            the NFO could not be produced).
+        """
+        if not folder:
+            logger.debug("Skipping series with no folder: key=%s", key)
+            return None
+
+        anime_dir = getattr(settings, "anime_directory", None)
+        if not anime_dir:
+            logger.warning("anime_directory not configured — skipping NFO scan")
+            return None
+
+        nfo_path = os.path.join(anime_dir, folder, "tvshow.nfo")
+
+        if not os.path.isfile(nfo_path):
+            # Create new NFO
+            logger.info("Creating NFO for series: %s (%s)", key, folder)
+            await self._create_nfo(key, folder, series_data, nfo_path)
+
+            # _create_nfo returns without writing when there is no tmdb_id or
+            # no TMDB data. Only report "created" (and flag the series in the
+            # database) when the file is really there.
+            if not os.path.isfile(nfo_path):
+                return None
+
+            await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path)
+            return "created"
+
+        # NFO exists — check if it needs updating
+        updated = await self._update_nfo_if_needed(key, folder, series_data, nfo_path)
+        if updated:
+            await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path)
+            return "updated"
+
+        return None
+
+    async def _create_nfo(
+        self,
+        key: str,
+        folder: str,
+        series_data: Dict[str, Any],
+        nfo_path: str,
+    ) -> None:
+        """Write a new tvshow.nfo built from TMDB metadata.
+
+        Nothing is written (only a warning is logged) when the series has no
+        ``tmdb_id`` or when no TMDB data could be obtained. After a successful
+        write an "nfo_created" event is emitted.
+
+        Args:
+            key: Series key
+            folder: Series folder name
+            series_data: Series data from anime_service
+            nfo_path: Full path to the NFO file to create
+        """
+        tmdb_id = series_data.get("tmdb_id")
+        if not tmdb_id:
+            logger.warning(
+                "Cannot create NFO for %s: no tmdb_id available",
+                key,
+            )
+            return
+
+        try:
+            metadata = await self._fetch_tmdb_data(tmdb_id)
+        except Exception as exc:
+            logger.warning("Failed to fetch TMDB data for %s: %s", key, exc)
+            return
+
+        if not metadata:
+            logger.warning("No TMDB data for %s", key)
+            return
+
+        image_url_builder = self._make_tmdb_image_url(tmdb_id)
+        xml_content = generate_tvshow_nfo(
+            tmdb_to_nfo_model(
+                metadata,
+                content_ratings=None,
+                get_image_url=image_url_builder,
+                image_size="original",
+            )
+        )
+
+        # The series folder may not exist yet; create it before writing.
+        target_dir = os.path.dirname(nfo_path)
+        os.makedirs(target_dir, exist_ok=True)
+
+        with open(nfo_path, "w", encoding="utf-8") as nfo_file:
+            nfo_file.write(xml_content)
+
+        logger.info("Created tvshow.nfo for %s at %s", key, nfo_path)
+
+        created_event = {
+            "type": "nfo_created",
+            "key": key,
+            "folder": folder,
+            "path": nfo_path,
+        }
+        await self._emit_scan_event(created_event)
+
+    async def _update_nfo_if_needed(
+        self,
+        key: str,
+        folder: str,
+        series_data: Dict[str, Any],
+        nfo_path: str,
+    ) -> bool:
+        """Rewrite an existing NFO when critical fields are missing or empty.
+
+        The file is parsed with lxml. If it cannot be parsed it is handed to
+        ``_regenerate_nfo``. If any of title, plot, premiered or tmdbid is
+        absent or blank, the whole file is regenerated from TMDB data (it is
+        not patched field by field) and an "nfo_updated" event is emitted.
+
+        Args:
+            key: Series key
+            folder: Series folder name
+            series_data: Series data from anime_service
+            nfo_path: Full path to the existing NFO file
+
+        Returns:
+            True if the NFO file was rewritten, False otherwise.
+        """
+        try:
+            from lxml import etree
+        except ImportError:
+            logger.warning("lxml not available — cannot update existing NFO files")
+            return False
+
+        try:
+            root = etree.parse(nfo_path).getroot()
+        except Exception as exc:
+            logger.warning("Failed to parse existing NFO for %s: %s — will regenerate", key, exc)
+            # Corrupt or unreadable NFO — regenerate from TMDB
+            return await self._regenerate_nfo(key, folder, series_data, nfo_path)
+
+        def _is_blank(tag: str) -> bool:
+            """True when the element is absent or has no non-whitespace text."""
+            node = root.find(tag)
+            return node is None or not (node.text or "").strip()
+
+        missing_fields: List[str] = [
+            tag for tag in ("title", "plot", "premiered", "tmdbid") if _is_blank(tag)
+        ]
+
+        if not missing_fields:
+            logger.debug("NFO for %s is complete — no update needed", key)
+            return False
+
+        logger.info(
+            "NFO for %s is missing fields %s — attempting to fill from TMDB",
+            key,
+            missing_fields,
+        )
+
+        tmdb_id = series_data.get("tmdb_id")
+        if not tmdb_id:
+            logger.warning("Cannot update NFO for %s: no tmdb_id", key)
+            return False
+
+        try:
+            metadata = await self._fetch_tmdb_data(tmdb_id)
+        except Exception as exc:
+            logger.warning("Failed to fetch TMDB data for %s: %s", key, exc)
+            return False
+
+        if not metadata:
+            return False
+
+        fresh_model = tmdb_to_nfo_model(
+            metadata,
+            content_ratings=None,
+            get_image_url=self._make_tmdb_image_url(tmdb_id),
+            image_size="original",
+        )
+
+        # Replace the file content with the freshly generated document.
+        with open(nfo_path, "w", encoding="utf-8") as nfo_file:
+            nfo_file.write(generate_tvshow_nfo(fresh_model))
+
+        logger.info("Updated NFO for %s (filled %d fields)", key, len(missing_fields))
+
+        updated_event = {
+            "type": "nfo_updated",
+            "key": key,
+            "folder": folder,
+            "path": nfo_path,
+            "missing_fields": missing_fields,
+        }
+        await self._emit_scan_event(updated_event)
+
+        return True
+
+    async def _regenerate_nfo(
+        self,
+        key: str,
+        folder: str,
+        series_data: Dict[str, Any],
+        nfo_path: str,
+    ) -> bool:
+        """Overwrite an unreadable NFO with one generated from TMDB data.
+
+        Args:
+            key: Series key (used for logging)
+            folder: Series folder name (not used by this method)
+            series_data: Series data; its ``tmdb_id`` selects the TMDB entry
+            nfo_path: Full path of the NFO file to overwrite
+
+        Returns:
+            True if the file was rewritten, False when there is no
+            ``tmdb_id`` or no TMDB data could be obtained.
+        """
+        tmdb_id = series_data.get("tmdb_id")
+        if not tmdb_id:
+            return False
+
+        try:
+            metadata = await self._fetch_tmdb_data(tmdb_id)
+        except Exception as exc:
+            logger.warning("Failed to fetch TMDB data for %s during regeneration: %s", key, exc)
+            return False
+
+        if not metadata:
+            return False
+
+        rebuilt_model = tmdb_to_nfo_model(
+            metadata,
+            content_ratings=None,
+            get_image_url=self._make_tmdb_image_url(tmdb_id),
+            image_size="original",
+        )
+
+        with open(nfo_path, "w", encoding="utf-8") as nfo_file:
+            nfo_file.write(generate_tvshow_nfo(rebuilt_model))
+
+        logger.info("Regenerated NFO for %s", key)
+        return True
+
+    async def _fetch_tmdb_data(self, tmdb_id: int) -> Optional[Dict[str, Any]]:
+        """Fetch series metadata from the TMDB client.
+
+        Any exception raised while importing the client or performing the
+        request is logged and converted into a None result, so this method
+        does not raise for those failures.
+
+        Args:
+            tmdb_id: TMDB series ID
+
+        Returns:
+            Whatever ``get_series_details`` returns, or None on failure.
+        """
+        try:
+            from src.server.nfo.tmdb_client import get_tmdb_client
+
+            return await get_tmdb_client().get_series_details(tmdb_id)
+        except Exception as exc:
+            logger.warning("TMDB fetch failed for TMDB ID %s: %s", tmdb_id, exc)
+            return None
+
+    def _make_tmdb_image_url(self, tmdb_id: int) -> Callable[[str, str], str]:
+        """Build a ``get_image_url(path, size)`` function for NFO generation.
+
+        The base URL is looked up once through
+        ``get_tmdb_image_base_url(tmdb_id)``; the returned function joins
+        base, size and path without adding separators.
+
+        Args:
+            tmdb_id: Value handed to ``get_tmdb_image_base_url``.
+
+        Returns:
+            Function returning the full image URL, or "" for an empty path.
+        """
+        from src.server.nfo.tmdb_client import get_tmdb_image_base_url
+
+        base = get_tmdb_image_base_url(tmdb_id)
+
+        def get_image_url(path: str, size: str = "original") -> str:
+            return f"{base}{size}{path}" if path else ""
+
+        return get_image_url
+
+    async def _update_series_nfo_flag(
+        self,
+        key: str,
+        has_nfo: bool,
+        nfo_path: str,
+    ) -> None:
+        """Record the NFO state of a series in the database (best effort).
+
+        Sets ``has_nfo``, ``nfo_path`` and ``nfo_updated_at`` on the series
+        row, and ``nfo_created_at`` only if it was still unset. An unknown
+        key is ignored; any exception is logged as a warning and swallowed.
+
+        Args:
+            key: Series key (primary identifier)
+            has_nfo: Whether the series now has an NFO file
+            nfo_path: Path to the NFO file
+        """
+        try:
+            from src.server.database.connection import get_db_session
+            from src.server.database.service import AnimeSeriesService
+
+            async with get_db_session() as db:
+                series = await AnimeSeriesService.get_by_key(db, key)
+                if not series:
+                    return
+
+                timestamp = datetime.now(timezone.utc)
+                series.has_nfo = has_nfo
+                series.nfo_path = nfo_path
+                if series.nfo_created_at is None:
+                    series.nfo_created_at = timestamp
+                series.nfo_updated_at = timestamp
+                # NOTE(review): only flush() is called here; presumably
+                # get_db_session commits on exit — confirm.
+                await db.flush()
+                logger.debug("Updated NFO flag for series: %s", key)
+        except Exception as exc:
+            logger.warning("Failed to update NFO flag for %s: %s", key, exc)
+
+    async def cancel_scan(self) -> bool:
+        """Mark the running NFO scan as cancelled.
+
+        Clears the in-progress flag, sets the current progress record (if
+        any) to status "cancelled" and emits an "nfo_scan_cancelled" event.
+
+        Returns:
+            True if a scan was marked as cancelled, False if the in-progress
+            flag was not set.
+        """
+        async with self._lock:
+            if not self._is_scanning:
+                return False
+
+            self._is_scanning = False
+
+            scan = self._current_scan
+            if scan:
+                scan.status = "cancelled"
+                scan.message = "NFO scan cancelled by user"
+                scan.updated_at = datetime.now(timezone.utc)
+
+        # Subscribers are notified after the lock has been released.
+        if scan:
+            cancelled_event = {
+                "type": "nfo_scan_cancelled",
+                "scan_id": scan.scan_id,
+                "message": "NFO scan cancelled by user",
+            }
+            await self._emit_scan_event(cancelled_event)
+
+        logger.info("NFO scan cancelled")
+        return True
+
+    async def get_scan_status(self) -> Dict[str, Any]:
+        """Report whether a scan is running plus the latest progress snapshot.
+
+        Returns:
+            Dict with ``is_scanning`` (bool) and ``current_scan`` (the
+            progress record serialized via ``to_dict``, or None when no scan
+            has been started).
+        """
+        scan = self._current_scan
+        snapshot = scan.to_dict() if scan else None
+        return {
+            "is_scanning": self._is_scanning,
+            "current_scan": snapshot,
+        }
+
+
+# ---------------------------------------------------------------------------
+# Module-level singleton
+# ---------------------------------------------------------------------------
+
+_nfo_scan_service: Optional[NfoScanService] = None
+
+
+def get_nfo_scan_service() -> NfoScanService:
+    """Return the shared NfoScanService, creating it on first use."""
+    global _nfo_scan_service
+    service = _nfo_scan_service
+    if service is None:
+        service = _nfo_scan_service = NfoScanService()
+    return service
+
+
+def reset_nfo_scan_service() -> None:
+    """Reset the singleton NfoScanService instance (for testing).
+
+    The module-level reference is set back to None, so the next call to
+    ``get_nfo_scan_service`` creates a new instance.
+    """
+    global _nfo_scan_service
+    _nfo_scan_service = None
--- a/src/server/services/scheduler/scheduler_service.py
+++ b/src/server/services/scheduler/scheduler_service.py
@@ -11,7 +11,7 @@ from __future__ import annotations

 import logging
 from datetime import datetime, timedelta, timezone
-from typing import List, Optional
+from typing import Dict, List, Optional

 from apscheduler.schedulers.asyncio import AsyncIOScheduler
 from apscheduler.triggers.cron import CronTrigger
@@ -261,6 +261,9 @@ class SchedulerService:
            "auto_download_after_rescan": (
                self._config.auto_download_after_rescan if self._config else False
            ),
+            "nfo_scan_after_rescan": (
+                self._config.nfo_scan_after_rescan if self._config else True
+            ),
            "last_run": (
                self._last_scan_time.isoformat()
                if self._last_scan_time
@@ -375,7 +378,19 @@ class SchedulerService:
            # 1. Main library rescan
            await self._run_rescan()

-            # 2. Auto-download (if enabled)
+            # 2. NFO scan (if enabled); "nfo_scan_started" is broadcast after it ends
+            if self._config and self._config.nfo_scan_after_rescan:
+                try:
+                    nfo_result = await self._run_nfo_scan()
+                    await self._broadcast("nfo_scan_started", {
+                        "created": nfo_result.get("created", 0),
+                        "updated": nfo_result.get("updated", 0),
+                    })
+                except Exception as exc:
+                    logger.error("NFO scan failed: %s", exc, exc_info=True)
+                    await self._broadcast("nfo_scan_error", {"error": str(exc)})
+
+            # 3. Auto-download (if enabled)
            if self._config and self._config.auto_download_after_rescan:
                try:
                    queued = await self._run_auto_download()
@@ -419,6 +434,24 @@ class SchedulerService:
        await anime_service.rescan()
        logger.info("anime_service.rescan() completed")

+    async def _run_nfo_scan(self) -> Dict[str, Any]:
+        """Run the NFO scan over all series and return its result dict."""
+        from src.server.services.nfo_scan_service import get_nfo_scan_service
+        from src.server.utils.dependencies import get_anime_service
+
+        library = get_anime_service()
+        scanner = get_nfo_scan_service()
+
+        logger.info("Starting NFO scan...")
+        summary = await scanner.scan_all(library)
+        # Counters absent from the result are logged as 0.
+        keys = ("created", "updated", "errors_count")
+        counts = [summary.get(key, 0) for key in keys]
+        logger.info(
+            "NFO scan completed: created=%d updated=%d errors=%d", *counts
+        )
+        return summary
+
    async def _run_auto_download(self) -> int:
        """Queue and start downloads for all series with missing episodes."""
        from src.server.models.download import EpisodeIdentifier
--- a/tests/unit/test_base_provider.py
+++ b/tests/unit/test_base_provider.py
@@ -103,6 +103,9 @@ class ConcreteLoader(Loader):
    def get_title(self, key: str) -> str:
        return f"Title for {key}"

+    def get_year(self, key: str) -> int | None:
+        return 2024  # constant stub for the now-abstract get_year
+
    def get_season_episode_count(self, slug: str) -> Dict[int, int]:
        return {1: 12, 2: 24}

--- a/tests/unit/test_enhanced_provider.py
+++ b/tests/unit/test_enhanced_provider.py
@@ -45,6 +45,9 @@ class ConcreteEnhancedLoader(EnhancedAniWorldLoader):
    def get_title(self, key: str) -> str:
        return self.GetTitle(key)

+    def get_year(self, key: str) -> int | None:
+        return self.GetYear(key)  # thin snake_case adapter, like get_title
+

@pytest.fixture
 def enhanced_loader():
@@ -969,6 +972,7 @@ class TestHlsUrlDetection:
    def test_voe_hls_pattern_extracts_hls_url(self):
        """HLS_PATTERN should extract HLS URL from VOE embedded player HTML."""
        import re
+
        from src.server.providers.streaming.voe import HLS_PATTERN

        html_with_hls = """
@@ -984,6 +988,7 @@ class TestHlsUrlDetection:
    def test_voe_hls_pattern_returns_none_when_no_hls(self):
        """HLS_PATTERN should return None when no HLS URL in HTML."""
        import re
+
        from src.server.providers.streaming.voe import HLS_PATTERN

        html_no_hls = """
@@ -997,6 +1002,7 @@ class TestHlsUrlDetection:
    def test_hls_url_detection_in_provider_flow(self, enhanced_loader, tmp_path):
        """Provider should detect and handle HLS URLs from VOE extractor."""
        import re
+
        from src.server.providers.streaming.voe import HLS_PATTERN

        # Simulate VOE returning an HLS URL (base64 encoded .m3u8)
--- a/tests/unit/test_monitored_provider.py
+++ b/tests/unit/test_monitored_provider.py
@@ -53,6 +53,9 @@ class MockProvider(Loader):
    def get_title(self, key):
        return self._title

+    def get_year(self, key) -> int | None:
+        return 2024  # constant stub for the now-abstract get_year
+
    def get_season_episode_count(self, slug):
        return self._season_episodes

@@ -66,6 +69,9 @@ class ConcreteMonitoredWrapper(MonitoredProviderWrapper):
    def unsubscribe_download_progress(self, handler):
        pass

+    def get_year(self, key: str) -> int | None:
+        return None  # stub: this test wrapper reports no year
+

@pytest.fixture
 def mock_provider():
--- a/tests/unit/test_provider_edge_cases.py
+++ b/tests/unit/test_provider_edge_cases.py
@@ -25,6 +25,7 @@ def _loader() -> AniworldLoader:
    loader = AniworldLoader.__new__(AniworldLoader)
    loader._KeyHTMLDict = {}
    loader._EpisodeHTMLDict = {}
+    loader._YearDict = {}
    loader.ANIWORLD_TO = "https://aniworld.to"
    loader.DEFAULT_REQUEST_TIMEOUT = 10
    loader.session = MagicMock()