Files
Aniworld/src/server/services/nfo_scan_service.py
Lukas e74b04c1ee feat: add NFO scan after rescan and year caching
- Add nfo_scan_after_rescan config option (default: true)
- Implement year caching in AniworldLoader and EnhancedAniWorldLoader
- Make get_year abstract method in base provider
- Run NFO validation/creation after scheduled rescan completes
- Add _YearDict cache to avoid re-extracting year from HTML
2026-06-05 18:15:41 +02:00

651 lines
21 KiB
Python

"""NFO scan service for validating and creating tvshow.nfo files.
This module provides a service layer for scanning the anime library,
checking whether each series has a valid tvshow.nfo file, creating
missing files, and filling in missing properties from TMDB metadata.
All series are identified by 'key' (provider-assigned, URL-safe
identifier). 'folder' is used as metadata only for filesystem paths.
"""
from __future__ import annotations
import asyncio
import os
from datetime import datetime, timezone
from typing import Any, Callable, Dict, List, Optional
import structlog
from src.config.settings import settings
from src.server.nfo.nfo_generator import generate_tvshow_nfo
from src.server.nfo.nfo_mapper import tmdb_to_nfo_model
from src.server.nfo.nfo_models import TVShowNFO
# Module-level structured logger shared by the classes and functions below.
logger = structlog.get_logger(__name__)
class NfoScanServiceError(Exception):
    """Raised when an NFO scan cannot be started or carried out.

    ``NfoScanService.scan_all`` raises this when a scan is already running
    or when the series list cannot be retrieved.
    """
class NfoScanProgress:
    """Tracks the current state of an NFO scan operation.

    Attributes:
        scan_id: Unique identifier for this scan
        status: Current status (started, in_progress, completed, failed,
            cancelled)
        total: Total number of series to scan
        current: Number of series processed
        percentage: Completion percentage
        message: Human-readable progress message
        key: Current series key being processed (metadata only)
        folder: Current series folder being processed (metadata only)
        created: Number of NFO files created
        updated: Number of NFO files updated
        errors: List of error messages encountered
        started_at: When the scan started
        updated_at: When progress was last updated
    """

    def __init__(self, scan_id: str):
        # A single timestamp keeps started_at and updated_at identical for a
        # freshly created progress object.
        now = datetime.now(timezone.utc)

        self.scan_id = scan_id
        self.status = "started"
        self.total = 0
        self.current = 0
        self.percentage = 0.0
        self.message = "Initializing NFO scan..."
        self.key: Optional[str] = None
        self.folder: Optional[str] = None
        self.started_at = now
        self.updated_at = now
        self.created = 0
        self.updated = 0
        self.errors: List[str] = []

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the progress into a plain dict.

        Timestamps are rendered as ISO-8601 strings and ``percentage`` is
        rounded to two decimals. ``key`` and ``folder`` are only included
        once they have been set.

        Returns:
            A snapshot of the current progress. The ``errors`` list is a
            copy, so payloads that were already handed out do not change
            when further errors are recorded later.
        """
        result: Dict[str, Any] = {
            "scan_id": self.scan_id,
            "status": self.status,
            "total": self.total,
            "current": self.current,
            "percentage": round(self.percentage, 2),
            "message": self.message,
            "started_at": self.started_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "created": self.created,
            "updated": self.updated,
            # Copy: the live list keeps growing while the scan runs.
            "errors": list(self.errors),
        }
        if self.key is not None:
            result["key"] = self.key
        if self.folder is not None:
            result["folder"] = self.folder
        return result
class NfoScanService:
    """Manages NFO validation and creation for anime series.

    Scans the anime library directory, checks each series folder for
    a tvshow.nfo file, creates missing files, and fills in missing
    or empty properties from TMDB metadata.

    Uses 'key' as the primary series identifier and 'folder' as
    metadata only for filesystem operations.

    Only one scan may run at a time: starting a second scan while one is
    in progress raises NfoScanServiceError. Progress and results are
    published as event dicts to the handlers registered through
    subscribe_to_scan_events().
    """
def __init__(self) -> None:
    """Set up an idle service with no subscribers and no scan history."""
    # Callbacks notified about scan events.
    self._scan_event_handlers: List[Callable[[Dict[str, Any]], None]] = []
    # Guards the "is a scan running" flag against concurrent starts.
    self._lock = asyncio.Lock()
    self._is_scanning = False
    # Progress of the most recent scan; None until the first scan starts.
    self._current_scan: Optional[NfoScanProgress] = None
    logger.info("NfoScanService initialized")
def subscribe_to_scan_events(
    self,
    handler: Callable[[Dict[str, Any]], None],
) -> None:
    """Register a callback for NFO scan events.

    Args:
        handler: Callable invoked with the event payload dict each time
            the service emits a scan event. Registering the same handler
            twice means it is stored (and called) twice.
    """
    subscribers = self._scan_event_handlers
    subscribers.append(handler)
def unsubscribe_from_scan_events(
    self,
    handler: Callable[[Dict[str, Any]], None],
) -> None:
    """Remove a previously registered scan event callback.

    Only the first matching registration is removed. Asking to remove a
    handler that is not registered is not an error; a warning is logged.

    Args:
        handler: The callback to remove.
    """
    subscribers = self._scan_event_handlers
    try:
        position = subscribers.index(handler)
    except ValueError:
        logger.warning("Handler not found for unsubscribe")
        return
    del subscribers[position]
async def _emit_scan_event(self, event_data: Dict[str, Any]) -> None:
    """Deliver ``event_data`` to every subscribed handler.

    Handlers are called in registration order. A handler may be a plain
    function, a coroutine function, or any callable whose return value is
    awaitable; awaitable results are awaited before the next handler runs.
    A failing handler is logged and does not stop delivery to the others.

    Args:
        event_data: Event payload passed unchanged to each handler.
    """
    # Function-scope import so the module's import block stays untouched.
    import inspect

    # Iterate over a snapshot: a handler that subscribes or unsubscribes
    # during dispatch must not cause its siblings to be skipped.
    for handler in tuple(self._scan_event_handlers):
        try:
            outcome = handler(event_data)
            # Checking the result instead of the callable also covers
            # callable objects with an async __call__ and sync wrappers
            # that return a coroutine.
            if inspect.isawaitable(outcome):
                await outcome
        except Exception as e:
            logger.error("NFO scan event handler error", error=str(e))
@property
def is_scanning(self) -> bool:
    """Whether the service currently considers a scan to be running."""
    scanning = self._is_scanning
    return scanning
@property
def current_scan(self) -> "Optional[NfoScanProgress]":
    """Progress object of the most recent scan, or None if none has run."""
    progress = self._current_scan
    return progress
async def scan_all(
    self,
    anime_service: Any,  # AnimeService instance
) -> Dict[str, Any]:
    """Run NFO validation and creation across all series.

    Emits ``nfo_scan_started``, one ``nfo_scan_progress`` per series and
    finally ``nfo_scan_completed``. A failure on a single series is
    recorded in the progress errors and does not abort the scan.

    If the scan is cancelled through ``cancel_scan`` while it is running,
    the loop stops before the next series, the "cancelled" status is left
    untouched and no ``nfo_scan_completed`` event is emitted.

    Args:
        anime_service: AnimeService instance for accessing series data.

    Returns:
        Summary dict with keys: total, created, updated, errors_count,
        scan_id, and duration_seconds.

    Raises:
        NfoScanServiceError: If a scan is already in progress or the
            series list cannot be retrieved.
    """
    # Function-scope import so the module's import block stays untouched.
    import uuid

    async with self._lock:
        if self._is_scanning:
            raise NfoScanServiceError("An NFO scan is already in progress")
        self._is_scanning = True
        # id(self) is constant for a long-lived service instance, so a
        # random suffix is used to make every scan distinguishable.
        scan_id = f"nfo_scan_{uuid.uuid4().hex}"
        scan_progress = NfoScanProgress(scan_id)
        self._current_scan = scan_progress

    start_time = datetime.now(timezone.utc)

    def release_scan_slot() -> None:
        # Only clear the flag while this scan is still the current one; a
        # newer scan may have started after this one was cancelled.
        # No await happens here, so this cannot interleave with other tasks.
        if self._current_scan is scan_progress:
            self._is_scanning = False

    def was_cancelled() -> bool:
        return scan_progress.status == "cancelled"

    def build_summary(duration: float) -> Dict[str, Any]:
        return {
            "total": scan_progress.total,
            "created": scan_progress.created,
            "updated": scan_progress.updated,
            "errors_count": len(scan_progress.errors),
            "scan_id": scan_id,
            "duration_seconds": round(duration, 2),
        }

    try:
        logger.info("Starting NFO scan")
        await self._emit_scan_event({
            "type": "nfo_scan_started",
            "scan_id": scan_id,
            "message": "NFO scan started",
        })

        # Get all series from AnimeService
        try:
            series_list = await anime_service.list_series_with_filters()
        except Exception as exc:
            logger.error("Failed to get series list: %s", exc)
            scan_progress.status = "failed"
            scan_progress.message = f"Failed to get series list: {exc}"
            scan_progress.updated_at = datetime.now(timezone.utc)
            raise NfoScanServiceError(
                f"Failed to get series list: {exc}"
            ) from exc

        if was_cancelled():
            # Cancelled while the series list was being fetched.
            return build_summary(0.0)

        if not series_list:
            logger.info("No series found — NFO scan complete")
            scan_progress.status = "completed"
            scan_progress.message = "No series found"
            scan_progress.percentage = 100.0
            scan_progress.updated_at = datetime.now(timezone.utc)
            # Release before emitting so handlers observe an idle service.
            release_scan_slot()
            await self._emit_scan_event({
                "type": "nfo_scan_completed",
                "scan_id": scan_id,
                "success": True,
                "message": "No series found",
                "data": scan_progress.to_dict(),
            })
            return build_summary(0.0)

        scan_progress.total = len(series_list)
        scan_progress.status = "in_progress"
        scan_progress.message = f"Scanning {scan_progress.total} series..."
        scan_progress.updated_at = datetime.now(timezone.utc)

        for position, series in enumerate(series_list, start=1):
            if was_cancelled():
                break

            key = series.get("key", "")
            folder = series.get("folder", "")
            name = series.get("name", "")

            scan_progress.key = key
            scan_progress.folder = folder
            scan_progress.message = f"Scanning: {name}"
            scan_progress.updated_at = datetime.now(timezone.utc)
            await self._emit_scan_event({
                "type": "nfo_scan_progress",
                "data": scan_progress.to_dict(),
            })

            try:
                result = await self._scan_series(key, folder, series)
                if result == "created":
                    scan_progress.created += 1
                elif result == "updated":
                    scan_progress.updated += 1
            except Exception as exc:
                # One broken series must not abort the whole scan.
                error_msg = f"NFO scan failed for {key}: {exc}"
                logger.warning(error_msg)
                scan_progress.errors.append(error_msg)

            scan_progress.current = position
            scan_progress.percentage = round(
                (scan_progress.current / scan_progress.total) * 100, 2
            )
            scan_progress.updated_at = datetime.now(timezone.utc)

        end_time = datetime.now(timezone.utc)
        duration = (end_time - start_time).total_seconds()

        if was_cancelled():
            # cancel_scan() already set status/message and emitted the
            # nfo_scan_cancelled event; do not overwrite them.
            logger.info(
                "NFO scan cancelled after %d of %d series",
                scan_progress.current,
                scan_progress.total,
            )
            return build_summary(duration)

        errors_count = len(scan_progress.errors)
        scan_progress.status = "completed"
        scan_progress.message = (
            f"NFO scan completed: {scan_progress.created} created, "
            f"{scan_progress.updated} updated, {errors_count} errors"
        )
        scan_progress.percentage = 100.0
        scan_progress.updated_at = end_time

        # Release before emitting so handlers observe an idle service.
        release_scan_slot()

        logger.info(
            "NFO scan completed: total=%d created=%d updated=%d errors=%d duration=%.2fs",
            scan_progress.total,
            scan_progress.created,
            scan_progress.updated,
            errors_count,
            duration,
        )
        await self._emit_scan_event({
            "type": "nfo_scan_completed",
            "scan_id": scan_id,
            "success": True,
            "message": scan_progress.message,
            "data": scan_progress.to_dict(),
            "statistics": {
                "total": scan_progress.total,
                "created": scan_progress.created,
                "updated": scan_progress.updated,
                "errors_count": errors_count,
            },
        })
        return build_summary(duration)
    finally:
        # Safety net: an unexpected exception or task cancellation must
        # never leave the service permanently marked as scanning.
        release_scan_slot()
async def _scan_series(
    self,
    key: str,
    folder: str,
    series_data: Dict[str, Any],
) -> Optional[str]:
    """Scan and update NFO for a single series.

    Args:
        key: Series key (primary identifier)
        folder: Series folder name (metadata for filesystem path)
        series_data: Series data dict from anime_service

    Returns:
        "created" if new NFO was created, "updated" if existing was
        modified, None if no change needed or error occurred.
    """
    if not folder:
        logger.debug("Skipping series with no folder: key=%s", key)
        return None

    anime_dir = getattr(settings, "anime_directory", None)
    if not anime_dir:
        logger.warning("anime_directory not configured — skipping NFO scan")
        return None

    series_path = os.path.join(anime_dir, folder)
    nfo_path = os.path.join(series_path, "tvshow.nfo")

    if not os.path.isfile(nfo_path):
        # Create new NFO
        logger.info("Creating NFO for series: %s (%s)", key, folder)
        await self._create_nfo(key, folder, series_data, nfo_path)
        # _create_nfo returns without writing when no tmdb_id or TMDB data
        # is available. Only report "created" (and flag the series in the
        # database) when the file really exists now.
        if not os.path.isfile(nfo_path):
            return None
        await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path)
        return "created"

    # NFO exists — check if it needs updating
    updated = await self._update_nfo_if_needed(key, folder, series_data, nfo_path)
    if updated:
        await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path)
        return "updated"
    return None
async def _create_nfo(
    self,
    key: str,
    folder: str,
    series_data: Dict[str, Any],
    nfo_path: str,
) -> None:
    """Write a fresh tvshow.nfo built from TMDB metadata.

    Nothing is written (only a warning is logged) when the series has no
    ``tmdb_id`` or when TMDB returns no data. On success an
    ``nfo_created`` event is emitted.

    Args:
        key: Series key
        folder: Series folder name
        series_data: Series data from anime_service
        nfo_path: Full path to the NFO file to create
    """
    show_id = series_data.get("tmdb_id")
    if not show_id:
        logger.warning(
            "Cannot create NFO for %s: no tmdb_id available",
            key,
        )
        return

    try:
        metadata = await self._fetch_tmdb_data(show_id)
    except Exception as exc:
        logger.warning("Failed to fetch TMDB data for %s: %s", key, exc)
        return

    if not metadata:
        logger.warning("No TMDB data for %s", key)
        return

    image_url_for = self._make_tmdb_image_url(show_id)
    model = tmdb_to_nfo_model(
        metadata,
        content_ratings=None,
        get_image_url=image_url_for,
        image_size="original",
    )
    rendered = generate_tvshow_nfo(model)

    # The series folder may not exist yet.
    parent_dir, _ = os.path.split(nfo_path)
    os.makedirs(parent_dir, exist_ok=True)
    with open(nfo_path, "w", encoding="utf-8") as handle:
        handle.write(rendered)

    logger.info("Created tvshow.nfo for %s at %s", key, nfo_path)
    created_event = {
        "type": "nfo_created",
        "key": key,
        "folder": folder,
        "path": nfo_path,
    }
    await self._emit_scan_event(created_event)
async def _update_nfo_if_needed(
    self,
    key: str,
    folder: str,
    series_data: Dict[str, Any],
    nfo_path: str,
) -> bool:
    """Rewrite an existing NFO when critical fields are missing or empty.

    The file is parsed with lxml. If parsing fails the NFO is regenerated
    from TMDB. Otherwise the fields title, plot, premiered and tmdbid are
    checked; when any is absent or blank, the whole file is rebuilt from
    TMDB metadata and an ``nfo_updated`` event is emitted.

    Args:
        key: Series key
        folder: Series folder name
        series_data: Series data from anime_service
        nfo_path: Full path to the existing NFO file

    Returns:
        True if NFO was updated, False if no changes were needed or the
        update could not be performed.
    """
    try:
        from lxml import etree
    except ImportError:
        logger.warning("lxml not available — cannot update existing NFO files")
        return False

    try:
        document_root = etree.parse(nfo_path).getroot()
    except Exception as exc:
        logger.warning("Failed to parse existing NFO for %s: %s — will regenerate", key, exc)
        # Corrupt or unreadable NFO — regenerate from TMDB
        return await self._regenerate_nfo(key, folder, series_data, nfo_path)

    def lacks_text(tag: str) -> bool:
        # A field counts as missing when the element is absent or holds
        # only whitespace.
        node = document_root.find(tag)
        return node is None or not (node.text or "").strip()

    missing_fields: List[str] = [
        tag
        for tag in ("title", "plot", "premiered", "tmdbid")
        if lacks_text(tag)
    ]

    if not missing_fields:
        logger.debug("NFO for %s is complete — no update needed", key)
        return False

    logger.info(
        "NFO for %s is missing fields %s — attempting to fill from TMDB",
        key,
        missing_fields,
    )

    show_id = series_data.get("tmdb_id")
    if not show_id:
        logger.warning("Cannot update NFO for %s: no tmdb_id", key)
        return False

    try:
        metadata = await self._fetch_tmdb_data(show_id)
    except Exception as exc:
        logger.warning("Failed to fetch TMDB data for %s: %s", key, exc)
        return False

    if not metadata:
        return False

    image_url_for = self._make_tmdb_image_url(show_id)
    model = tmdb_to_nfo_model(
        metadata,
        content_ratings=None,
        get_image_url=image_url_for,
        image_size="original",
    )

    # Serialize the rebuilt model and overwrite the existing file.
    rendered = generate_tvshow_nfo(model)
    with open(nfo_path, "w", encoding="utf-8") as handle:
        handle.write(rendered)

    logger.info("Updated NFO for %s (filled %d fields)", key, len(missing_fields))
    updated_event = {
        "type": "nfo_updated",
        "key": key,
        "folder": folder,
        "path": nfo_path,
        "missing_fields": missing_fields,
    }
    await self._emit_scan_event(updated_event)
    return True
async def _regenerate_nfo(
    self,
    key: str,
    folder: str,
    series_data: Dict[str, Any],
    nfo_path: str,
) -> bool:
    """Rebuild an unreadable NFO from TMDB metadata, overwriting the file.

    Args:
        key: Series key
        folder: Series folder name (not used by this method)
        series_data: Series data from anime_service; must carry a
            ``tmdb_id`` for regeneration to happen
        nfo_path: Full path of the NFO file to overwrite

    Returns:
        True if the file was rewritten, False when there is no tmdb_id or
        TMDB data could not be obtained.
    """
    show_id = series_data.get("tmdb_id")
    if not show_id:
        return False

    try:
        metadata = await self._fetch_tmdb_data(show_id)
    except Exception as exc:
        logger.warning("Failed to fetch TMDB data for %s during regeneration: %s", key, exc)
        return False

    if not metadata:
        return False

    image_url_for = self._make_tmdb_image_url(show_id)
    model = tmdb_to_nfo_model(
        metadata,
        content_ratings=None,
        get_image_url=image_url_for,
        image_size="original",
    )
    rendered = generate_tvshow_nfo(model)
    with open(nfo_path, "w", encoding="utf-8") as handle:
        handle.write(rendered)

    logger.info("Regenerated NFO for %s", key)
    return True
async def _fetch_tmdb_data(self, tmdb_id: int) -> Optional[Dict[str, Any]]:
    """Fetch series metadata from the TMDB client.

    Any failure (including a failing import of the client module) is
    logged as a warning and turned into a ``None`` result.

    Args:
        tmdb_id: TMDB series ID

    Returns:
        Whatever the client returns for the series, or None on failure.
    """
    try:
        # Imported lazily, inside the try, so an import problem is handled
        # like any other fetch failure.
        from src.server.nfo.tmdb_client import get_tmdb_client

        tmdb = get_tmdb_client()
        return await tmdb.get_series_details(tmdb_id)
    except Exception as exc:
        logger.warning("TMDB fetch failed for TMDB ID %s: %s", tmdb_id, exc)
        return None
def _make_tmdb_image_url(self, tmdb_id: int) -> Callable[[str, str], str]:
    """Build an image-URL function for use with ``tmdb_to_nfo_model``.

    The base URL is looked up once via ``get_tmdb_image_base_url(tmdb_id)``
    and captured by the returned closure.

    Args:
        tmdb_id: TMDB series ID passed to the base-URL lookup.

    Returns:
        A function ``(path, size="original")`` returning
        ``base + size + path``, or an empty string when ``path`` is empty.
    """
    from src.server.nfo.tmdb_client import get_tmdb_image_base_url

    base = get_tmdb_image_base_url(tmdb_id)

    def get_image_url(path: str, size: str = "original") -> str:
        return f"{base}{size}{path}" if path else ""

    return get_image_url
async def _update_series_nfo_flag(
    self,
    key: str,
    has_nfo: bool,
    nfo_path: str,
) -> None:
    """Persist the NFO state of a series in the database.

    Best effort: any failure is logged as a warning and not propagated.
    When no series with the given key exists, nothing is changed.

    Args:
        key: Series key (primary identifier)
        has_nfo: Whether the series now has an NFO file
        nfo_path: Path to the NFO file
    """
    try:
        from src.server.database.connection import get_db_session
        from src.server.database.service import AnimeSeriesService

        async with get_db_session() as db:
            record = await AnimeSeriesService.get_by_key(db, key)
            if not record:
                return

            stamp = datetime.now(timezone.utc)
            record.has_nfo = has_nfo
            record.nfo_path = nfo_path
            # Keep the original creation time once it has been recorded.
            if record.nfo_created_at is None:
                record.nfo_created_at = stamp
            record.nfo_updated_at = stamp
            await db.flush()
            logger.debug("Updated NFO flag for series: %s", key)
    except Exception as exc:
        logger.warning("Failed to update NFO flag for %s: %s", key, exc)
async def cancel_scan(self) -> bool:
    """Mark the running NFO scan as cancelled.

    Clears the scanning flag, sets the current progress object to the
    "cancelled" status and emits an ``nfo_scan_cancelled`` event.

    Returns:
        True if scan was cancelled, False if no scan in progress.
    """
    async with self._lock:
        if not self._is_scanning:
            return False

        self._is_scanning = False
        progress = self._current_scan
        if progress:
            progress.status = "cancelled"
            progress.message = "NFO scan cancelled by user"
            progress.updated_at = datetime.now(timezone.utc)

    # Notify subscribers outside the lock.
    active = self._current_scan
    if active:
        cancelled_event = {
            "type": "nfo_scan_cancelled",
            "scan_id": active.scan_id,
            "message": "NFO scan cancelled by user",
        }
        await self._emit_scan_event(cancelled_event)

    logger.info("NFO scan cancelled")
    return True
async def get_scan_status(self) -> Dict[str, Any]:
    """Report whether a scan is running together with its progress.

    Returns:
        Dict with ``is_scanning`` (bool) and ``current_scan`` (the progress
        of the most recent scan as a dict, or None if there is none).
    """
    progress = self._current_scan
    snapshot = progress.to_dict() if progress else None
    return {
        "is_scanning": self._is_scanning,
        "current_scan": snapshot,
    }
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
# Lazily created shared instance; stays None until first requested.
_nfo_scan_service: Optional["NfoScanService"] = None


def get_nfo_scan_service() -> "NfoScanService":
    """Return the shared NfoScanService, creating it on first use."""
    global _nfo_scan_service
    service = _nfo_scan_service
    if service is None:
        service = _nfo_scan_service = NfoScanService()
    return service
def reset_nfo_scan_service() -> None:
    """Drop the shared NfoScanService instance (for testing).

    The next call to ``get_nfo_scan_service`` creates a fresh instance.
    """
    global _nfo_scan_service
    _nfo_scan_service = None