# - Add nfo_scan_after_rescan config option (default: true)
# - Implement year caching in AniworldLoader and EnhancedAniWorldLoader
# - Make get_year abstract method in base provider
# - Run NFO validation/creation after scheduled rescan completes
# - Add _YearDict cache to avoid re-extracting year from HTML
#
# 651 lines · 21 KiB · Python
"""NFO scan service for validating and creating tvshow.nfo files.

This module provides a service layer for scanning the anime library,
checking whether each series has a valid tvshow.nfo file, creating
missing files, and filling in missing properties from TMDB metadata.

All series are identified by 'key' (provider-assigned, URL-safe
identifier). 'folder' is used as metadata only for filesystem paths.
"""
|
|
from __future__ import annotations

import asyncio
import inspect
import os
import uuid
from datetime import datetime, timezone
from typing import Any, Callable, Dict, List, Optional

import structlog

from src.config.settings import settings
from src.server.nfo.nfo_generator import generate_tvshow_nfo
from src.server.nfo.nfo_mapper import tmdb_to_nfo_model
from src.server.nfo.nfo_models import TVShowNFO
|
|
|
|
logger = structlog.get_logger(__name__)
|
|
|
|
|
|
class NfoScanServiceError(Exception):
    """Raised when an NFO scan cannot be started or carried out.

    Service-level error type for NFO scan operations.
    """
|
|
|
|
|
|
class NfoScanProgress:
    """Tracks the current state of an NFO scan operation.

    Attributes:
        scan_id: Unique identifier for this scan
        status: Current status (started, in_progress, completed, failed, cancelled)
        total: Total number of series to scan
        current: Number of series processed
        percentage: Completion percentage
        message: Human-readable progress message
        key: Current series key being processed (metadata only)
        folder: Current series folder being processed (metadata only)
        created: Number of NFO files created
        updated: Number of NFO files updated
        errors: List of error messages encountered
        started_at: When the scan started
        updated_at: When progress was last updated
    """

    def __init__(self, scan_id: str):
        """Create a progress record in the "started" state.

        Args:
            scan_id: Identifier of the scan this record belongs to.
        """
        # One timestamp for both fields so a fresh record starts out with
        # started_at == updated_at.
        now = datetime.now(timezone.utc)

        self.scan_id = scan_id
        self.status = "started"
        self.total = 0
        self.current = 0
        self.percentage = 0.0
        self.message = "Initializing NFO scan..."
        self.key: Optional[str] = None
        self.folder: Optional[str] = None
        self.started_at = now
        self.updated_at = now
        self.created = 0
        self.updated = 0
        self.errors: List[str] = []

    def to_dict(self) -> Dict[str, Any]:
        """Return a snapshot of the progress as a plain dict.

        Timestamps are rendered with ``isoformat()`` and the percentage is
        rounded to two decimals. ``key`` and ``folder`` are included only
        when they are set.

        Returns:
            Dict with the scan state at the time of the call.
        """
        result: Dict[str, Any] = {
            "scan_id": self.scan_id,
            "status": self.status,
            "total": self.total,
            "current": self.current,
            "percentage": round(self.percentage, 2),
            "message": self.message,
            "started_at": self.started_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "created": self.created,
            "updated": self.updated,
            # Copy the list: a snapshot that has already been handed out
            # must not change when later errors are appended.
            "errors": list(self.errors),
        }
        if self.key is not None:
            result["key"] = self.key
        if self.folder is not None:
            result["folder"] = self.folder
        return result
|
|
|
|
|
|
class NfoScanService:
    """Manages NFO validation and creation for anime series.

    Scans the anime library directory, checks each series folder for
    a tvshow.nfo file, creates missing files, and fills in missing
    or empty properties from TMDB metadata.

    Uses 'key' as the primary series identifier and 'folder' as
    metadata only for filesystem operations.

    At most one scan is active per instance. Progress and results are
    published to the handlers registered with subscribe_to_scan_events().
    """

    def __init__(self) -> None:
        self._current_scan: Optional[NfoScanProgress] = None
        self._is_scanning = False
        # Serializes the check-and-set of _is_scanning in scan_all() and
        # cancel_scan().
        self._lock = asyncio.Lock()

        # Event handlers for scan events
        self._scan_event_handlers: List[Callable[[Dict[str, Any]], None]] = []

        logger.info("NfoScanService initialized")

    # ------------------------------------------------------------------
    # Event subscription
    # ------------------------------------------------------------------

    def subscribe_to_scan_events(
        self,
        handler: Callable[[Dict[str, Any]], None],
    ) -> None:
        """Subscribe to NFO scan events.

        Args:
            handler: Callable invoked with each event dict. It may be a
                plain function or return an awaitable, which is awaited.
        """
        self._scan_event_handlers.append(handler)

    def unsubscribe_from_scan_events(
        self,
        handler: Callable[[Dict[str, Any]], None],
    ) -> None:
        """Unsubscribe from NFO scan events.

        An unknown handler is logged as a warning and otherwise ignored.
        """
        try:
            self._scan_event_handlers.remove(handler)
        except ValueError:
            logger.warning("Handler not found for unsubscribe")

    async def _emit_scan_event(self, event_data: Dict[str, Any]) -> None:
        """Emit scan event to all subscribers.

        Handler exceptions are logged and never propagate, so one broken
        subscriber cannot abort a scan or starve the other subscribers.
        """
        # Iterate over a copy: a handler may unsubscribe itself (or others)
        # while being called, which would otherwise make the loop skip the
        # next handler.
        for handler in list(self._scan_event_handlers):
            try:
                outcome = handler(event_data)
                # Await whatever awaitable comes back rather than inspecting
                # the callable, so lambdas/callable objects wrapping async
                # functions are awaited too.
                if inspect.isawaitable(outcome):
                    await outcome
            except Exception as e:
                logger.error("NFO scan event handler error", error=str(e))

    # ------------------------------------------------------------------
    # State accessors
    # ------------------------------------------------------------------

    @property
    def is_scanning(self) -> bool:
        """Whether a scan currently holds the busy flag."""
        return self._is_scanning

    @property
    def current_scan(self) -> Optional[NfoScanProgress]:
        """Progress record of the most recently started scan, if any."""
        return self._current_scan

    # ------------------------------------------------------------------
    # Scan orchestration
    # ------------------------------------------------------------------

    async def scan_all(
        self,
        anime_service: Any,  # AnimeService instance
    ) -> Dict[str, Any]:
        """Run NFO validation and creation across all series.

        Args:
            anime_service: AnimeService instance for accessing series data.

        Returns:
            Summary dict with keys: total, created, updated, errors_count,
            scan_id, and duration_seconds. If the scan is cancelled while
            running, the summary covers the series processed so far.

        Raises:
            NfoScanServiceError: If a scan is already in progress or the
                series list cannot be loaded.
        """
        async with self._lock:
            if self._is_scanning:
                raise NfoScanServiceError("An NFO scan is already in progress")
            self._is_scanning = True

            # id(self) is the same for every scan run by this instance, so
            # a random suffix is used to tell scans apart.
            scan_progress = NfoScanProgress(f"nfo_scan_{uuid.uuid4().hex}")
            self._current_scan = scan_progress

        try:
            return await self._run_scan(anime_service, scan_progress)
        finally:
            # Always give the busy flag back, including on unexpected errors
            # and task cancellation; otherwise no further scan could start.
            self._release_scan(scan_progress)

    def _release_scan(self, scan_progress: NfoScanProgress) -> None:
        """Clear the busy flag if ``scan_progress`` is still the active scan.

        After cancel_scan() a new scan may already have started; the old
        scan must not clear the flag that now belongs to the new one.
        Deliberately synchronous so it cannot be interrupted in ``finally``.
        """
        if self._current_scan is scan_progress:
            self._is_scanning = False

    @staticmethod
    def _build_summary(
        scan_progress: NfoScanProgress,
        duration: float,
    ) -> Dict[str, Any]:
        """Build the summary dict returned by scan_all()."""
        return {
            "total": scan_progress.total,
            "created": scan_progress.created,
            "updated": scan_progress.updated,
            "errors_count": len(scan_progress.errors),
            "scan_id": scan_progress.scan_id,
            "duration_seconds": round(duration, 2),
        }

    async def _complete_scan(
        self,
        scan_progress: NfoScanProgress,
        message: str,
        duration: float,
        *,
        include_statistics: bool,
    ) -> Dict[str, Any]:
        """Mark the scan completed, emit nfo_scan_completed, return summary."""
        scan_progress.status = "completed"
        scan_progress.message = message
        scan_progress.percentage = 100.0
        scan_progress.updated_at = datetime.now(timezone.utc)

        # Release before notifying, so subscribers already observe
        # is_scanning == False when they receive the completion event.
        self._release_scan(scan_progress)

        event: Dict[str, Any] = {
            "type": "nfo_scan_completed",
            "scan_id": scan_progress.scan_id,
            "success": True,
            "message": message,
            "data": scan_progress.to_dict(),
        }
        if include_statistics:
            event["statistics"] = {
                "total": scan_progress.total,
                "created": scan_progress.created,
                "updated": scan_progress.updated,
                "errors_count": len(scan_progress.errors),
            }
        await self._emit_scan_event(event)

        return self._build_summary(scan_progress, duration)

    async def _run_scan(
        self,
        anime_service: Any,
        scan_progress: NfoScanProgress,
    ) -> Dict[str, Any]:
        """Execute one scan for scan_all() and return its summary."""
        logger.info("Starting NFO scan")

        # Emit scan started
        await self._emit_scan_event({
            "type": "nfo_scan_started",
            "scan_id": scan_progress.scan_id,
            "message": "NFO scan started",
        })

        # Get all series from AnimeService
        try:
            series_list = await anime_service.list_series_with_filters()
        except Exception as exc:
            logger.error("Failed to get series list: %s", exc)
            # Record a terminal state so the progress record does not stay
            # at "started" after the scan has given up.
            scan_progress.status = "failed"
            scan_progress.message = f"Failed to get series list: {exc}"
            scan_progress.updated_at = datetime.now(timezone.utc)
            raise NfoScanServiceError(f"Failed to get series list: {exc}") from exc

        # cancel_scan() may have run while the list was being fetched.
        if scan_progress.status == "cancelled":
            return self._build_summary(scan_progress, 0.0)

        if not series_list:
            logger.info("No series found — NFO scan complete")
            return await self._complete_scan(
                scan_progress,
                "No series found",
                0.0,
                include_statistics=False,
            )

        scan_progress.total = len(series_list)
        scan_progress.status = "in_progress"
        scan_progress.message = f"Scanning {scan_progress.total} series..."
        scan_progress.updated_at = datetime.now(timezone.utc)

        start_time = datetime.now(timezone.utc)

        for position, series in enumerate(series_list, start=1):
            # cancel_scan() flips the status; stop at the next series
            # boundary instead of running the scan to the end.
            if scan_progress.status == "cancelled":
                break

            key = ""
            try:
                # Inside the try so a malformed entry is recorded as a
                # per-series error rather than aborting the whole scan.
                key = series.get("key", "")
                folder = series.get("folder", "")
                name = series.get("name", "")

                scan_progress.key = key
                scan_progress.folder = folder
                scan_progress.message = f"Scanning: {name}"
                scan_progress.updated_at = datetime.now(timezone.utc)

                await self._emit_scan_event({
                    "type": "nfo_scan_progress",
                    "data": scan_progress.to_dict(),
                })

                result = await self._scan_series(key, folder, series)
                if result == "created":
                    scan_progress.created += 1
                elif result == "updated":
                    scan_progress.updated += 1
            except Exception as exc:
                error_msg = f"NFO scan failed for {key}: {exc}"
                logger.warning(error_msg)
                scan_progress.errors.append(error_msg)

            scan_progress.current = position
            scan_progress.percentage = round(
                (scan_progress.current / scan_progress.total) * 100, 2
            )
            scan_progress.updated_at = datetime.now(timezone.utc)

        duration = (datetime.now(timezone.utc) - start_time).total_seconds()

        if scan_progress.status == "cancelled":
            # cancel_scan() already set status/message and emitted
            # nfo_scan_cancelled; do not overwrite that with "completed".
            logger.info(
                "NFO scan stopped after cancellation: processed=%d total=%d",
                scan_progress.current,
                scan_progress.total,
            )
            return self._build_summary(scan_progress, duration)

        logger.info(
            "NFO scan completed: total=%d created=%d updated=%d errors=%d duration=%.2fs",
            scan_progress.total,
            scan_progress.created,
            scan_progress.updated,
            len(scan_progress.errors),
            duration,
        )

        return await self._complete_scan(
            scan_progress,
            (
                f"NFO scan completed: {scan_progress.created} created, "
                f"{scan_progress.updated} updated, {len(scan_progress.errors)} errors"
            ),
            duration,
            include_statistics=True,
        )

    # ------------------------------------------------------------------
    # Per-series work
    # ------------------------------------------------------------------

    async def _scan_series(
        self,
        key: str,
        folder: str,
        series_data: Dict[str, Any],
    ) -> Optional[str]:
        """Scan and update NFO for a single series.

        Args:
            key: Series key (primary identifier)
            folder: Series folder name (metadata for filesystem path)
            series_data: Series data dict from anime_service

        Returns:
            "created" if a new NFO was written, "updated" if an existing
            one was rewritten, None if nothing was written (no folder, no
            anime_directory, no usable TMDB data, or NFO already complete).
        """
        if not folder:
            logger.debug("Skipping series with no folder: key=%s", key)
            return None

        anime_dir = getattr(settings, "anime_directory", None)
        if not anime_dir:
            logger.warning("anime_directory not configured — skipping NFO scan")
            return None

        series_path = os.path.join(anime_dir, folder)
        nfo_path = os.path.join(series_path, "tvshow.nfo")

        if not os.path.isfile(nfo_path):
            # Create new NFO
            logger.info("Creating NFO for series: %s (%s)", key, folder)
            written = await self._create_nfo(key, folder, series_data, nfo_path)
            if not written:
                # Nothing was written (e.g. no tmdb_id): do not count it as
                # created and do not flag has_nfo in the database.
                return None
            await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path)
            return "created"

        # NFO exists — check if it needs updating
        updated = await self._update_nfo_if_needed(key, folder, series_data, nfo_path)
        if updated:
            await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path)
            return "updated"

        return None

    def _render_nfo_xml(self, tmdb_data: Dict[str, Any], tmdb_id: int) -> str:
        """Map a TMDB response to the NFO model and serialize it to XML."""
        nfo_model = tmdb_to_nfo_model(
            tmdb_data,
            content_ratings=None,
            get_image_url=self._make_tmdb_image_url(tmdb_id),
            image_size="original",
        )
        return generate_tvshow_nfo(nfo_model)

    @staticmethod
    def _write_nfo_file(nfo_path: str, xml_content: str) -> None:
        """Write ``xml_content`` to ``nfo_path`` as UTF-8, replacing any file."""
        with open(nfo_path, "w", encoding="utf-8") as f:
            f.write(xml_content)

    async def _create_nfo(
        self,
        key: str,
        folder: str,
        series_data: Dict[str, Any],
        nfo_path: str,
    ) -> bool:
        """Create a new tvshow.nfo file from TMDB metadata.

        Args:
            key: Series key
            folder: Series folder name
            series_data: Series data from anime_service
            nfo_path: Full path to the NFO file to create

        Returns:
            True if the file was written, False if creation was skipped
            because no tmdb_id or no TMDB data was available.
        """
        tmdb_id = series_data.get("tmdb_id")

        if not tmdb_id:
            logger.warning(
                "Cannot create NFO for %s: no tmdb_id available",
                key,
            )
            return False

        try:
            tmdb_data = await self._fetch_tmdb_data(tmdb_id)
        except Exception as exc:
            logger.warning("Failed to fetch TMDB data for %s: %s", key, exc)
            return False

        if not tmdb_data:
            logger.warning("No TMDB data for %s", key)
            return False

        xml_content = self._render_nfo_xml(tmdb_data, tmdb_id)

        # Ensure directory exists
        os.makedirs(os.path.dirname(nfo_path), exist_ok=True)
        self._write_nfo_file(nfo_path, xml_content)

        logger.info("Created tvshow.nfo for %s at %s", key, nfo_path)

        await self._emit_scan_event({
            "type": "nfo_created",
            "key": key,
            "folder": folder,
            "path": nfo_path,
        })
        return True

    async def _update_nfo_if_needed(
        self,
        key: str,
        folder: str,
        series_data: Dict[str, Any],
        nfo_path: str,
    ) -> bool:
        """Check an existing NFO for missing fields and rewrite it if needed.

        The file is considered incomplete when any of title, plot,
        premiered or tmdbid is absent or blank. In that case the whole
        file is regenerated from TMDB data and overwritten. An unparsable
        file is handed to _regenerate_nfo().

        Args:
            key: Series key
            folder: Series folder name
            series_data: Series data from anime_service
            nfo_path: Full path to the existing NFO file

        Returns:
            True if NFO was updated, False if no changes were needed or
            the update could not be performed.
        """
        try:
            from lxml import etree
        except ImportError:
            logger.warning("lxml not available — cannot update existing NFO files")
            return False

        try:
            tree = etree.parse(nfo_path)
            root = tree.getroot()
        except Exception as exc:
            logger.warning("Failed to parse existing NFO for %s: %s — will regenerate", key, exc)
            # Corrupt or unreadable NFO — regenerate from TMDB
            return await self._regenerate_nfo(key, folder, series_data, nfo_path)

        # Check for missing or empty critical fields
        critical_fields = ["title", "plot", "premiered", "tmdbid"]
        missing_fields: List[str] = []
        for field in critical_fields:
            elem = root.find(field)
            if elem is None or not elem.text or elem.text.strip() == "":
                missing_fields.append(field)

        if not missing_fields:
            logger.debug("NFO for %s is complete — no update needed", key)
            return False

        logger.info(
            "NFO for %s is missing fields %s — attempting to fill from TMDB",
            key,
            missing_fields,
        )

        # Try to fill missing fields from TMDB
        tmdb_id = series_data.get("tmdb_id")
        if not tmdb_id:
            logger.warning("Cannot update NFO for %s: no tmdb_id", key)
            return False

        try:
            tmdb_data = await self._fetch_tmdb_data(tmdb_id)
        except Exception as exc:
            logger.warning("Failed to fetch TMDB data for %s: %s", key, exc)
            return False

        if not tmdb_data:
            return False

        # Serialize updated model to XML and write
        self._write_nfo_file(nfo_path, self._render_nfo_xml(tmdb_data, tmdb_id))

        logger.info("Updated NFO for %s (filled %d fields)", key, len(missing_fields))

        await self._emit_scan_event({
            "type": "nfo_updated",
            "key": key,
            "folder": folder,
            "path": nfo_path,
            "missing_fields": missing_fields,
        })

        return True

    async def _regenerate_nfo(
        self,
        key: str,
        folder: str,
        series_data: Dict[str, Any],
        nfo_path: str,
    ) -> bool:
        """Regenerate NFO from scratch when existing file is corrupt.

        Returns:
            True if the file was rewritten, False if no tmdb_id or no TMDB
            data was available.
        """
        tmdb_id = series_data.get("tmdb_id")
        if not tmdb_id:
            return False

        try:
            tmdb_data = await self._fetch_tmdb_data(tmdb_id)
        except Exception as exc:
            logger.warning("Failed to fetch TMDB data for %s during regeneration: %s", key, exc)
            return False

        if not tmdb_data:
            return False

        self._write_nfo_file(nfo_path, self._render_nfo_xml(tmdb_data, tmdb_id))

        logger.info("Regenerated NFO for %s", key)
        return True

    async def _fetch_tmdb_data(self, tmdb_id: int) -> Optional[Dict[str, Any]]:
        """Fetch series metadata from TMDB API.

        Args:
            tmdb_id: TMDB series ID

        Returns:
            TMDB response dict or None on failure. Any exception raised
            while fetching is logged and turned into None.
        """
        try:
            from src.server.nfo.tmdb_client import get_tmdb_client

            client = get_tmdb_client()
            data = await client.get_series_details(tmdb_id)
            return data
        except Exception as exc:
            logger.warning("TMDB fetch failed for TMDB ID %s: %s", tmdb_id, exc)
            return None

    def _make_tmdb_image_url(self, tmdb_id: int) -> Callable[[str, str], str]:
        """Create a get_image_url closure for TMDB image paths.

        The base URL is obtained once from get_tmdb_image_base_url(tmdb_id).
        The returned function concatenates base, size and path, and
        returns "" for an empty path.
        """
        from src.server.nfo.tmdb_client import get_tmdb_image_base_url

        base = get_tmdb_image_base_url(tmdb_id)

        def get_image_url(path: str, size: str = "original") -> str:
            if not path:
                return ""
            return f"{base}{size}{path}"

        return get_image_url

    async def _update_series_nfo_flag(
        self,
        key: str,
        has_nfo: bool,
        nfo_path: str,
    ) -> None:
        """Update the has_nfo flag and nfo_path in the database.

        Best effort: any failure is logged as a warning and swallowed so a
        database problem does not fail the scan of the series.

        Args:
            key: Series key (primary identifier)
            has_nfo: Whether the series now has an NFO file
            nfo_path: Path to the NFO file
        """
        try:
            from src.server.database.connection import get_db_session
            from src.server.database.service import AnimeSeriesService

            async with get_db_session() as db:
                series = await AnimeSeriesService.get_by_key(db, key)
                if series:
                    now = datetime.now(timezone.utc)
                    series.has_nfo = has_nfo
                    series.nfo_path = nfo_path
                    # Keep the original creation time on later updates.
                    if series.nfo_created_at is None:
                        series.nfo_created_at = now
                    series.nfo_updated_at = now
                    # NOTE(review): only flush() is called here; presumably
                    # get_db_session() commits on exit — confirm.
                    await db.flush()
                    logger.debug("Updated NFO flag for series: %s", key)
        except Exception as exc:
            logger.warning("Failed to update NFO flag for %s: %s", key, exc)

    # ------------------------------------------------------------------
    # Control / status
    # ------------------------------------------------------------------

    async def cancel_scan(self) -> bool:
        """Cancel the current NFO scan if one is in progress.

        The busy flag is cleared immediately and the scan's status is set
        to "cancelled"; the running scan loop notices this and stops
        before starting the next series.

        Returns:
            True if scan was cancelled, False if no scan in progress.
        """
        async with self._lock:
            if not self._is_scanning:
                return False

            self._is_scanning = False

            # Hold on to the scan being cancelled: once the lock is
            # released a new scan may replace self._current_scan.
            cancelled_scan = self._current_scan
            if cancelled_scan:
                cancelled_scan.status = "cancelled"
                cancelled_scan.message = "NFO scan cancelled by user"
                cancelled_scan.updated_at = datetime.now(timezone.utc)

        if cancelled_scan:
            await self._emit_scan_event({
                "type": "nfo_scan_cancelled",
                "scan_id": cancelled_scan.scan_id,
                "message": "NFO scan cancelled by user",
            })

        logger.info("NFO scan cancelled")
        return True

    async def get_scan_status(self) -> Dict[str, Any]:
        """Get the current NFO scan status.

        Returns:
            Dict with is_scanning and current_scan data (None if no scan
            has been started yet).
        """
        return {
            "is_scanning": self._is_scanning,
            "current_scan": (
                self._current_scan.to_dict() if self._current_scan else None
            ),
        }
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Module-level singleton
|
|
# ---------------------------------------------------------------------------
|
|
|
|
_nfo_scan_service: Optional[NfoScanService] = None
|
|
|
|
|
|
def get_nfo_scan_service() -> NfoScanService:
    """Return the shared NfoScanService, creating it on first use."""
    global _nfo_scan_service
    service = _nfo_scan_service
    if service is None:
        # First request: build the instance and remember it at module level.
        service = _nfo_scan_service = NfoScanService()
    return service
|
|
|
|
|
|
def reset_nfo_scan_service() -> None:
    """Drop the cached NfoScanService so the next lookup builds a new one.

    Intended for tests that need a fresh service instance.
    """
    global _nfo_scan_service
    _nfo_scan_service = None