feat: add NFO scan after rescan and year caching
- Add nfo_scan_after_rescan config option (default: true) - Implement year caching in AniworldLoader and EnhancedAniWorldLoader - Make get_year abstract method in base provider - Run NFO validation/creation after scheduled rescan completes - Add _YearDict cache to avoid re-extracting year from HTML
This commit is contained in:
@@ -39,6 +39,12 @@ class SchedulerConfig(BaseModel):
|
||||
description="Automatically queue and start downloads for all missing "
|
||||
"episodes after a scheduled rescan completes.",
|
||||
)
|
||||
nfo_scan_after_rescan: bool = Field(
|
||||
default=True,
|
||||
description="Run NFO validation and creation after a scheduled rescan "
|
||||
"completes. Checks each series folder for tvshow.nfo and "
|
||||
"creates or fills missing properties.",
|
||||
)
|
||||
# Legacy alias fields — read via Pydantic alias
|
||||
auto_download: Optional[bool] = Field(default=None, alias="auto_download")
|
||||
|
||||
|
||||
@@ -158,6 +158,7 @@ class AniworldLoader(Loader):
|
||||
|
||||
self._KeyHTMLDict = {}
|
||||
self._EpisodeHTMLDict = {}
|
||||
self._YearDict = {}
|
||||
self.Providers = Providers()
|
||||
|
||||
# Events: download_progress is triggered with progress dict
|
||||
@@ -774,55 +775,81 @@ class AniworldLoader(Loader):
|
||||
if span_tag:
|
||||
title = span_tag.text
|
||||
logger.debug("Found title: %s", title)
|
||||
|
||||
# Also try to extract year from sibling p tag "Jahr: {year}"
|
||||
# Year is typically right after title in the HTML structure
|
||||
year = self._extract_year_from_soup(soup)
|
||||
if year is not None:
|
||||
self._YearDict[key] = year
|
||||
logger.debug("Cached year %d for key: %s", year, key)
|
||||
|
||||
return title
|
||||
|
||||
logger.warning("No title found for key: %s", key)
|
||||
return ""
|
||||
|
||||
def _extract_year_from_soup(self, soup: BeautifulSoup) -> int | None:
    """Extract the release year from a parsed series page.

    Two strategies are tried in order:

    1. Every ``<p>`` tag whose text contains ``Jahr:`` or ``Year:`` is
       inspected. The first 4-digit run *after* the label is preferred, so
       a number that happens to precede the label in the same paragraph is
       not mistaken for the year. If nothing follows the label, the first
       4-digit run anywhere in the paragraph is used.
    2. Otherwise the text of ``<div class="series-info">`` is searched for
       a standalone 19xx/20xx number.

    Args:
        soup: Parsed BeautifulSoup object

    Returns:
        Year as int or None if not found
    """
    for p_tag in soup.find_all('p'):
        text = p_tag.get_text()
        label = re.search(r'(?:Jahr|Year):', text)
        if label is None:
            continue
        # Prefer digits following the label; fall back to any 4-digit run
        # in the paragraph so previously matching pages keep matching.
        match = (
            re.search(r'(\d{4})', text[label.end():])
            or re.search(r'(\d{4})', text)
        )
        if match:
            return int(match.group(1))

    # Fallback: look in series-info div
    info_div = soup.find('div', class_='series-info')
    if info_div:
        match = re.search(r'\b(19\d{2}|20\d{2})\b', info_div.get_text())
        if match:
            return int(match.group(1))

    return None
|
||||
|
||||
def get_year(self, key: str) -> int | None:
    """Get anime release year from series key.

    Returns the value stored in ``self._YearDict`` when the key is already
    cached. Otherwise the series page is fetched via ``_get_key_html``,
    parsed, and handed to ``_extract_year_from_soup``; a year that is found
    is cached before it is returned.

    Args:
        key: Series identifier

    Returns:
        Release year, or None if it is not found or if fetching/parsing
        raises (the error is logged as a warning, not propagated).
    """
    logger.debug("Getting year for key: %s", key)

    # Check cache first
    if key in self._YearDict:
        cached_year = self._YearDict[key]
        logger.debug("Using cached year %d for key: %s", cached_year, key)
        return cached_year

    # Not cached - extract from HTML. The extraction rules live only in
    # _extract_year_from_soup; an inline copy here used to return before
    # the result could be cached.
    try:
        soup = BeautifulSoup(
            _decode_html_content(self._get_key_html(key).content),
            'html.parser'
        )
        year = self._extract_year_from_soup(soup)
    except Exception as e:
        logger.warning("Error extracting year for key %s: %s", key, e)
        return None

    if year is None:
        logger.debug("No year found for key: %s", key)
        return None

    self._YearDict[key] = year
    logger.debug("Found and cached year %d for key: %s", year, key)
    return year
|
||||
|
||||
@@ -91,6 +91,17 @@ class Loader(ABC):
|
||||
Series title string
|
||||
"""
|
||||
|
||||
@abstractmethod
def get_year(self, key: str) -> int | None:
    """Return the release year of the series identified by ``key``.

    Concrete loaders must implement this method.

    Args:
        key: Unique series identifier/key

    Returns:
        The release year as an integer; None when the year cannot be
        determined.
    """
|
||||
|
||||
@abstractmethod
|
||||
def get_season_episode_count(self, slug: str) -> Dict[int, int]:
|
||||
"""Get season and episode counts for a series.
|
||||
|
||||
@@ -110,6 +110,7 @@ class EnhancedAniWorldLoader(Loader):
|
||||
# Cache dictionaries
|
||||
self._KeyHTMLDict = {}
|
||||
self._EpisodeHTMLDict = {}
|
||||
self._YearDict = {}
|
||||
|
||||
# Provider manager
|
||||
self.Providers = Providers()
|
||||
@@ -666,6 +667,10 @@ class EnhancedAniWorldLoader(Loader):
|
||||
if title_span:
|
||||
span = title_span.find('span')
|
||||
if span:
|
||||
# Extract and cache year from soup if available
|
||||
year = self._ExtractYearFromSoup(soup)
|
||||
if year is not None:
|
||||
self._YearDict[key] = year
|
||||
return span.text.strip()
|
||||
|
||||
self.logger.warning("Could not extract title for key: %s", key)
|
||||
@@ -674,7 +679,62 @@ class EnhancedAniWorldLoader(Loader):
|
||||
except Exception as e:
|
||||
self.logger.error("Failed to get title for key %s: %s", key, e)
|
||||
raise RetryableError(f"Title extraction failed: {e}") from e
|
||||
|
||||
|
||||
def _ExtractYearFromSoup(self, soup: BeautifulSoup) -> int | None:
    """Extract the release year from a parsed series page.

    Paragraphs containing ``Jahr:`` or ``Year:`` are checked first. The
    first 4-digit run after the label is preferred, so a number earlier in
    the same paragraph is not mistaken for the year; if nothing follows the
    label, the first 4-digit run in the paragraph is used. When no labelled
    paragraph yields a year, the text of ``<div class="series-info">`` is
    searched for a standalone 19xx/20xx number.

    Args:
        soup: Parsed BeautifulSoup object

    Returns:
        Year as int or None if not found
    """
    for p_tag in soup.find_all('p'):
        text = p_tag.get_text()
        label = re.search(r'(?:Jahr|Year):', text)
        if label is None:
            continue
        # Digits after the label win; the whole-paragraph search keeps the
        # previous behaviour for pages where the year precedes the label.
        match = (
            re.search(r'(\d{4})', text[label.end():])
            or re.search(r'(\d{4})', text)
        )
        if match:
            return int(match.group(1))

    info_div = soup.find('div', class_='series-info')
    if info_div:
        match = re.search(r'\b(19\d{2}|20\d{2})\b', info_div.get_text())
        if match:
            return int(match.group(1))

    return None
|
||||
|
||||
def GetYear(self, key: str) -> int | None:
    """Return the release year for ``key``, consulting the year cache first.

    On a cache miss the series page is obtained through ``_GetKeyHTML``,
    parsed with BeautifulSoup and passed to ``_ExtractYearFromSoup``. A year
    that is found is stored in ``self._YearDict``.

    Args:
        key: Series identifier

    Returns:
        Release year, or None if it is not found or if any exception occurs
        while fetching/parsing (logged as a warning).
    """
    # Cache hit: nothing to fetch
    try:
        return self._YearDict[key]
    except KeyError:
        pass

    # Cache miss: parse the series page
    try:
        page = self._GetKeyHTML(key)
        parsed = BeautifulSoup(page.content, 'html.parser')
        found = self._ExtractYearFromSoup(parsed)
        if found is None:
            return None
        self._YearDict[key] = found
        return found
    except Exception as e:
        self.logger.warning("Error extracting year for key %s: %s", key, e)
        return None
|
||||
|
||||
def GetSiteKey(self) -> str:
    """Return the identifier of the site this loader targets."""
    site_key = "aniworld.to"
    return site_key
|
||||
|
||||
651
src/server/services/nfo_scan_service.py
Normal file
651
src/server/services/nfo_scan_service.py
Normal file
@@ -0,0 +1,651 @@
|
||||
"""NFO scan service for validating and creating tvshow.nfo files.
|
||||
|
||||
This module provides a service layer for scanning the anime library,
|
||||
checking whether each series has a valid tvshow.nfo file, creating
|
||||
missing files, and filling in missing properties from TMDB metadata.
|
||||
|
||||
All series are identified by 'key' (provider-assigned, URL-safe
|
||||
identifier). 'folder' is used as metadata only for filesystem paths.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Callable, Dict, List, Optional
|
||||
|
||||
import structlog
|
||||
|
||||
from src.config.settings import settings
|
||||
from src.server.nfo.nfo_generator import generate_tvshow_nfo
|
||||
from src.server.nfo.nfo_mapper import tmdb_to_nfo_model
|
||||
from src.server.nfo.nfo_models import TVShowNFO
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class NfoScanServiceError(Exception):
    """Raised when an NFO scan operation fails at the service level."""
|
||||
|
||||
|
||||
class NfoScanProgress:
    """Mutable record describing the state of one NFO scan run.

    Attributes:
        scan_id: Identifier of this scan
        status: Current status (started, in_progress, completed, failed, cancelled)
        total: Total number of series to scan
        current: Number of series processed
        percentage: Completion percentage
        message: Human-readable progress message
        key: Key of the series currently being processed, if any
        folder: Folder of the series currently being processed, if any
        created: Number of NFO files created
        updated: Number of NFO files updated
        errors: List of error messages encountered
        started_at: When the scan started (UTC)
        updated_at: When progress was last updated (UTC)
    """

    def __init__(self, scan_id: str):
        # Identity and lifecycle
        self.scan_id = scan_id
        self.status = "started"
        self.message = "Initializing NFO scan..."

        # Counters
        self.total = 0
        self.current = 0
        self.percentage = 0.0
        self.created = 0
        self.updated = 0
        self.errors: List[str] = []

        # Series currently being processed (unset until the scan loop runs)
        self.key: Optional[str] = None
        self.folder: Optional[str] = None

        # Timestamps (timezone-aware, UTC)
        self.started_at = datetime.now(timezone.utc)
        self.updated_at = datetime.now(timezone.utc)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the progress state to a plain dict.

        Timestamps are rendered as ISO-8601 strings and the percentage is
        rounded to two decimals. ``key`` and ``folder`` are only included
        when they are set.
        """
        payload: Dict[str, Any] = dict(
            scan_id=self.scan_id,
            status=self.status,
            total=self.total,
            current=self.current,
            percentage=round(self.percentage, 2),
            message=self.message,
            started_at=self.started_at.isoformat(),
            updated_at=self.updated_at.isoformat(),
            created=self.created,
            updated=self.updated,
            errors=self.errors,
        )
        for optional_name in ("key", "folder"):
            optional_value = getattr(self, optional_name)
            if optional_value is not None:
                payload[optional_name] = optional_value
        return payload
|
||||
|
||||
|
||||
class NfoScanService:
|
||||
"""Manages NFO validation and creation for anime series.
|
||||
|
||||
Scans the anime library directory, checks each series folder for
|
||||
a tvshow.nfo file, creates missing files, and fills in missing
|
||||
or empty properties from TMDB metadata.
|
||||
|
||||
Uses 'key' as the primary series identifier and 'folder' as
|
||||
metadata only for filesystem operations.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
    """Create an idle service: no scan running, no subscribers."""
    # Callables notified for every scan event
    self._scan_event_handlers: List[Callable[[Dict[str, Any]], None]] = []

    # Guards the check-and-set of the scanning flag
    self._lock = asyncio.Lock()
    self._is_scanning = False
    self._current_scan: Optional[NfoScanProgress] = None

    logger.info("NfoScanService initialized")
|
||||
|
||||
def subscribe_to_scan_events(
    self,
    handler: Callable[[Dict[str, Any]], None],
) -> None:
    """Register ``handler`` to receive NFO scan events.

    The handler is appended to the subscriber list; no duplicate check is
    performed.
    """
    subscribers = self._scan_event_handlers
    subscribers.append(handler)
|
||||
|
||||
def unsubscribe_from_scan_events(
    self,
    handler: Callable[[Dict[str, Any]], None],
) -> None:
    """Remove ``handler`` from the NFO scan event subscribers.

    Only the first registration of the handler is removed. A handler that
    is not registered is reported with a warning rather than an error.
    """
    if handler in self._scan_event_handlers:
        self._scan_event_handlers.remove(handler)
    else:
        logger.warning("Handler not found for unsubscribe")
|
||||
|
||||
async def _emit_scan_event(self, event_data: Dict[str, Any]) -> None:
    """Deliver ``event_data`` to every subscribed handler.

    Coroutine functions are awaited; other callables are invoked directly.
    An exception raised by one handler is logged and does not prevent the
    remaining handlers from being notified.

    Args:
        event_data: Event payload passed unchanged to each handler.
    """
    # Iterate over a snapshot: a handler may subscribe or unsubscribe while
    # it is being notified, and mutating the live list mid-iteration would
    # cause subsequent handlers to be skipped.
    for handler in list(self._scan_event_handlers):
        try:
            if asyncio.iscoroutinefunction(handler):
                await handler(event_data)
            else:
                handler(event_data)
        except Exception as e:
            logger.error("NFO scan event handler error", error=str(e))
|
||||
|
||||
@property
def is_scanning(self) -> bool:
    """Whether the service currently marks a scan as running."""
    scanning = self._is_scanning
    return scanning
|
||||
|
||||
@property
def current_scan(self) -> Optional[NfoScanProgress]:
    """Progress object of the most recently started scan, or None."""
    progress = self._current_scan
    return progress
|
||||
|
||||
async def scan_all(
    self,
    anime_service: Any,  # AnimeService instance
) -> Dict[str, Any]:
    """Run NFO validation and creation across all series.

    The series list is read from ``anime_service.list_series_with_filters()``
    and each entry is processed by ``_scan_series``. A failure on a single
    series is recorded in the progress errors and does not stop the scan.
    Progress and lifecycle events are sent to subscribers.

    The scanning flag is always released when this coroutine exits, even
    when it is interrupted (for example by task cancellation), so that a
    later scan can start. If ``cancel_scan`` marks this scan as cancelled,
    the loop stops before the next series and the "cancelled" status is
    kept; no completion event is emitted in that case.

    Args:
        anime_service: AnimeService instance for accessing series data.

    Returns:
        Summary dict with keys: total, created, updated, errors_count,
        scan_id, and duration_seconds.

    Raises:
        NfoScanServiceError: If a scan is already in progress or the
            series list cannot be retrieved.
    """
    import uuid  # function-scope import: only needed to mint the scan id

    async with self._lock:
        if self._is_scanning:
            raise NfoScanServiceError("An NFO scan is already in progress")
        self._is_scanning = True

    # id(self) is the same for every scan run by one service instance, so
    # it cannot distinguish scans; use a random UUID instead.
    scan_id = f"nfo_scan_{uuid.uuid4().hex}"
    scan_progress = NfoScanProgress(scan_id)
    self._current_scan = scan_progress

    errors: List[str] = []
    series_list: Any = None
    start_time: Optional[datetime] = None

    try:
        logger.info("Starting NFO scan")

        # Emit scan started
        await self._emit_scan_event({
            "type": "nfo_scan_started",
            "scan_id": scan_id,
            "message": "NFO scan started",
        })

        # Get all series from AnimeService
        try:
            series_list = await anime_service.list_series_with_filters()
        except Exception as exc:
            logger.error("Failed to get series list: %s", exc)
            raise NfoScanServiceError(f"Failed to get series list: {exc}") from exc

        if series_list and scan_progress.status != "cancelled":
            scan_progress.total = len(series_list)
            scan_progress.status = "in_progress"
            scan_progress.message = f"Scanning {scan_progress.total} series..."
            scan_progress.updated_at = datetime.now(timezone.utc)

            start_time = datetime.now(timezone.utc)

            for idx, series in enumerate(series_list):
                # cancel_scan() flips the status on this progress object;
                # stop before starting work on another series.
                if scan_progress.status == "cancelled":
                    break

                key = series.get("key", "")
                folder = series.get("folder", "")
                name = series.get("name", "")

                scan_progress.key = key
                scan_progress.folder = folder
                scan_progress.message = f"Scanning: {name}"
                scan_progress.updated_at = datetime.now(timezone.utc)

                await self._emit_scan_event({
                    "type": "nfo_scan_progress",
                    "data": scan_progress.to_dict(),
                })

                try:
                    result = await self._scan_series(key, folder, series)
                    if result == "created":
                        scan_progress.created += 1
                    elif result == "updated":
                        scan_progress.updated += 1
                except Exception as exc:
                    error_msg = f"NFO scan failed for {key}: {exc}"
                    logger.warning(error_msg)
                    errors.append(error_msg)
                    scan_progress.errors.append(error_msg)

                scan_progress.current = idx + 1
                scan_progress.percentage = round(
                    (scan_progress.current / scan_progress.total) * 100, 2
                )
                scan_progress.updated_at = datetime.now(timezone.utc)
    except BaseException:
        # Anything escaping here aborts the scan; record that unless the
        # scan was already marked as cancelled.
        if scan_progress.status != "cancelled":
            scan_progress.status = "failed"
            scan_progress.updated_at = datetime.now(timezone.utc)
        raise
    finally:
        # Release the flag only if no newer scan has replaced this one.
        # There is no await between the check and the assignment, so the
        # lock is not needed here.
        if self._current_scan is scan_progress:
            self._is_scanning = False

    end_time = datetime.now(timezone.utc)
    duration = (end_time - start_time).total_seconds() if start_time else 0.0

    def _summary() -> Dict[str, Any]:
        return {
            "total": scan_progress.total,
            "created": scan_progress.created,
            "updated": scan_progress.updated,
            "errors_count": len(errors),
            "scan_id": scan_id,
            "duration_seconds": round(duration, 2),
        }

    if scan_progress.status == "cancelled":
        # cancel_scan() has already set status/message and notified
        # subscribers; keep that state instead of reporting completion.
        logger.info(
            "NFO scan stopped after cancellation: processed=%d total=%d",
            scan_progress.current,
            scan_progress.total,
        )
        return _summary()

    if not series_list:
        logger.info("No series found — NFO scan complete")
        scan_progress.status = "completed"
        scan_progress.message = "No series found"
        scan_progress.percentage = 100.0
        scan_progress.updated_at = datetime.now(timezone.utc)

        await self._emit_scan_event({
            "type": "nfo_scan_completed",
            "scan_id": scan_id,
            "success": True,
            "message": "No series found",
            "data": scan_progress.to_dict(),
        })
        return _summary()

    scan_progress.status = "completed"
    scan_progress.message = (
        f"NFO scan completed: {scan_progress.created} created, "
        f"{scan_progress.updated} updated, {len(errors)} errors"
    )
    scan_progress.percentage = 100.0
    scan_progress.updated_at = end_time

    logger.info(
        "NFO scan completed: total=%d created=%d updated=%d errors=%d duration=%.2fs",
        scan_progress.total,
        scan_progress.created,
        scan_progress.updated,
        len(errors),
        duration,
    )

    await self._emit_scan_event({
        "type": "nfo_scan_completed",
        "scan_id": scan_id,
        "success": True,
        "message": scan_progress.message,
        "data": scan_progress.to_dict(),
        "statistics": {
            "total": scan_progress.total,
            "created": scan_progress.created,
            "updated": scan_progress.updated,
            "errors_count": len(errors),
        },
    })

    return _summary()
|
||||
|
||||
async def _scan_series(
    self,
    key: str,
    folder: str,
    series_data: Dict[str, Any],
) -> Optional[str]:
    """Scan and update NFO for a single series.

    The NFO location is ``<settings.anime_directory>/<folder>/tvshow.nfo``.
    A missing file is created through ``_create_nfo``; an existing file is
    checked through ``_update_nfo_if_needed``. The database NFO flag is
    updated only when a file was actually written.

    Args:
        key: Series key (primary identifier)
        folder: Series folder name (metadata for filesystem path)
        series_data: Series data dict from anime_service

    Returns:
        "created" if new NFO was created, "updated" if existing was
        modified, None if nothing was written (no folder, no configured
        anime directory, creation skipped, or no change needed).
    """
    if not folder:
        logger.debug("Skipping series with no folder: key=%s", key)
        return None

    anime_dir = getattr(settings, "anime_directory", None)
    if not anime_dir:
        logger.warning("anime_directory not configured — skipping NFO scan")
        return None

    series_path = os.path.join(anime_dir, folder)
    nfo_path = os.path.join(series_path, "tvshow.nfo")

    if not os.path.isfile(nfo_path):
        # Create new NFO
        logger.info("Creating NFO for series: %s (%s)", key, folder)
        await self._create_nfo(key, folder, series_data, nfo_path)

        # _create_nfo returns without writing when the series has no TMDB
        # id or TMDB data is unavailable. Verify the file exists before
        # counting it as created and flagging has_nfo in the database.
        if not os.path.isfile(nfo_path):
            logger.debug("NFO was not created for series: %s", key)
            return None

        await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path)
        return "created"

    # NFO exists — check if it needs updating
    updated = await self._update_nfo_if_needed(key, folder, series_data, nfo_path)
    if updated:
        await self._update_series_nfo_flag(key, has_nfo=True, nfo_path=nfo_path)
        return "updated"

    return None
|
||||
|
||||
async def _create_nfo(
    self,
    key: str,
    folder: str,
    series_data: Dict[str, Any],
    nfo_path: str,
) -> None:
    """Write a new tvshow.nfo built from TMDB metadata.

    Nothing is written (and a warning is logged) when ``series_data`` has
    no ``tmdb_id`` or when no TMDB data could be obtained. On success the
    parent directory is created if needed, the file is written as UTF-8 and
    an ``nfo_created`` event is emitted.

    Args:
        key: Series key
        folder: Series folder name
        series_data: Series data from anime_service
        nfo_path: Full path to the NFO file to create
    """
    series_tmdb_id = series_data.get("tmdb_id")
    if not series_tmdb_id:
        logger.warning(
            "Cannot create NFO for %s: no tmdb_id available",
            key,
        )
        return

    try:
        metadata = await self._fetch_tmdb_data(series_tmdb_id)
    except Exception as exc:
        logger.warning("Failed to fetch TMDB data for %s: %s", key, exc)
        return

    if not metadata:
        logger.warning("No TMDB data for %s", key)
        return

    # Map the TMDB payload to the NFO model, then render it to XML
    image_url_builder = self._make_tmdb_image_url(series_tmdb_id)
    model = tmdb_to_nfo_model(
        metadata,
        content_ratings=None,
        get_image_url=image_url_builder,
        image_size="original",
    )
    document = generate_tvshow_nfo(model)

    # The series folder may not exist yet
    target_dir = os.path.dirname(nfo_path)
    os.makedirs(target_dir, exist_ok=True)

    with open(nfo_path, "w", encoding="utf-8") as nfo_file:
        nfo_file.write(document)

    logger.info("Created tvshow.nfo for %s at %s", key, nfo_path)

    await self._emit_scan_event(
        dict(type="nfo_created", key=key, folder=folder, path=nfo_path)
    )
|
||||
|
||||
async def _update_nfo_if_needed(
    self,
    key: str,
    folder: str,
    series_data: Dict[str, Any],
    nfo_path: str,
) -> bool:
    """Check an existing NFO for missing critical fields and rewrite it.

    The file is parsed with lxml. A file that cannot be parsed is handed to
    ``_regenerate_nfo``. If any of ``title``, ``plot``, ``premiered`` or
    ``tmdbid`` is absent or blank, the NFO is rebuilt from TMDB metadata
    and written over the existing file, and an ``nfo_updated`` event is
    emitted.

    Args:
        key: Series key
        folder: Series folder name
        series_data: Series data from anime_service
        nfo_path: Full path to the existing NFO file

    Returns:
        True if the NFO was rewritten, False if no change was needed or
        the update could not be performed (lxml missing, no tmdb_id, no
        TMDB data).
    """
    try:
        from lxml import etree
    except ImportError:
        logger.warning("lxml not available — cannot update existing NFO files")
        return False

    try:
        root = etree.parse(nfo_path).getroot()
    except Exception as exc:
        logger.warning("Failed to parse existing NFO for %s: %s — will regenerate", key, exc)
        # Corrupt or unreadable NFO — regenerate from TMDB
        return await self._regenerate_nfo(key, folder, series_data, nfo_path)

    def _is_blank(tag: str) -> bool:
        # A field counts as missing when its element is absent or holds
        # only whitespace.
        node = root.find(tag)
        return node is None or not node.text or node.text.strip() == ""

    missing_fields: List[str] = [
        tag for tag in ("title", "plot", "premiered", "tmdbid") if _is_blank(tag)
    ]

    if not missing_fields:
        logger.debug("NFO for %s is complete — no update needed", key)
        return False

    logger.info(
        "NFO for %s is missing fields %s — attempting to fill from TMDB",
        key,
        missing_fields,
    )

    series_tmdb_id = series_data.get("tmdb_id")
    if not series_tmdb_id:
        logger.warning("Cannot update NFO for %s: no tmdb_id", key)
        return False

    try:
        metadata = await self._fetch_tmdb_data(series_tmdb_id)
    except Exception as exc:
        logger.warning("Failed to fetch TMDB data for %s: %s", key, exc)
        return False

    if not metadata:
        return False

    # Rebuild the whole document from TMDB and overwrite the file
    model = tmdb_to_nfo_model(
        metadata,
        content_ratings=None,
        get_image_url=self._make_tmdb_image_url(series_tmdb_id),
        image_size="original",
    )
    document = generate_tvshow_nfo(model)

    with open(nfo_path, "w", encoding="utf-8") as nfo_file:
        nfo_file.write(document)

    logger.info("Updated NFO for %s (filled %d fields)", key, len(missing_fields))

    await self._emit_scan_event(
        dict(
            type="nfo_updated",
            key=key,
            folder=folder,
            path=nfo_path,
            missing_fields=missing_fields,
        )
    )

    return True
|
||||
|
||||
async def _regenerate_nfo(
    self,
    key: str,
    folder: str,
    series_data: Dict[str, Any],
    nfo_path: str,
) -> bool:
    """Rebuild an NFO from TMDB metadata, overwriting the existing file.

    Args:
        key: Series key (used for logging)
        folder: Series folder name (not used by this method)
        series_data: Series data; its ``tmdb_id`` selects the TMDB entry
        nfo_path: Full path of the NFO file to overwrite

    Returns:
        True if the file was rewritten; False when there is no tmdb_id or
        no TMDB data could be obtained.
    """
    series_tmdb_id = series_data.get("tmdb_id")
    if not series_tmdb_id:
        return False

    try:
        metadata = await self._fetch_tmdb_data(series_tmdb_id)
    except Exception as exc:
        logger.warning("Failed to fetch TMDB data for %s during regeneration: %s", key, exc)
        return False

    if not metadata:
        return False

    model = tmdb_to_nfo_model(
        metadata,
        content_ratings=None,
        get_image_url=self._make_tmdb_image_url(series_tmdb_id),
        image_size="original",
    )
    document = generate_tvshow_nfo(model)

    with open(nfo_path, "w", encoding="utf-8") as nfo_file:
        nfo_file.write(document)

    logger.info("Regenerated NFO for %s", key)
    return True
|
||||
|
||||
async def _fetch_tmdb_data(self, tmdb_id: int) -> Optional[Dict[str, Any]]:
    """Fetch series details for ``tmdb_id`` through the TMDB client.

    Any exception (including a failure to import the client module) is
    logged as a warning and converted into a None result.

    Args:
        tmdb_id: TMDB series ID

    Returns:
        Whatever ``get_series_details`` returns, or None on failure.
    """
    try:
        from src.server.nfo.tmdb_client import get_tmdb_client

        return await get_tmdb_client().get_series_details(tmdb_id)
    except Exception as exc:
        logger.warning("TMDB fetch failed for TMDB ID %s: %s", tmdb_id, exc)
    return None
|
||||
|
||||
def _make_tmdb_image_url(self, tmdb_id: int) -> Callable[[str, str], str]:
    """Build an image-URL function bound to one TMDB image base URL.

    The base URL is obtained once from ``get_tmdb_image_base_url(tmdb_id)``.
    The returned function concatenates base, size and path, and returns an
    empty string for an empty path.
    NOTE(review): the concatenation adds no separators, so the base is
    presumably expected to end with "/" and the path to start with "/" —
    confirm against tmdb_client.
    """
    from src.server.nfo.tmdb_client import get_tmdb_image_base_url

    prefix = get_tmdb_image_base_url(tmdb_id)

    def get_image_url(path: str, size: str = "original") -> str:
        return f"{prefix}{size}{path}" if path else ""

    return get_image_url
|
||||
|
||||
async def _update_series_nfo_flag(
    self,
    key: str,
    has_nfo: bool,
    nfo_path: str,
) -> None:
    """Record NFO presence for a series in the database.

    Looks the series up by key and, when found, sets ``has_nfo`` and
    ``nfo_path``, sets ``nfo_created_at`` if it is still unset, refreshes
    ``nfo_updated_at`` and flushes the session. Any exception is logged as
    a warning and swallowed, so a database problem does not fail the scan.
    NOTE(review): only ``flush()`` is called here; committing is presumably
    handled by ``get_db_session`` on exit — confirm.

    Args:
        key: Series key (primary identifier)
        has_nfo: Whether the series now has an NFO file
        nfo_path: Path to the NFO file
    """
    try:
        from src.server.database.connection import get_db_session
        from src.server.database.service import AnimeSeriesService

        async with get_db_session() as session:
            record = await AnimeSeriesService.get_by_key(session, key)
            if not record:
                return

            stamp = datetime.now(timezone.utc)
            record.has_nfo = has_nfo
            record.nfo_path = nfo_path
            # Keep the original creation time once it has been recorded
            if record.nfo_created_at is None:
                record.nfo_created_at = stamp
            record.nfo_updated_at = stamp
            await session.flush()
            logger.debug("Updated NFO flag for series: %s", key)
    except Exception as exc:
        logger.warning("Failed to update NFO flag for %s: %s", key, exc)
|
||||
|
||||
async def cancel_scan(self) -> bool:
    """Mark the running NFO scan as cancelled.

    Clears the scanning flag under the lock and, if a progress object
    exists, sets its status and message and emits an ``nfo_scan_cancelled``
    event.

    Returns:
        True if scan was cancelled, False if no scan in progress.
    """
    async with self._lock:
        if not self._is_scanning:
            return False

        self._is_scanning = False
        scan = self._current_scan
        if scan:
            scan.status = "cancelled"
            scan.message = "NFO scan cancelled by user"
            scan.updated_at = datetime.now(timezone.utc)

    # Notify subscribers outside the lock
    if scan:
        await self._emit_scan_event(
            dict(
                type="nfo_scan_cancelled",
                scan_id=scan.scan_id,
                message="NFO scan cancelled by user",
            )
        )

    logger.info("NFO scan cancelled")
    return True
|
||||
|
||||
async def get_scan_status(self) -> Dict[str, Any]:
    """Report whether a scan is running plus the latest progress snapshot.

    Returns:
        Dict with ``is_scanning`` and ``current_scan``; the latter is the
        progress object's ``to_dict()`` output, or None when there is no
        progress object.
    """
    snapshot = None
    if self._current_scan:
        snapshot = self._current_scan.to_dict()
    return dict(is_scanning=self._is_scanning, current_scan=snapshot)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Module-level singleton
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_nfo_scan_service: Optional[NfoScanService] = None
|
||||
|
||||
|
||||
def get_nfo_scan_service() -> NfoScanService:
    """Return the module-wide NfoScanService, creating it on first use."""
    global _nfo_scan_service
    service = _nfo_scan_service
    if service is None:
        service = _nfo_scan_service = NfoScanService()
    return service
|
||||
|
||||
|
||||
def reset_nfo_scan_service() -> None:
    """Drop the module-wide NfoScanService instance (for testing).

    The next call to ``get_nfo_scan_service`` creates a fresh instance.
    """
    global _nfo_scan_service
    _nfo_scan_service = None
    return None
|
||||
@@ -11,7 +11,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import List, Optional
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
@@ -261,6 +261,9 @@ class SchedulerService:
|
||||
"auto_download_after_rescan": (
|
||||
self._config.auto_download_after_rescan if self._config else False
|
||||
),
|
||||
"nfo_scan_after_rescan": (
|
||||
self._config.nfo_scan_after_rescan if self._config else True
|
||||
),
|
||||
"last_run": (
|
||||
self._last_scan_time.isoformat()
|
||||
if self._last_scan_time
|
||||
@@ -375,7 +378,19 @@ class SchedulerService:
|
||||
# 1. Main library rescan
|
||||
await self._run_rescan()
|
||||
|
||||
# 2. Auto-download (if enabled)
|
||||
# 2. NFO scan (if enabled)
|
||||
if self._config and self._config.nfo_scan_after_rescan:
|
||||
try:
|
||||
nfo_result = await self._run_nfo_scan()
|
||||
await self._broadcast("nfo_scan_started", {
|
||||
"created": nfo_result.get("created", 0),
|
||||
"updated": nfo_result.get("updated", 0),
|
||||
})
|
||||
except Exception as exc:
|
||||
logger.error("NFO scan failed: %s", exc, exc_info=True)
|
||||
await self._broadcast("nfo_scan_error", {"error": str(exc)})
|
||||
|
||||
# 3. Auto-download (if enabled)
|
||||
if self._config and self._config.auto_download_after_rescan:
|
||||
try:
|
||||
queued = await self._run_auto_download()
|
||||
@@ -419,6 +434,24 @@ class SchedulerService:
|
||||
await anime_service.rescan()
|
||||
logger.info("anime_service.rescan() completed")
|
||||
|
||||
async def _run_nfo_scan(self) -> Dict[str, Any]:
    """Run the NFO scan service over all series and return its summary.

    The anime service and the NFO scan service are resolved at call time
    through their accessor functions. The summary returned by ``scan_all``
    is logged and passed back unchanged.
    """
    from src.server.services.nfo_scan_service import get_nfo_scan_service
    from src.server.utils.dependencies import get_anime_service

    anime = get_anime_service()
    scanner = get_nfo_scan_service()

    logger.info("Starting NFO scan...")
    summary = await scanner.scan_all(anime)

    created_count = summary.get("created", 0)
    updated_count = summary.get("updated", 0)
    error_count = summary.get("errors_count", 0)
    logger.info(
        "NFO scan completed: created=%d updated=%d errors=%d",
        created_count,
        updated_count,
        error_count,
    )
    return summary
|
||||
|
||||
async def _run_auto_download(self) -> int:
|
||||
"""Queue and start downloads for all series with missing episodes."""
|
||||
from src.server.models.download import EpisodeIdentifier
|
||||
|
||||
@@ -103,6 +103,9 @@ class ConcreteLoader(Loader):
|
||||
def get_title(self, key: str) -> str:
    """Return a deterministic placeholder title derived from ``key``."""
    title = f"Title for {key}"
    return title
|
||||
|
||||
def get_year(self, key: str) -> int | None:
    """Return a fixed year regardless of ``key`` (test double)."""
    fixed_year = 2024
    return fixed_year
|
||||
|
||||
def get_season_episode_count(self, slug: str) -> Dict[int, int]:
    """Return a fixed season -> episode-count mapping (test double)."""
    counts = dict([(1, 12), (2, 24)])
    return counts
|
||||
|
||||
|
||||
@@ -45,6 +45,9 @@ class ConcreteEnhancedLoader(EnhancedAniWorldLoader):
|
||||
def get_title(self, key: str) -> str:
    """Delegate to the loader's ``GetTitle`` implementation."""
    title = self.GetTitle(key)
    return title
|
||||
|
||||
def get_year(self, key: str) -> int | None:
    """Delegate to the loader's ``GetYear`` implementation."""
    year = self.GetYear(key)
    return year
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def enhanced_loader():
|
||||
@@ -969,6 +972,7 @@ class TestHlsUrlDetection:
|
||||
def test_voe_hls_pattern_extracts_hls_url(self):
|
||||
"""HLS_PATTERN should extract HLS URL from VOE embedded player HTML."""
|
||||
import re
|
||||
|
||||
from src.server.providers.streaming.voe import HLS_PATTERN
|
||||
|
||||
html_with_hls = """
|
||||
@@ -984,6 +988,7 @@ class TestHlsUrlDetection:
|
||||
def test_voe_hls_pattern_returns_none_when_no_hls(self):
|
||||
"""HLS_PATTERN should return None when no HLS URL in HTML."""
|
||||
import re
|
||||
|
||||
from src.server.providers.streaming.voe import HLS_PATTERN
|
||||
|
||||
html_no_hls = """
|
||||
@@ -997,6 +1002,7 @@ class TestHlsUrlDetection:
|
||||
def test_hls_url_detection_in_provider_flow(self, enhanced_loader, tmp_path):
|
||||
"""Provider should detect and handle HLS URLs from VOE extractor."""
|
||||
import re
|
||||
|
||||
from src.server.providers.streaming.voe import HLS_PATTERN
|
||||
|
||||
# Simulate VOE returning an HLS URL (base64 encoded .m3u8)
|
||||
|
||||
@@ -53,6 +53,9 @@ class MockProvider(Loader):
|
||||
def get_title(self, key):
    """Return the title configured on this mock, ignoring ``key``."""
    configured_title = self._title
    return configured_title
|
||||
|
||||
def get_year(self, key) -> int | None:
    """Return a fixed year regardless of ``key`` (mock provider)."""
    fixed_year = 2024
    return fixed_year
|
||||
|
||||
def get_season_episode_count(self, slug):
    """Return the season/episode mapping configured on this mock."""
    configured_counts = self._season_episodes
    return configured_counts
|
||||
|
||||
@@ -66,6 +69,9 @@ class ConcreteMonitoredWrapper(MonitoredProviderWrapper):
|
||||
def unsubscribe_download_progress(self, handler):
    """No-op: this test wrapper does not track progress subscribers."""
    return None
|
||||
|
||||
def get_year(self, key: str) -> int | None:
    """Report that no year is known for any ``key`` (test wrapper)."""
    unknown_year = None
    return unknown_year
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_provider():
|
||||
|
||||
@@ -25,6 +25,7 @@ def _loader() -> AniworldLoader:
|
||||
loader = AniworldLoader.__new__(AniworldLoader)
|
||||
loader._KeyHTMLDict = {}
|
||||
loader._EpisodeHTMLDict = {}
|
||||
loader._YearDict = {}
|
||||
loader.ANIWORLD_TO = "https://aniworld.to"
|
||||
loader.DEFAULT_REQUEST_TIMEOUT = 10
|
||||
loader.session = MagicMock()
|
||||
|
||||
Reference in New Issue
Block a user