Files
Aniworld/src/core/services/nfo_service.py
Lukas d596902ca3 Parse existing NFO for TMDB ID to skip redundant search
Check existing tvshow.nfo for TMDB ID before querying TMDB API.
If found, fetch details directly using cached ID instead of searching.
Reduces API calls and improves performance for already-indexed series.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-27 21:22:24 +02:00

892 lines
33 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""NFO service for creating and managing tvshow.nfo files.
This service orchestrates TMDB API calls, XML generation, and media downloads
to create complete NFO metadata for TV series.
Example:
>>> nfo_service = NFOService(tmdb_api_key="key", anime_directory="/anime")
>>> await nfo_service.create_tvshow_nfo("Attack on Titan", "/anime/aot", 2013)
"""
import logging
import re
import unicodedata
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from lxml import etree
from src.core.services.tmdb_client import TMDBAPIError, TMDBClient
from src.core.utils.image_downloader import ImageDownloader
from src.core.utils.nfo_generator import generate_tvshow_nfo
from src.core.utils.nfo_mapper import tmdb_to_nfo_model
from src.core.entities.nfo_models import TVShowNFO
logger = logging.getLogger(__name__)
class NFOService:
    """Service for creating and managing tvshow.nfo files.

    Attributes:
        tmdb_client: TMDB API client
        image_downloader: Image downloader utility
        anime_directory: Base directory for anime series
        image_size: Image size used when building image URLs
        auto_create: Flag stored from the constructor; whether NFOs are
            meant to be auto-created
    """
def __init__(
    self,
    tmdb_api_key: str,
    anime_directory: str,
    image_size: str = "original",
    auto_create: bool = True
):
    """Set up the collaborators and settings used by the service.

    Args:
        tmdb_api_key: API key handed to the TMDB client
        anime_directory: Root directory that contains the series folders
        image_size: Image size to request (original, w500, etc.)
        auto_create: Whether NFOs should be auto-created
    """
    # External collaborators first (same construction order as before).
    self.tmdb_client = TMDBClient(api_key=tmdb_api_key)
    self.image_downloader = ImageDownloader()
    # Plain configuration values.
    self.auto_create = auto_create
    self.image_size = image_size
    self.anime_directory = Path(anime_directory)
async def __aenter__(self) -> "NFOService":
"""Enter async context manager."""
await self.tmdb_client.__aenter__()
await self.image_downloader.__aenter__()
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Exit async context manager and cleanup resources."""
await self.tmdb_client.close()
await self.image_downloader.close()
return False
def has_nfo(self, serie_folder: str) -> bool:
    """Tell whether a series folder already contains a tvshow.nfo.

    Args:
        serie_folder: Series folder name, relative to the anime directory

    Returns:
        True when ``<anime_directory>/<serie_folder>/tvshow.nfo`` exists
    """
    return self.anime_directory.joinpath(serie_folder, "tvshow.nfo").exists()
@staticmethod
def _extract_year_from_name(serie_name: str) -> Tuple[str, Optional[int]]:
"""Extract year from series name if present in format 'Name (YYYY)'.
Args:
serie_name: Series name, possibly with year in parentheses
Returns:
Tuple of (clean_name, year)
- clean_name: Series name without year
- year: Extracted year or None
Examples:
>>> _extract_year_from_name("Attack on Titan (2013)")
("Attack on Titan", 2013)
>>> _extract_year_from_name("Attack on Titan")
("Attack on Titan", None)
"""
# Match the last year in parentheses at the end: (YYYY)
match = re.search(r'\((\d{4})\)\s*$', serie_name)
if match:
year = int(match.group(1))
# Strip ALL trailing year suffixes to get a fully clean name
clean_name = re.sub(r'(\s*\(\d{4}\))+\s*$', '', serie_name).strip()
return clean_name, year
return serie_name, None
async def check_nfo_exists(self, serie_folder: str) -> bool:
    """Async counterpart of the NFO existence check.

    Args:
        serie_folder: Series folder name, relative to the anime directory

    Returns:
        True if the folder contains a tvshow.nfo
    """
    series_dir = self.anime_directory / serie_folder
    return (series_dir / "tvshow.nfo").exists()
async def create_tvshow_nfo(
    self,
    serie_name: str,
    serie_folder: str,
    year: Optional[int] = None,
    download_poster: bool = True,
    download_logo: bool = True,
    download_fanart: bool = True,
    alt_titles: Optional[List[str]] = None
) -> Path:
    """Create tvshow.nfo by scraping TMDB.

    If the series folder already holds a tvshow.nfo with a TMDB ID, that
    ID is used directly and the TMDB search is skipped. Otherwise the
    show is located through the fallback search strategies.

    Args:
        serie_name: Name of the series to search (may include year in parentheses)
        serie_folder: Series folder name (created if it does not exist)
        year: Release year (helps narrow search). If None and name contains year,
            year will be auto-extracted
        download_poster: Whether to download poster.jpg
        download_logo: Whether to download logo.png
        download_fanart: Whether to download fanart.jpg
        alt_titles: Alternative titles (e.g., Japanese title) for fallback search

    Returns:
        Path to created NFO file

    Raises:
        TMDBAPIError: If TMDB API fails or no search strategy finds the show
    """
    # Extract year from name if not provided
    search_name, extracted_year = self._extract_year_from_name(serie_name)
    if year is None and extracted_year is not None:
        year = extracted_year
        logger.info("Extracted year %s from series name", year)
    logger.info("Creating NFO for %s (year: %s)", search_name, year)

    folder_path = self.anime_directory / serie_folder
    if not folder_path.exists():
        logger.info("Creating series folder: %s", folder_path)
        folder_path.mkdir(parents=True, exist_ok=True)

    # Check for existing NFO with TMDB ID to skip search
    nfo_path = folder_path / "tvshow.nfo"
    existing_tmdb_id = None
    if nfo_path.exists():
        try:
            existing_tmdb_id = self.parse_nfo_ids(nfo_path).get("tmdb_id")
        except Exception as e:  # pylint: disable=broad-except
            logger.debug("Could not parse existing NFO IDs: %s", e)
        if existing_tmdb_id:
            logger.info(
                "Found existing TMDB ID %s in NFO, using directly",
                existing_tmdb_id
            )

    try:
        await self.tmdb_client._ensure_session()

        # Resolve the TMDB ID: reuse the one from the NFO, or search.
        tv_show: Optional[Dict[str, Any]] = None
        if existing_tmdb_id:
            tv_id = existing_tmdb_id
            logger.info("Fetching details directly for TMDB ID: %s", tv_id)
        else:
            tv_show, _search_source = await self._search_with_fallback(
                search_name, year, alt_titles
            )
            tv_id = tv_show["id"]
            logger.info("Found match: %s (ID: %s)", tv_show['name'], tv_id)

        # Get detailed information with multi-language image support
        details = await self.tmdb_client.get_tv_show_details(
            tv_id,
            append_to_response="credits,external_ids,images"
        )
        # Get content ratings for FSK
        content_ratings = await self.tmdb_client.get_tv_show_content_ratings(tv_id)

        if tv_show is None:
            logger.info(
                "Found match: %s (ID: %s)",
                details.get("name", serie_name), tv_id
            )

        # The search result overview is the last-resort fallback for an
        # empty plot; it only exists when a search was actually performed.
        search_overview = None
        if tv_show is not None:
            search_overview = tv_show.get("overview") or None
            if not search_overview:
                try:
                    logger.debug(
                        "No overview in German search result, trying en-US search fallback for: %s",
                        search_name,
                    )
                    en_search_results = await self.tmdb_client.search_tv_show(
                        search_name,
                        language="en-US",
                    )
                    if en_search_results.get("results"):
                        en_match = self._find_best_match(
                            en_search_results["results"], search_name, year
                        )
                        search_overview = en_match.get("overview") or None
                        if search_overview:
                            logger.info(
                                "Using en-US search overview fallback for %s",
                                search_name,
                            )
                except Exception as exc:  # pylint: disable=broad-except
                    # Best effort only: a failed fallback must not abort
                    # NFO creation.
                    logger.warning(
                        "Failed en-US search fallback for overview: %s",
                        exc,
                    )

        # Enrich with fallback languages for empty overview/tagline. This
        # runs for the existing-ID path as well, so a refreshed NFO does
        # not end up with an empty plot.
        details = await self._enrich_details_with_fallback(
            details, search_overview=search_overview
        )

        # Convert TMDB data to TVShowNFO model
        nfo_model = tmdb_to_nfo_model(
            details,
            content_ratings,
            self.tmdb_client.get_image_url,
            self.image_size,
        )
        # Generate XML and save NFO file
        nfo_xml = generate_tvshow_nfo(nfo_model)
        nfo_path.write_text(nfo_xml, encoding="utf-8")
        logger.info("Created NFO: %s", nfo_path)

        # Download media files
        await self._download_media_files(
            details,
            folder_path,
            download_poster=download_poster,
            download_logo=download_logo,
            download_fanart=download_fanart
        )
        return nfo_path
    finally:
        await self.tmdb_client.close()
async def update_tvshow_nfo(
    self,
    serie_folder: str,
    download_media: bool = True
) -> Path:
    """Update existing tvshow.nfo with fresh data from TMDB.

    The TMDB ID is read from the existing NFO: the first non-empty
    ``<uniqueid type="tmdb">`` wins, with ``<tmdbid>`` as fallback.

    Args:
        serie_folder: Series folder name
        download_media: Whether to re-download media files

    Returns:
        Path to updated NFO file

    Raises:
        FileNotFoundError: If NFO file doesn't exist
        TMDBAPIError: If TMDB API fails, the NFO is not valid XML, or no
            usable TMDB ID is found in the NFO
    """
    folder_path = self.anime_directory / serie_folder
    nfo_path = folder_path / "tvshow.nfo"
    if not nfo_path.exists():
        raise FileNotFoundError(f"NFO file not found: {nfo_path}")
    logger.info("Updating NFO for %s", serie_folder)

    # Parse existing NFO to extract TMDB ID
    try:
        root = etree.parse(str(nfo_path)).getroot()
        tmdb_id = None
        for uniqueid in root.findall(".//uniqueid"):
            # Skip empty elements: int(None) would raise TypeError.
            if uniqueid.get("type") == "tmdb" and uniqueid.text:
                tmdb_id = int(uniqueid.text)
                break
        # Fallback: check for tmdbid element
        if tmdb_id is None:
            tmdbid_elem = root.find(".//tmdbid")
            if tmdbid_elem is not None and tmdbid_elem.text:
                tmdb_id = int(tmdbid_elem.text)
    except etree.XMLSyntaxError as e:
        raise TMDBAPIError(f"Invalid XML in NFO file: {e}") from e
    except ValueError as e:
        raise TMDBAPIError(f"Invalid TMDB ID format in NFO: {e}") from e

    if tmdb_id is None:
        raise TMDBAPIError(
            "No TMDB ID found in existing NFO. "
            "Delete the NFO and create a new one instead."
        )
    logger.debug("Found TMDB ID: %s", tmdb_id)

    try:
        await self.tmdb_client._ensure_session()
        logger.debug("Fetching fresh data for TMDB ID: %s", tmdb_id)
        details = await self.tmdb_client.get_tv_show_details(
            tmdb_id,
            append_to_response="credits,external_ids,images"
        )
        # Get content ratings for FSK
        content_ratings = await self.tmdb_client.get_tv_show_content_ratings(tmdb_id)
        # Enrich with fallback languages for empty overview/tagline
        details = await self._enrich_details_with_fallback(details)
        # Convert TMDB data to TVShowNFO model
        nfo_model = tmdb_to_nfo_model(
            details,
            content_ratings,
            self.tmdb_client.get_image_url,
            self.image_size,
        )
        # Generate XML and save updated NFO file
        nfo_xml = generate_tvshow_nfo(nfo_model)
        nfo_path.write_text(nfo_xml, encoding="utf-8")
        logger.info("Updated NFO: %s", nfo_path)
        # Re-download media files if requested
        if download_media:
            await self._download_media_files(
                details,
                folder_path,
                download_poster=True,
                download_logo=True,
                download_fanart=True
            )
        return nfo_path
    finally:
        await self.tmdb_client.close()
def parse_nfo_ids(self, nfo_path: Path) -> Dict[str, Optional[int]]:
    """Parse TMDB ID and TVDB ID from an existing NFO file.

    ``<uniqueid type="tmdb|tvdb">`` elements are read first; the
    dedicated ``<tmdbid>`` / ``<tvdbid>`` elements are only consulted for
    an ID that is still missing afterwards. Parsing problems are logged
    and never raised.

    Args:
        nfo_path: Path to tvshow.nfo file

    Returns:
        Dictionary with 'tmdb_id' and 'tvdb_id' keys.
        Values are integers if found, None otherwise.

    Example:
        >>> ids = nfo_service.parse_nfo_ids(Path("/anime/series/tvshow.nfo"))
        >>> print(ids)
        {'tmdb_id': 1429, 'tvdb_id': 79168}
    """
    result: Dict[str, Optional[int]] = {"tmdb_id": None, "tvdb_id": None}
    if not nfo_path.exists():
        logger.debug("NFO file not found: %s", nfo_path)
        return result

    def _to_int(text: str, provider: str, location: str) -> Optional[int]:
        """Convert an ID string to int; log a warning and return None if invalid."""
        try:
            return int(text)
        except ValueError:
            logger.warning(
                "Invalid %s ID format in %s: %s", provider, location, text
            )
            return None

    try:
        root = etree.parse(str(nfo_path)).getroot()

        # Try to find the IDs from uniqueid elements first
        for uniqueid in root.findall(".//uniqueid"):
            uid_type = uniqueid.get("type")
            uid_text = uniqueid.text
            if uid_type in ("tmdb", "tvdb") and uid_text:
                parsed = _to_int(uid_text, uid_type.upper(), "NFO")
                # An invalid value must not wipe an ID found earlier.
                if parsed is not None:
                    result[f"{uid_type}_id"] = parsed

        # Fallback: check for dedicated tmdbid/tvdbid elements
        for key, tag, provider in (
            ("tmdb_id", "tmdbid", "TMDB"),
            ("tvdb_id", "tvdbid", "TVDB"),
        ):
            if result[key] is not None:
                continue
            elem = root.find(f".//{tag}")
            if elem is not None and elem.text:
                result[key] = _to_int(elem.text, provider, f"{tag} element")

        logger.debug(
            "Parsed IDs from NFO: %s - TMDB: %s, TVDB: %s",
            nfo_path.name, result["tmdb_id"], result["tvdb_id"],
        )
    except etree.XMLSyntaxError as e:
        logger.error("Invalid XML in NFO file %s: %s", nfo_path, e)
    except Exception as e:  # pylint: disable=broad-except
        logger.error("Error parsing NFO file %s: %s", nfo_path, e)
    return result
def parse_nfo_year(self, nfo_path: Path) -> Optional[int]:
    """Read the release year out of an existing NFO file.

    The ``<year>`` element is preferred; the first four characters of
    ``<premiered>`` are the fallback. A candidate is only accepted when
    it parses as an integer between 1900 and 2100 (inclusive). Parsing
    problems are logged and never raised.

    Args:
        nfo_path: Path to tvshow.nfo file

    Returns:
        Year as integer if found, None otherwise.

    Example:
        >>> year = nfo_service.parse_nfo_year(Path("/anime/series/tvshow.nfo"))
        >>> print(year)
        2013
    """
    if not nfo_path.exists():
        logger.debug("NFO file not found: %s", nfo_path)
        return None
    try:
        document_root = etree.parse(str(nfo_path)).getroot()

        # Preferred source: <year>
        year_node = document_root.find(".//year")
        year_text = year_node.text if year_node is not None else None
        if year_text:
            try:
                candidate = int(year_text)
            except ValueError:
                candidate = None
            if candidate is not None and 1900 <= candidate <= 2100:
                logger.debug("Found year in NFO: %d", candidate)
                return candidate

        # Fallback source: <premiered> (format: YYYY-MM-DD)
        premiered_node = document_root.find(".//premiered")
        premiered_text = premiered_node.text if premiered_node is not None else None
        if premiered_text and len(premiered_text) >= 4:
            try:
                candidate = int(premiered_text[:4])
            except ValueError:
                candidate = None
            if candidate is not None and 1900 <= candidate <= 2100:
                logger.debug("Found year from premiered in NFO: %d", candidate)
                return candidate

        logger.debug("No year found in NFO: %s", nfo_path)
    except etree.XMLSyntaxError as e:
        logger.error("Invalid XML in NFO file %s: %s", nfo_path, e)
    except Exception as e:  # pylint: disable=broad-except
        logger.error("Error parsing year from NFO file %s: %s", nfo_path, e)
    return None
async def _enrich_details_with_fallback(
self,
details: Dict[str, Any],
search_overview: Optional[str] = None,
) -> Dict[str, Any]:
"""Enrich TMDB details with fallback languages for empty fields.
When requesting details in ``de-DE``, some anime have an empty
``overview`` (and potentially other translatable fields). This
method detects empty values and fills them from alternative
languages (``en-US``, then ``ja-JP``) so that NFO files always
contain a ``plot`` regardless of whether the German translation
exists. As a last resort, the overview from the search result
is used.
Args:
details: TMDB TV show details (language ``de-DE``).
search_overview: Overview text from the TMDB search result,
used as a final fallback if all language-specific
requests fail or return empty overviews.
Returns:
The *same* dict, mutated in-place with fallback values
where needed.
"""
overview = details.get("overview") or ""
if overview:
# Overview already populated nothing to do.
return details
tmdb_id = details.get("id")
fallback_languages = ["en-US", "ja-JP"]
for lang in fallback_languages:
if details.get("overview"):
break
logger.debug(
"Trying %s fallback for TMDB ID %s",
lang, tmdb_id,
)
try:
lang_details = await self.tmdb_client.get_tv_show_details(
tmdb_id,
language=lang,
)
if not details.get("overview") and lang_details.get("overview"):
details["overview"] = lang_details["overview"]
logger.info(
"Used %s overview fallback for TMDB ID %s",
lang, tmdb_id,
)
# Also fill tagline if missing
if not details.get("tagline") and lang_details.get("tagline"):
details["tagline"] = lang_details["tagline"]
except Exception as exc: # pylint: disable=broad-except
logger.warning(
"Failed to fetch %s fallback for TMDB ID %s: %s",
lang, tmdb_id, exc,
)
# Last resort: use search result overview
if not details.get("overview") and search_overview:
details["overview"] = search_overview
logger.info(
"Used search result overview fallback for TMDB ID %s",
tmdb_id,
)
return details
def _find_best_match(
self,
results: List[Dict[str, Any]],
query: str,
year: Optional[int] = None
) -> Dict[str, Any]:
"""Find best matching TV show from search results.
Args:
results: TMDB search results
query: Original search query
year: Expected release year
Returns:
Best matching TV show data
"""
if not results:
raise TMDBAPIError("No search results to match")
# If year is provided, try to find exact match
if year:
for result in results:
first_air_date = result.get("first_air_date", "")
if first_air_date.startswith(str(year)):
logger.debug("Found year match: %s (%s)", result['name'], first_air_date)
return result
# Return first result (usually best match)
return results[0]
async def _search_with_fallback(
self,
primary_query: str,
year: Optional[int],
alt_titles: Optional[List[str]] = None
) -> Tuple[Dict[str, Any], str]:
"""Search TMDB with fallback strategies.
Tries multiple search strategies in order:
1. Primary query with year filter
2. Alternative titles (e.g., Japanese name)
3. Multi-language search (en-US)
4. Search without year constraint
5. Punctuation-normalized search
Args:
primary_query: Primary search term
year: Release year for filtering
alt_titles: Alternative titles to try if primary fails
Returns:
Tuple of (matched TV show dict, source description string)
Raises:
TMDBAPIError: If all search strategies fail
"""
search_strategies = [
# Strategy 1: Primary query as-is
{"query": primary_query, "year": year, "lang": "de-DE", "desc": "primary"},
]
# Strategy 2: Try alt titles (typically Japanese)
if alt_titles:
for alt in alt_titles:
if alt != primary_query:
search_strategies.append(
{"query": alt, "year": year, "lang": "ja-JP", "desc": f"alt_title:{alt}"}
)
search_strategies.append(
{"query": alt, "year": year, "lang": "en-US", "desc": f"alt_title:{alt}"}
)
# Strategy 3: Try English search
search_strategies.append(
{"query": primary_query, "year": year, "lang": "en-US", "desc": "english"}
)
# Strategy 4: Try without year constraint
if year:
search_strategies.append(
{"query": primary_query, "year": None, "lang": "de-DE", "desc": "no_year"}
)
# Strategy 5: Normalize punctuation
normalized = self._normalize_query_for_search(primary_query)
if normalized != primary_query:
search_strategies.append(
{"query": normalized, "year": year, "lang": "de-DE", "desc": f"normalized:{normalized}"}
)
# Strategy 6: Try search/multi for series indexed as movies
search_strategies.append(
{"query": primary_query, "year": year, "lang": "en-US", "desc": "multi_search", "use_multi": True}
)
last_error = None
for strategy in search_strategies:
query = strategy["query"]
lang = strategy["lang"]
desc = strategy["desc"]
use_multi = strategy.get("use_multi", False)
try:
logger.debug(
"TMDB search attempt: query='%s', lang=%s, year=%s, strategy=%s",
query, lang, strategy["year"], desc
)
# Use search/multi for multi_search strategy
if use_multi:
search_results = await self.tmdb_client.search_multi(
query,
language=lang
)
# Filter for TV shows only
if search_results.get("results"):
tv_results = [
r for r in search_results["results"]
if r.get("media_type") == "tv"
]
if tv_results:
search_results["results"] = tv_results
else:
search_results["results"] = []
else:
search_results = await self.tmdb_client.search_tv_show(
query,
language=lang
)
if search_results.get("results"):
# Apply year filter if we have one
results = search_results["results"]
if strategy["year"]:
year_filtered = [
r for r in results
if r.get("first_air_date", "").startswith(str(strategy["year"]))
]
if year_filtered:
match = year_filtered[0]
else:
# Year didn't match, still use first result but log it
match = results[0]
logger.debug(
"Year %s not found in results for '%s', using: %s",
strategy["year"], query, match["name"]
)
else:
match = results[0]
logger.info(
"TMDB search succeeded: '%s' found via strategy '%s' (ID: %s)",
match["name"], desc, match["id"]
)
return match, desc
else:
logger.debug("No results for '%s' via %s", query, desc)
except TMDBAPIError as e:
last_error = e
logger.debug("Search strategy '%s' failed: %s", desc, e)
continue
# All strategies exhausted
raise TMDBAPIError(
f"No results found for: {primary_query} (tried {len(search_strategies)} strategies)"
)
def _normalize_query_for_search(self, query: str) -> str:
"""Normalize query by removing punctuation and special chars.
Args:
query: Original search query
Returns:
Query with punctuation removed
"""
# Remove common punctuation but keep CJK characters
normalized = unicodedata.normalize('NFKC', query)
# Remove punctuation but not CJK
normalized = re.sub(r'[^\w\s\u3000-\u9fff\u4e00-\u9faf]', '', normalized)
# Collapse multiple spaces
normalized = re.sub(r'\s+', ' ', normalized).strip()
return normalized
async def _download_media_files(
self,
tmdb_data: Dict[str, Any],
folder_path: Path,
download_poster: bool = True,
download_logo: bool = True,
download_fanart: bool = True
) -> Dict[str, bool]:
"""Download media files (poster, logo, fanart).
Args:
tmdb_data: TMDB TV show details
folder_path: Series folder path
download_poster: Download poster.jpg
download_logo: Download logo.png
download_fanart: Download fanart.jpg
Returns:
Dictionary with download status for each file
"""
poster_url = None
logo_url = None
fanart_url = None
# Get poster URL
if download_poster and tmdb_data.get("poster_path"):
poster_url = self.tmdb_client.get_image_url(
tmdb_data["poster_path"],
self.image_size
)
# Get fanart URL
if download_fanart and tmdb_data.get("backdrop_path"):
fanart_url = self.tmdb_client.get_image_url(
tmdb_data["backdrop_path"],
"original" # Always use original for fanart
)
# Get logo URL
if download_logo:
images_data = tmdb_data.get("images", {})
logos = images_data.get("logos", [])
if logos:
logo_url = self.tmdb_client.get_image_url(
logos[0]["file_path"],
"original" # Logos should be original size
)
# Download all media concurrently
results = await self.image_downloader.download_all_media(
folder_path,
poster_url=poster_url,
logo_url=logo_url,
fanart_url=fanart_url,
skip_existing=True
)
logger.info("Media download results: %s", results)
return results
async def close(self):
    """Clean up resources.

    Closes the TMDB client and the image downloader. The downloader is
    closed even if closing the TMDB client raises; that error then
    propagates to the caller.
    """
    try:
        await self.tmdb_client.close()
    finally:
        await self.image_downloader.close()
async def create_minimal_nfo(
    self,
    serie_name: str,
    serie_folder: str,
    year: Optional[int] = None
) -> Path:
    """Write a placeholder tvshow.nfo without contacting TMDB.

    The NFO carries only the title, the year (when known) and a plot
    text stating that no metadata is available, so the series is still
    tracked when the TMDB lookup failed. The series folder is created
    if it does not exist yet.

    Args:
        serie_name: Name of the series (may include year in parentheses)
        serie_folder: Series folder name
        year: Optional release year; when None, a year found in
            ``serie_name`` is used

    Returns:
        Path to created NFO file
    """
    title, year_from_name = self._extract_year_from_name(serie_name)
    if year is None and year_from_name is not None:
        year = year_from_name

    target_dir = self.anime_directory / serie_folder
    if not target_dir.exists():
        logger.info("Creating series folder: %s", target_dir)
        target_dir.mkdir(parents=True, exist_ok=True)

    # Minimal model: title, year and a placeholder plot.
    placeholder_model = TVShowNFO(
        title=title,
        year=year,
        plot=f"No metadata available for {title}. TMDB lookup failed."
    )

    # Render the XML and write it next to the episodes.
    nfo_path = target_dir / "tvshow.nfo"
    nfo_path.write_text(generate_tvshow_nfo(placeholder_model), encoding="utf-8")
    logger.info("Created minimal NFO (no TMDB): %s", nfo_path)
    return nfo_path