"""NFO service for creating and managing tvshow.nfo files. This service orchestrates TMDB API calls, XML generation, and media downloads to create complete NFO metadata for TV series. Example: >>> nfo_service = NFOService(tmdb_api_key="key", anime_directory="/anime") >>> await nfo_service.create_tvshow_nfo("Attack on Titan", "/anime/aot", 2013) """ import logging import re import unicodedata from pathlib import Path from typing import Any, Dict, List, Optional, Tuple from lxml import etree from src.core.services.tmdb_client import TMDBAPIError, TMDBClient from src.core.utils.image_downloader import ImageDownloader from src.core.utils.nfo_generator import generate_tvshow_nfo from src.core.utils.nfo_mapper import tmdb_to_nfo_model from src.core.entities.nfo_models import TVShowNFO logger = logging.getLogger(__name__) class NFOService: """Service for creating and managing tvshow.nfo files. Attributes: tmdb_client: TMDB API client image_downloader: Image downloader utility anime_directory: Base directory for anime series """ def __init__( self, tmdb_api_key: str, anime_directory: str, image_size: str = "original", auto_create: bool = True ): """Initialize NFO service. Args: tmdb_api_key: TMDB API key anime_directory: Base anime directory path image_size: Image size to download (original, w500, etc.) auto_create: Whether to auto-create NFOs """ self.tmdb_client = TMDBClient(api_key=tmdb_api_key) self.image_downloader = ImageDownloader() self.anime_directory = Path(anime_directory) self.image_size = image_size self.auto_create = auto_create async def __aenter__(self) -> "NFOService": """Enter async context manager.""" await self.tmdb_client.__aenter__() await self.image_downloader.__aenter__() return self async def __aexit__(self, exc_type, exc_val, exc_tb): """Exit async context manager and cleanup resources.""" await self.tmdb_client.close() await self.image_downloader.close() return False def has_nfo(self, serie_folder: str) -> bool: """Check if tvshow.nfo exists for a series. Args: serie_folder: Series folder name Returns: True if NFO file exists """ nfo_path = self.anime_directory / serie_folder / "tvshow.nfo" return nfo_path.exists() @staticmethod def _extract_year_from_name(serie_name: str) -> Tuple[str, Optional[int]]: """Extract year from series name if present in format 'Name (YYYY)'. Args: serie_name: Series name, possibly with year in parentheses Returns: Tuple of (clean_name, year) - clean_name: Series name without year - year: Extracted year or None Examples: >>> _extract_year_from_name("Attack on Titan (2013)") ("Attack on Titan", 2013) >>> _extract_year_from_name("Attack on Titan") ("Attack on Titan", None) """ # Match the last year in parentheses at the end: (YYYY) match = re.search(r'\((\d{4})\)\s*$', serie_name) if match: year = int(match.group(1)) # Strip ALL trailing year suffixes to get a fully clean name clean_name = re.sub(r'(\s*\(\d{4}\))+\s*$', '', serie_name).strip() return clean_name, year return serie_name, None async def check_nfo_exists(self, serie_folder: str) -> bool: """Check if tvshow.nfo exists for a series. Args: serie_folder: Series folder name Returns: True if tvshow.nfo exists """ nfo_path = self.anime_directory / serie_folder / "tvshow.nfo" return nfo_path.exists() async def create_tvshow_nfo( self, serie_name: str, serie_folder: str, year: Optional[int] = None, download_poster: bool = True, download_logo: bool = True, download_fanart: bool = True, alt_titles: Optional[List[str]] = None ) -> Path: """Create tvshow.nfo by scraping TMDB. Args: serie_name: Name of the series to search (may include year in parentheses) serie_folder: Series folder name year: Release year (helps narrow search). If None and name contains year, year will be auto-extracted download_poster: Whether to download poster.jpg download_logo: Whether to download logo.png download_fanart: Whether to download fanart.jpg alt_titles: Alternative titles (e.g., Japanese title) for fallback search Returns: Path to created NFO file Raises: TMDBAPIError: If TMDB API fails FileNotFoundError: If series folder doesn't exist """ # Extract year from name if not provided clean_name, extracted_year = self._extract_year_from_name(serie_name) if year is None and extracted_year is not None: year = extracted_year logger.info("Extracted year %s from series name", year) # Use clean name for search search_name = clean_name logger.info("Creating NFO for %s (year: %s)", search_name, year) folder_path = self.anime_directory / serie_folder if not folder_path.exists(): logger.info("Creating series folder: %s", folder_path) folder_path.mkdir(parents=True, exist_ok=True) try: await self.tmdb_client._ensure_session() # Search for TV show - try multiple strategies tv_show, search_source = await self._search_with_fallback( search_name, year, alt_titles ) tv_id = tv_show["id"] logger.info("Found match: %s (ID: %s)", tv_show['name'], tv_id) # Get detailed information with multi-language image support details = await self.tmdb_client.get_tv_show_details( tv_id, append_to_response="credits,external_ids,images" ) # Get content ratings for FSK content_ratings = await self.tmdb_client.get_tv_show_content_ratings(tv_id) # Enrich with fallback languages for empty overview/tagline # Pass search result overview as last resort fallback search_overview = tv_show.get("overview") or None if not search_overview: try: logger.debug( "No overview in German search result, trying en-US search fallback for: %s", search_name, ) en_search_results = await self.tmdb_client.search_tv_show( search_name, language="en-US", ) if en_search_results.get("results"): en_match = self._find_best_match( en_search_results["results"], search_name, year ) search_overview = en_match.get("overview") or None if search_overview: logger.info( "Using en-US search overview fallback for %s", search_name, ) except (TMDBAPIError, Exception) as exc: logger.warning( "Failed en-US search fallback for overview: %s", exc, ) details = await self._enrich_details_with_fallback( details, search_overview=search_overview ) # Convert TMDB data to TVShowNFO model nfo_model = tmdb_to_nfo_model( details, content_ratings, self.tmdb_client.get_image_url, self.image_size, ) # Generate XML nfo_xml = generate_tvshow_nfo(nfo_model) # Save NFO file nfo_path = folder_path / "tvshow.nfo" nfo_path.write_text(nfo_xml, encoding="utf-8") logger.info("Created NFO: %s", nfo_path) # Download media files await self._download_media_files( details, folder_path, download_poster=download_poster, download_logo=download_logo, download_fanart=download_fanart ) return nfo_path finally: await self.tmdb_client.close() async def update_tvshow_nfo( self, serie_folder: str, download_media: bool = True ) -> Path: """Update existing tvshow.nfo with fresh data from TMDB. Args: serie_folder: Series folder name download_media: Whether to re-download media files Returns: Path to updated NFO file Raises: FileNotFoundError: If NFO file doesn't exist TMDBAPIError: If TMDB API fails or no TMDB ID found in NFO """ folder_path = self.anime_directory / serie_folder nfo_path = folder_path / "tvshow.nfo" if not nfo_path.exists(): raise FileNotFoundError(f"NFO file not found: {nfo_path}") logger.info("Updating NFO for %s", serie_folder) # Parse existing NFO to extract TMDB ID try: tree = etree.parse(str(nfo_path)) root = tree.getroot() # Try to find TMDB ID from uniqueid elements tmdb_id = None for uniqueid in root.findall(".//uniqueid"): if uniqueid.get("type") == "tmdb": tmdb_id = int(uniqueid.text) break # Fallback: check for tmdbid element if tmdb_id is None: tmdbid_elem = root.find(".//tmdbid") if tmdbid_elem is not None and tmdbid_elem.text: tmdb_id = int(tmdbid_elem.text) if tmdb_id is None: raise TMDBAPIError( f"No TMDB ID found in existing NFO. " f"Delete the NFO and create a new one instead." ) logger.debug("Found TMDB ID: %s", tmdb_id) except etree.XMLSyntaxError as e: raise TMDBAPIError(f"Invalid XML in NFO file: {e}") except ValueError as e: raise TMDBAPIError(f"Invalid TMDB ID format in NFO: {e}") try: await self.tmdb_client._ensure_session() logger.debug("Fetching fresh data for TMDB ID: %s", tmdb_id) details = await self.tmdb_client.get_tv_show_details( tmdb_id, append_to_response="credits,external_ids,images" ) # Get content ratings for FSK content_ratings = await self.tmdb_client.get_tv_show_content_ratings(tmdb_id) # Enrich with fallback languages for empty overview/tagline details = await self._enrich_details_with_fallback(details) # Convert TMDB data to TVShowNFO model nfo_model = tmdb_to_nfo_model( details, content_ratings, self.tmdb_client.get_image_url, self.image_size, ) # Generate XML nfo_xml = generate_tvshow_nfo(nfo_model) # Save updated NFO file nfo_path.write_text(nfo_xml, encoding="utf-8") logger.info("Updated NFO: %s", nfo_path) # Re-download media files if requested if download_media: await self._download_media_files( details, folder_path, download_poster=True, download_logo=True, download_fanart=True ) return nfo_path finally: await self.tmdb_client.close() def parse_nfo_ids(self, nfo_path: Path) -> Dict[str, Optional[int]]: """Parse TMDB ID and TVDB ID from an existing NFO file. Args: nfo_path: Path to tvshow.nfo file Returns: Dictionary with 'tmdb_id' and 'tvdb_id' keys. Values are integers if found, None otherwise. Example: >>> ids = nfo_service.parse_nfo_ids(Path("/anime/series/tvshow.nfo")) >>> print(ids) {'tmdb_id': 1429, 'tvdb_id': 79168} """ result = {"tmdb_id": None, "tvdb_id": None} if not nfo_path.exists(): logger.debug("NFO file not found: %s", nfo_path) return result try: tree = etree.parse(str(nfo_path)) root = tree.getroot() # Try to find TMDB ID from uniqueid elements first for uniqueid in root.findall(".//uniqueid"): uid_type = uniqueid.get("type") uid_text = uniqueid.text if uid_type == "tmdb" and uid_text: try: result["tmdb_id"] = int(uid_text) except ValueError: logger.warning( f"Invalid TMDB ID format in NFO: {uid_text}" ) elif uid_type == "tvdb" and uid_text: try: result["tvdb_id"] = int(uid_text) except ValueError: logger.warning( f"Invalid TVDB ID format in NFO: {uid_text}" ) # Fallback: check for dedicated tmdbid/tvdbid elements if result["tmdb_id"] is None: tmdbid_elem = root.find(".//tmdbid") if tmdbid_elem is not None and tmdbid_elem.text: try: result["tmdb_id"] = int(tmdbid_elem.text) except ValueError: logger.warning( f"Invalid TMDB ID format in tmdbid element: " f"{tmdbid_elem.text}" ) if result["tvdb_id"] is None: tvdbid_elem = root.find(".//tvdbid") if tvdbid_elem is not None and tvdbid_elem.text: try: result["tvdb_id"] = int(tvdbid_elem.text) except ValueError: logger.warning( f"Invalid TVDB ID format in tvdbid element: " f"{tvdbid_elem.text}" ) logger.debug( f"Parsed IDs from NFO: {nfo_path.name} - " f"TMDB: {result['tmdb_id']}, TVDB: {result['tvdb_id']}" ) except etree.XMLSyntaxError as e: logger.error("Invalid XML in NFO file %s: %s", nfo_path, e) except Exception as e: # pylint: disable=broad-except logger.error("Error parsing NFO file %s: %s", nfo_path, e) return result def parse_nfo_year(self, nfo_path: Path) -> Optional[int]: """Parse year from an existing NFO file. Extracts year from or elements. Args: nfo_path: Path to tvshow.nfo file Returns: Year as integer if found, None otherwise. Example: >>> year = nfo_service.parse_nfo_year(Path("/anime/series/tvshow.nfo")) >>> print(year) 2013 """ if not nfo_path.exists(): logger.debug("NFO file not found: %s", nfo_path) return None try: tree = etree.parse(str(nfo_path)) root = tree.getroot() # Try element first year_elem = root.find(".//year") if year_elem is not None and year_elem.text: try: year = int(year_elem.text) if 1900 <= year <= 2100: logger.debug("Found year in NFO: %d", year) return year except ValueError: pass # Fallback: try element (format: YYYY-MM-DD) premiered_elem = root.find(".//premiered") if premiered_elem is not None and premiered_elem.text: if premiered_elem.text and len(premiered_elem.text) >= 4: try: year = int(premiered_elem.text[:4]) if 1900 <= year <= 2100: logger.debug("Found year from premiered in NFO: %d", year) return year except ValueError: pass logger.debug("No year found in NFO: %s", nfo_path) except etree.XMLSyntaxError as e: logger.error("Invalid XML in NFO file %s: %s", nfo_path, e) except Exception as e: # pylint: disable=broad-except logger.error("Error parsing year from NFO file %s: %s", nfo_path, e) return None async def _enrich_details_with_fallback( self, details: Dict[str, Any], search_overview: Optional[str] = None, ) -> Dict[str, Any]: """Enrich TMDB details with fallback languages for empty fields. When requesting details in ``de-DE``, some anime have an empty ``overview`` (and potentially other translatable fields). This method detects empty values and fills them from alternative languages (``en-US``, then ``ja-JP``) so that NFO files always contain a ``plot`` regardless of whether the German translation exists. As a last resort, the overview from the search result is used. Args: details: TMDB TV show details (language ``de-DE``). search_overview: Overview text from the TMDB search result, used as a final fallback if all language-specific requests fail or return empty overviews. Returns: The *same* dict, mutated in-place with fallback values where needed. """ overview = details.get("overview") or "" if overview: # Overview already populated – nothing to do. return details tmdb_id = details.get("id") fallback_languages = ["en-US", "ja-JP"] for lang in fallback_languages: if details.get("overview"): break logger.debug( "Trying %s fallback for TMDB ID %s", lang, tmdb_id, ) try: lang_details = await self.tmdb_client.get_tv_show_details( tmdb_id, language=lang, ) if not details.get("overview") and lang_details.get("overview"): details["overview"] = lang_details["overview"] logger.info( "Used %s overview fallback for TMDB ID %s", lang, tmdb_id, ) # Also fill tagline if missing if not details.get("tagline") and lang_details.get("tagline"): details["tagline"] = lang_details["tagline"] except Exception as exc: # pylint: disable=broad-except logger.warning( "Failed to fetch %s fallback for TMDB ID %s: %s", lang, tmdb_id, exc, ) # Last resort: use search result overview if not details.get("overview") and search_overview: details["overview"] = search_overview logger.info( "Used search result overview fallback for TMDB ID %s", tmdb_id, ) return details def _find_best_match( self, results: List[Dict[str, Any]], query: str, year: Optional[int] = None ) -> Dict[str, Any]: """Find best matching TV show from search results. Args: results: TMDB search results query: Original search query year: Expected release year Returns: Best matching TV show data """ if not results: raise TMDBAPIError("No search results to match") # If year is provided, try to find exact match if year: for result in results: first_air_date = result.get("first_air_date", "") if first_air_date.startswith(str(year)): logger.debug("Found year match: %s (%s)", result['name'], first_air_date) return result # Return first result (usually best match) return results[0] async def _search_with_fallback( self, primary_query: str, year: Optional[int], alt_titles: Optional[List[str]] = None ) -> Tuple[Dict[str, Any], str]: """Search TMDB with fallback strategies. Tries multiple search strategies in order: 1. Primary query with year filter 2. Alternative titles (e.g., Japanese name) 3. Multi-language search (en-US) 4. Search without year constraint 5. Punctuation-normalized search Args: primary_query: Primary search term year: Release year for filtering alt_titles: Alternative titles to try if primary fails Returns: Tuple of (matched TV show dict, source description string) Raises: TMDBAPIError: If all search strategies fail """ search_strategies = [ # Strategy 1: Primary query as-is {"query": primary_query, "year": year, "lang": "de-DE", "desc": "primary"}, ] # Strategy 2: Try alt titles (typically Japanese) if alt_titles: for alt in alt_titles: if alt != primary_query: search_strategies.append( {"query": alt, "year": year, "lang": "ja-JP", "desc": f"alt_title:{alt}"} ) search_strategies.append( {"query": alt, "year": year, "lang": "en-US", "desc": f"alt_title:{alt}"} ) # Strategy 3: Try English search search_strategies.append( {"query": primary_query, "year": year, "lang": "en-US", "desc": "english"} ) # Strategy 4: Try without year constraint if year: search_strategies.append( {"query": primary_query, "year": None, "lang": "de-DE", "desc": "no_year"} ) # Strategy 5: Normalize punctuation normalized = self._normalize_query_for_search(primary_query) if normalized != primary_query: search_strategies.append( {"query": normalized, "year": year, "lang": "de-DE", "desc": f"normalized:{normalized}"} ) last_error = None for strategy in search_strategies: query = strategy["query"] lang = strategy["lang"] desc = strategy["desc"] try: logger.debug( "TMDB search attempt: query='%s', lang=%s, year=%s, strategy=%s", query, lang, strategy["year"], desc ) search_results = await self.tmdb_client.search_tv_show( query, language=lang ) if search_results.get("results"): # Apply year filter if we have one results = search_results["results"] if strategy["year"]: year_filtered = [ r for r in results if r.get("first_air_date", "").startswith(str(strategy["year"])) ] if year_filtered: match = year_filtered[0] else: # Year didn't match, still use first result but log it match = results[0] logger.debug( "Year %s not found in results for '%s', using: %s", strategy["year"], query, match["name"] ) else: match = results[0] logger.info( "TMDB search succeeded: '%s' found via strategy '%s' (ID: %s)", match["name"], desc, match["id"] ) return match, desc else: logger.debug("No results for '%s' via %s", query, desc) except TMDBAPIError as e: last_error = e logger.debug("Search strategy '%s' failed: %s", desc, e) continue # All strategies exhausted raise TMDBAPIError( f"No results found for: {primary_query} (tried {len(search_strategies)} strategies)" ) def _normalize_query_for_search(self, query: str) -> str: """Normalize query by removing punctuation and special chars. Args: query: Original search query Returns: Query with punctuation removed """ # Remove common punctuation but keep CJK characters normalized = unicodedata.normalize('NFKC', query) # Remove punctuation but not CJK normalized = re.sub(r'[^\w\s\u3000-\u9fff\u4e00-\u9faf]', '', normalized) # Collapse multiple spaces normalized = re.sub(r'\s+', ' ', normalized).strip() return normalized async def _download_media_files( self, tmdb_data: Dict[str, Any], folder_path: Path, download_poster: bool = True, download_logo: bool = True, download_fanart: bool = True ) -> Dict[str, bool]: """Download media files (poster, logo, fanart). Args: tmdb_data: TMDB TV show details folder_path: Series folder path download_poster: Download poster.jpg download_logo: Download logo.png download_fanart: Download fanart.jpg Returns: Dictionary with download status for each file """ poster_url = None logo_url = None fanart_url = None # Get poster URL if download_poster and tmdb_data.get("poster_path"): poster_url = self.tmdb_client.get_image_url( tmdb_data["poster_path"], self.image_size ) # Get fanart URL if download_fanart and tmdb_data.get("backdrop_path"): fanart_url = self.tmdb_client.get_image_url( tmdb_data["backdrop_path"], "original" # Always use original for fanart ) # Get logo URL if download_logo: images_data = tmdb_data.get("images", {}) logos = images_data.get("logos", []) if logos: logo_url = self.tmdb_client.get_image_url( logos[0]["file_path"], "original" # Logos should be original size ) # Download all media concurrently results = await self.image_downloader.download_all_media( folder_path, poster_url=poster_url, logo_url=logo_url, fanart_url=fanart_url, skip_existing=True ) logger.info("Media download results: %s", results) return results async def close(self): """Clean up resources.""" await self.tmdb_client.close() async def create_minimal_nfo( self, serie_name: str, serie_folder: str, year: Optional[int] = None ) -> Path: """Create minimal tvshow.nfo when TMDB lookup fails. Creates a basic NFO with just the title (and year if available) so the series is tracked even without TMDB metadata. Args: serie_name: Name of the series (may include year in parentheses) serie_folder: Series folder name year: Optional release year Returns: Path to created NFO file Raises: FileNotFoundError: If series folder doesn't exist """ # Extract year from name if not provided clean_name, extracted_year = self._extract_year_from_name(serie_name) if year is None and extracted_year is not None: year = extracted_year folder_path = self.anime_directory / serie_folder if not folder_path.exists(): logger.info("Creating series folder: %s", folder_path) folder_path.mkdir(parents=True, exist_ok=True) # Create minimal NFO model with just title and year nfo_model = TVShowNFO( title=clean_name, year=year, plot=f"No metadata available for {clean_name}. TMDB lookup failed." ) # Generate XML nfo_xml = generate_tvshow_nfo(nfo_model) # Save NFO file nfo_path = folder_path / "tvshow.nfo" nfo_path.write_text(nfo_xml, encoding="utf-8") logger.info("Created minimal NFO (no TMDB): %s", nfo_path) return nfo_path