feat(NFO): add TMDB search fallback with alt_titles support

- New _search_with_fallback() method tries multiple strategies:
  1. Primary query with year filter (de-DE locale)
  2. Alternative titles with ja-JP / en-US locales
  3. English search (en-US)
  4. Search without year constraint
  5. Punctuation-normalized query
- create_nfo() accepts a new alt_titles param (e.g., the Japanese title) used for fallback searches
- Better match rate for anime with non-English titles

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-23 21:57:00 +02:00
parent 3f7651404d
commit 9a20541598
7 changed files with 588 additions and 43 deletions

View File

@@ -10,6 +10,7 @@ Example:
import logging
import re
import unicodedata
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
@@ -123,7 +124,8 @@ class NFOService:
year: Optional[int] = None,
download_poster: bool = True,
download_logo: bool = True,
download_fanart: bool = True
download_fanart: bool = True,
alt_titles: Optional[List[str]] = None
) -> Path:
"""Create tvshow.nfo by scraping TMDB.
@@ -135,6 +137,7 @@ class NFOService:
download_poster: Whether to download poster.jpg
download_logo: Whether to download logo.png
download_fanart: Whether to download fanart.jpg
alt_titles: Alternative titles (e.g., Japanese title) for fallback search
Returns:
Path to created NFO file
@@ -161,16 +164,11 @@ class NFOService:
try:
await self.tmdb_client._ensure_session()
# Search for TV show with clean name (without year)
logger.debug("Searching TMDB for: %s", search_name)
search_results = await self.tmdb_client.search_tv_show(search_name)
if not search_results.get("results"):
raise TMDBAPIError(f"No results found for: {search_name}")
# Find best match (consider year if provided)
tv_show = self._find_best_match(search_results["results"], search_name, year)
# Search for TV show - try multiple strategies
tv_show, search_source = await self._search_with_fallback(
search_name, year, alt_titles
)
tv_id = tv_show["id"]
logger.info("Found match: %s (ID: %s)", tv_show['name'], tv_id)
@@ -531,6 +529,137 @@ class NFOService:
# Return first result (usually best match)
return results[0]
async def _search_with_fallback(
    self,
    primary_query: str,
    year: Optional[int],
    alt_titles: Optional[List[str]] = None
) -> Tuple[Dict[str, Any], str]:
    """Search TMDB with fallback strategies.

    Strategies are tried in order until one returns at least one result:
    1. Primary query (de-DE)
    2. Each alternative title (ja-JP, then en-US)
    3. Primary query in English (en-US)
    4. Primary query without the year preference (de-DE)
    5. Punctuation-normalized primary query (de-DE)

    The year is applied client-side only: it is not passed to
    ``search_tv_show``. Within a result list, the first entry whose
    ``first_air_date`` starts with the year is preferred; if none matches,
    the first result is used anyway.

    Args:
        primary_query: Primary search term
        year: Release year used to prefer a result, or None
        alt_titles: Alternative titles to try if the primary query fails

    Returns:
        Tuple of (matched TV show dict, description of the strategy that
        produced the match)

    Raises:
        TMDBAPIError: If every strategy fails or returns no results. The
            last TMDBAPIError raised by the client, if any, is chained as
            the cause.
    """
    search_strategies: List[Dict[str, Any]] = [
        # Strategy 1: Primary query as-is
        {"query": primary_query, "year": year, "lang": "de-DE", "desc": "primary"},
    ]

    # Strategy 2: Try alt titles (typically Japanese).
    # dict.fromkeys() drops duplicate titles while preserving order, so the
    # same request is not issued twice; empty titles are skipped.
    for alt in dict.fromkeys(alt_titles or ()):
        if not alt or alt == primary_query:
            continue
        for alt_lang in ("ja-JP", "en-US"):
            search_strategies.append(
                {"query": alt, "year": year, "lang": alt_lang, "desc": f"alt_title:{alt}"}
            )

    # Strategy 3: Try English search
    search_strategies.append(
        {"query": primary_query, "year": year, "lang": "en-US", "desc": "english"}
    )

    # Strategy 4: Try without year constraint.
    # NOTE(review): the year is never sent to the API, so this issues the
    # same request as strategy 1; it can only differ if that earlier
    # attempt raised. Kept for backward compatibility.
    if year:
        search_strategies.append(
            {"query": primary_query, "year": None, "lang": "de-DE", "desc": "no_year"}
        )

    # Strategy 5: Normalize punctuation. Skip when nothing is left after
    # normalization (punctuation-only query) to avoid an empty search.
    normalized = self._normalize_query_for_search(primary_query)
    if normalized and normalized != primary_query:
        search_strategies.append(
            {"query": normalized, "year": year, "lang": "de-DE", "desc": f"normalized:{normalized}"}
        )

    last_error = None
    for strategy in search_strategies:
        query = strategy["query"]
        lang = strategy["lang"]
        desc = strategy["desc"]
        wanted_year = strategy["year"]

        logger.debug(
            "TMDB search attempt: query='%s', lang=%s, year=%s, strategy=%s",
            query, lang, wanted_year, desc
        )
        # Only the API call is expected to raise TMDBAPIError.
        try:
            search_results = await self.tmdb_client.search_tv_show(
                query,
                language=lang
            )
        except TMDBAPIError as e:
            last_error = e
            logger.debug("Search strategy '%s' failed: %s", desc, e)
            continue

        results = search_results.get("results")
        if not results:
            logger.debug("No results for '%s' via %s", query, desc)
            continue

        match = results[0]
        if wanted_year:
            year_prefix = str(wanted_year)
            # "or ''" guards against first_air_date being present but null.
            year_match = next(
                (
                    r for r in results
                    if (r.get("first_air_date") or "").startswith(year_prefix)
                ),
                None,
            )
            if year_match is not None:
                match = year_match
            else:
                # Year didn't match, still use first result but log it
                logger.debug(
                    "Year %s not found in results for '%s', using: %s",
                    wanted_year, query, match["name"]
                )

        logger.info(
            "TMDB search succeeded: '%s' found via strategy '%s' (ID: %s)",
            match["name"], desc, match["id"]
        )
        return match, desc

    # All strategies exhausted; keep the last API error as the cause.
    raise TMDBAPIError(
        f"No results found for: {primary_query} (tried {len(search_strategies)} strategies)"
    ) from last_error
def _normalize_query_for_search(self, query: str) -> str:
"""Normalize query by removing punctuation and special chars.
Args:
query: Original search query
Returns:
Query with punctuation removed
"""
# Remove common punctuation but keep CJK characters
normalized = unicodedata.normalize('NFKC', query)
# Remove punctuation but not CJK
normalized = re.sub(r'[^\w\s\u3000-\u9fff\u4e00-\u9faf]', '', normalized)
# Collapse multiple spaces
normalized = re.sub(r'\s+', ' ', normalized).strip()
return normalized
async def _download_media_files(