feat(NFO): add TMDB search fallback with alt_titles support
- New _search_with_fallback() method tries multiple strategies:
  1. Primary query with year filter (de-DE locale)
  2. Alternative titles with ja-JP / en-US locales
  3. English search (en-US)
  4. Search without year constraint
  5. Punctuation-normalized query
- create_nfo() accepts new alt_titles param for Japanese/title fallback
- Better match rate for anime with non-English titles

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -10,6 +10,7 @@ Example:
|
||||
|
||||
import logging
|
||||
import re
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
@@ -123,7 +124,8 @@ class NFOService:
|
||||
year: Optional[int] = None,
|
||||
download_poster: bool = True,
|
||||
download_logo: bool = True,
|
||||
download_fanart: bool = True
|
||||
download_fanart: bool = True,
|
||||
alt_titles: Optional[List[str]] = None
|
||||
) -> Path:
|
||||
"""Create tvshow.nfo by scraping TMDB.
|
||||
|
||||
@@ -135,6 +137,7 @@ class NFOService:
|
||||
download_poster: Whether to download poster.jpg
|
||||
download_logo: Whether to download logo.png
|
||||
download_fanart: Whether to download fanart.jpg
|
||||
alt_titles: Alternative titles (e.g., Japanese title) for fallback search
|
||||
|
||||
Returns:
|
||||
Path to created NFO file
|
||||
@@ -161,16 +164,11 @@ class NFOService:
|
||||
|
||||
try:
|
||||
await self.tmdb_client._ensure_session()
|
||||
|
||||
# Search for TV show with clean name (without year)
|
||||
logger.debug("Searching TMDB for: %s", search_name)
|
||||
search_results = await self.tmdb_client.search_tv_show(search_name)
|
||||
|
||||
if not search_results.get("results"):
|
||||
raise TMDBAPIError(f"No results found for: {search_name}")
|
||||
|
||||
# Find best match (consider year if provided)
|
||||
tv_show = self._find_best_match(search_results["results"], search_name, year)
|
||||
|
||||
# Search for TV show - try multiple strategies
|
||||
tv_show, search_source = await self._search_with_fallback(
|
||||
search_name, year, alt_titles
|
||||
)
|
||||
tv_id = tv_show["id"]
|
||||
|
||||
logger.info("Found match: %s (ID: %s)", tv_show['name'], tv_id)
|
||||
@@ -531,6 +529,137 @@ class NFOService:
|
||||
# Return first result (usually best match)
|
||||
return results[0]
|
||||
|
||||
async def _search_with_fallback(
    self,
    primary_query: str,
    year: Optional[int],
    alt_titles: Optional[List[str]] = None
) -> Tuple[Dict[str, Any], str]:
    """Search TMDB with fallback strategies.

    Strategies are attempted in this order, stopping at the first one
    that yields any results:
        1. Primary query (de-DE)
        2. Each alternative title, first with ja-JP and then with en-US
        3. Primary query (en-US)
        4. Primary query without the year preference (de-DE)
        5. Punctuation-normalized primary query (de-DE)

    The year is not sent to the search call; it is applied to the
    returned results by matching the prefix of ``first_air_date``. When
    no result matches the year, the first result is used anyway.

    Args:
        primary_query: Primary search term
        year: Release year used to prefer a result, or None
        alt_titles: Alternative titles to try if the primary query fails.
            Empty entries, duplicates, and entries equal to
            ``primary_query`` are skipped.

    Returns:
        Tuple of (matched TV show dict, source description string)

    Raises:
        TMDBAPIError: If all search strategies fail. The last API error
            seen, if any, is attached as the cause.
    """
    strategies: List[Dict[str, Any]] = [
        # Strategy 1: Primary query as-is
        {"query": primary_query, "year": year, "lang": "de-DE", "desc": "primary"},
    ]

    # Strategy 2: Try alt titles (typically Japanese). Skip blanks and
    # repeats so the same request is not issued more than once.
    seen_titles = {primary_query}
    for alt in alt_titles or []:
        if not alt or alt in seen_titles:
            continue
        seen_titles.add(alt)
        for alt_lang in ("ja-JP", "en-US"):
            strategies.append(
                {"query": alt, "year": year, "lang": alt_lang, "desc": f"alt_title:{alt}"}
            )

    # Strategy 3: Try English search
    strategies.append(
        {"query": primary_query, "year": year, "lang": "en-US", "desc": "english"}
    )

    # Strategy 4: Try without year constraint
    # NOTE(review): the year is only applied to the results below and is
    # not passed to search_tv_show, so this looks like a repeat of the
    # strategy 1 request - confirm whether it is still needed.
    if year:
        strategies.append(
            {"query": primary_query, "year": None, "lang": "de-DE", "desc": "no_year"}
        )

    # Strategy 5: Normalize punctuation
    normalized = self._normalize_query_for_search(primary_query)
    if normalized != primary_query:
        strategies.append(
            {"query": normalized, "year": year, "lang": "de-DE", "desc": f"normalized:{normalized}"}
        )

    last_error = None
    for strategy in strategies:
        query = strategy["query"]
        lang = strategy["lang"]
        desc = strategy["desc"]
        wanted_year = strategy["year"]

        try:
            logger.debug(
                "TMDB search attempt: query='%s', lang=%s, year=%s, strategy=%s",
                query, lang, wanted_year, desc
            )
            search_results = await self.tmdb_client.search_tv_show(
                query,
                language=lang
            )
        except TMDBAPIError as e:
            # Remember the failure so it can be reported if nothing succeeds.
            last_error = e
            logger.debug("Search strategy '%s' failed: %s", desc, e)
            continue

        results = search_results.get("results")
        if not results:
            logger.debug("No results for '%s' via %s", query, desc)
            continue

        match = results[0]
        if wanted_year:
            year_prefix = str(wanted_year)
            # "first_air_date" may be missing or None; treat both as "".
            year_filtered = [
                r for r in results
                if (r.get("first_air_date") or "").startswith(year_prefix)
            ]
            if year_filtered:
                match = year_filtered[0]
            else:
                # Year didn't match, still use first result but log it
                logger.debug(
                    "Year %s not found in results for '%s', using: %s",
                    wanted_year, query, match["name"]
                )

        logger.info(
            "TMDB search succeeded: '%s' found via strategy '%s' (ID: %s)",
            match["name"], desc, match["id"]
        )
        return match, desc

    # All strategies exhausted; keep the last API error as the cause.
    raise TMDBAPIError(
        f"No results found for: {primary_query} (tried {len(strategies)} strategies)"
    ) from last_error
|
||||
|
||||
def _normalize_query_for_search(self, query: str) -> str:
|
||||
"""Normalize query by removing punctuation and special chars.
|
||||
|
||||
Args:
|
||||
query: Original search query
|
||||
|
||||
Returns:
|
||||
Query with punctuation removed
|
||||
"""
|
||||
# Remove common punctuation but keep CJK characters
|
||||
normalized = unicodedata.normalize('NFKC', query)
|
||||
# Remove punctuation but not CJK
|
||||
normalized = re.sub(r'[^\w\s\u3000-\u9fff\u4e00-\u9faf]', '', normalized)
|
||||
# Collapse multiple spaces
|
||||
normalized = re.sub(r'\s+', ' ', normalized).strip()
|
||||
return normalized
|
||||
|
||||
|
||||
|
||||
async def _download_media_files(
|
||||
|
||||
Reference in New Issue
Block a user