feat(NFO): add TMDB search fallback with alt_titles support
- New _search_with_fallback() method tries multiple strategies: 1. Primary query with year filter (de-DE locale) 2. Alternative titles with ja-JP / en-US locales 3. English search (en-US) 4. Search without year constraint 5. Punctuation-normalized query - create_nfo() accepts new alt_titles param for Japanese/title fallback - Better match rate for anime with non-English titles Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -10,6 +10,7 @@ Example:
|
||||
|
||||
import logging
|
||||
import re
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
@@ -123,7 +124,8 @@ class NFOService:
|
||||
year: Optional[int] = None,
|
||||
download_poster: bool = True,
|
||||
download_logo: bool = True,
|
||||
download_fanart: bool = True
|
||||
download_fanart: bool = True,
|
||||
alt_titles: Optional[List[str]] = None
|
||||
) -> Path:
|
||||
"""Create tvshow.nfo by scraping TMDB.
|
||||
|
||||
@@ -135,6 +137,7 @@ class NFOService:
|
||||
download_poster: Whether to download poster.jpg
|
||||
download_logo: Whether to download logo.png
|
||||
download_fanart: Whether to download fanart.jpg
|
||||
alt_titles: Alternative titles (e.g., Japanese title) for fallback search
|
||||
|
||||
Returns:
|
||||
Path to created NFO file
|
||||
@@ -161,16 +164,11 @@ class NFOService:
|
||||
|
||||
try:
|
||||
await self.tmdb_client._ensure_session()
|
||||
|
||||
# Search for TV show with clean name (without year)
|
||||
logger.debug("Searching TMDB for: %s", search_name)
|
||||
search_results = await self.tmdb_client.search_tv_show(search_name)
|
||||
|
||||
if not search_results.get("results"):
|
||||
raise TMDBAPIError(f"No results found for: {search_name}")
|
||||
|
||||
# Find best match (consider year if provided)
|
||||
tv_show = self._find_best_match(search_results["results"], search_name, year)
|
||||
|
||||
# Search for TV show - try multiple strategies
|
||||
tv_show, search_source = await self._search_with_fallback(
|
||||
search_name, year, alt_titles
|
||||
)
|
||||
tv_id = tv_show["id"]
|
||||
|
||||
logger.info("Found match: %s (ID: %s)", tv_show['name'], tv_id)
|
||||
@@ -531,6 +529,137 @@ class NFOService:
|
||||
# Return first result (usually best match)
|
||||
return results[0]
|
||||
|
||||
async def _search_with_fallback(
|
||||
self,
|
||||
primary_query: str,
|
||||
year: Optional[int],
|
||||
alt_titles: Optional[List[str]] = None
|
||||
) -> Tuple[Dict[str, Any], str]:
|
||||
"""Search TMDB with fallback strategies.
|
||||
|
||||
Tries multiple search strategies in order:
|
||||
1. Primary query with year filter
|
||||
2. Alternative titles (e.g., Japanese name)
|
||||
3. Multi-language search (en-US)
|
||||
4. Search without year constraint
|
||||
5. Punctuation-normalized search
|
||||
|
||||
Args:
|
||||
primary_query: Primary search term
|
||||
year: Release year for filtering
|
||||
alt_titles: Alternative titles to try if primary fails
|
||||
|
||||
Returns:
|
||||
Tuple of (matched TV show dict, source description string)
|
||||
|
||||
Raises:
|
||||
TMDBAPIError: If all search strategies fail
|
||||
"""
|
||||
search_strategies = [
|
||||
# Strategy 1: Primary query as-is
|
||||
{"query": primary_query, "year": year, "lang": "de-DE", "desc": "primary"},
|
||||
]
|
||||
|
||||
# Strategy 2: Try alt titles (typically Japanese)
|
||||
if alt_titles:
|
||||
for alt in alt_titles:
|
||||
if alt != primary_query:
|
||||
search_strategies.append(
|
||||
{"query": alt, "year": year, "lang": "ja-JP", "desc": f"alt_title:{alt}"}
|
||||
)
|
||||
search_strategies.append(
|
||||
{"query": alt, "year": year, "lang": "en-US", "desc": f"alt_title:{alt}"}
|
||||
)
|
||||
|
||||
# Strategy 3: Try English search
|
||||
search_strategies.append(
|
||||
{"query": primary_query, "year": year, "lang": "en-US", "desc": "english"}
|
||||
)
|
||||
|
||||
# Strategy 4: Try without year constraint
|
||||
if year:
|
||||
search_strategies.append(
|
||||
{"query": primary_query, "year": None, "lang": "de-DE", "desc": "no_year"}
|
||||
)
|
||||
|
||||
# Strategy 5: Normalize punctuation
|
||||
normalized = self._normalize_query_for_search(primary_query)
|
||||
if normalized != primary_query:
|
||||
search_strategies.append(
|
||||
{"query": normalized, "year": year, "lang": "de-DE", "desc": f"normalized:{normalized}"}
|
||||
)
|
||||
|
||||
last_error = None
|
||||
for strategy in search_strategies:
|
||||
query = strategy["query"]
|
||||
lang = strategy["lang"]
|
||||
desc = strategy["desc"]
|
||||
|
||||
try:
|
||||
logger.debug(
|
||||
"TMDB search attempt: query='%s', lang=%s, year=%s, strategy=%s",
|
||||
query, lang, strategy["year"], desc
|
||||
)
|
||||
search_results = await self.tmdb_client.search_tv_show(
|
||||
query,
|
||||
language=lang
|
||||
)
|
||||
|
||||
if search_results.get("results"):
|
||||
# Apply year filter if we have one
|
||||
results = search_results["results"]
|
||||
if strategy["year"]:
|
||||
year_filtered = [
|
||||
r for r in results
|
||||
if r.get("first_air_date", "").startswith(str(strategy["year"]))
|
||||
]
|
||||
if year_filtered:
|
||||
match = year_filtered[0]
|
||||
else:
|
||||
# Year didn't match, still use first result but log it
|
||||
match = results[0]
|
||||
logger.debug(
|
||||
"Year %s not found in results for '%s', using: %s",
|
||||
strategy["year"], query, match["name"]
|
||||
)
|
||||
else:
|
||||
match = results[0]
|
||||
|
||||
logger.info(
|
||||
"TMDB search succeeded: '%s' found via strategy '%s' (ID: %s)",
|
||||
match["name"], desc, match["id"]
|
||||
)
|
||||
return match, desc
|
||||
else:
|
||||
logger.debug("No results for '%s' via %s", query, desc)
|
||||
|
||||
except TMDBAPIError as e:
|
||||
last_error = e
|
||||
logger.debug("Search strategy '%s' failed: %s", desc, e)
|
||||
continue
|
||||
|
||||
# All strategies exhausted
|
||||
raise TMDBAPIError(
|
||||
f"No results found for: {primary_query} (tried {len(search_strategies)} strategies)"
|
||||
)
|
||||
|
||||
def _normalize_query_for_search(self, query: str) -> str:
|
||||
"""Normalize query by removing punctuation and special chars.
|
||||
|
||||
Args:
|
||||
query: Original search query
|
||||
|
||||
Returns:
|
||||
Query with punctuation removed
|
||||
"""
|
||||
# Remove common punctuation but keep CJK characters
|
||||
normalized = unicodedata.normalize('NFKC', query)
|
||||
# Remove punctuation but not CJK
|
||||
normalized = re.sub(r'[^\w\s\u3000-\u9fff\u4e00-\u9faf]', '', normalized)
|
||||
# Collapse multiple spaces
|
||||
normalized = re.sub(r'\s+', ' ', normalized).strip()
|
||||
return normalized
|
||||
|
||||
|
||||
|
||||
async def _download_media_files(
|
||||
|
||||
@@ -39,6 +39,7 @@ class TMDBClient:
|
||||
|
||||
DEFAULT_BASE_URL = "https://api.themoviedb.org/3"
|
||||
DEFAULT_IMAGE_BASE_URL = "https://image.tmdb.org/t/p"
|
||||
NEGATIVE_CACHE_TTL = 86400 # 24 hours
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@@ -64,6 +65,7 @@ class TMDBClient:
|
||||
self.max_connections = max_connections
|
||||
self.session: Optional[aiohttp.ClientSession] = None
|
||||
self._cache: Dict[str, Any] = {}
|
||||
self._negative_cache: Dict[str, float] = {} # query -> timestamp when cached
|
||||
# TMDB allows ~40 req/s; use 30 concurrent + per-second throttle to stay safe
|
||||
self._semaphore = asyncio.Semaphore(30)
|
||||
self._rate_limit_lock = asyncio.Lock()
|
||||
@@ -116,6 +118,16 @@ class TMDBClient:
|
||||
logger.debug("Cache hit for %s", endpoint)
|
||||
return self._cache[cache_key]
|
||||
|
||||
# Check negative cache (cached empty results)
|
||||
negative_cache_key = f"{endpoint}:{str(sorted(params.items()))}"
|
||||
if negative_cache_key in self._negative_cache:
|
||||
if time.monotonic() - self._negative_cache[negative_cache_key] < self.NEGATIVE_CACHE_TTL:
|
||||
logger.debug("Negative cache hit for %s (cached empty result)", endpoint)
|
||||
return {"results": []}
|
||||
else:
|
||||
# Expired negative cache entry
|
||||
del self._negative_cache[negative_cache_key]
|
||||
|
||||
delay = 2
|
||||
last_error = None
|
||||
|
||||
@@ -158,6 +170,10 @@ class TMDBClient:
|
||||
resp.raise_for_status()
|
||||
data = await resp.json()
|
||||
self._cache[cache_key] = data
|
||||
# Cache negative result if empty
|
||||
if endpoint.startswith("search/") and not data.get("results"):
|
||||
self._negative_cache[negative_cache_key] = time.monotonic()
|
||||
logger.debug("Cached negative result for %s", endpoint)
|
||||
return data
|
||||
|
||||
except asyncio.TimeoutError as e:
|
||||
@@ -224,6 +240,34 @@ class TMDBClient:
|
||||
{"query": query, "language": language, "page": page}
|
||||
)
|
||||
|
||||
async def search_multi(
|
||||
self,
|
||||
query: str,
|
||||
language: str = "en-US",
|
||||
page: int = 1
|
||||
) -> Dict[str, Any]:
|
||||
"""Search for movies and TV shows by name using TMDB multi search.
|
||||
|
||||
Multi search returns both movies and TV shows, useful for anime
|
||||
that might be indexed as movies on TMDB.
|
||||
|
||||
Args:
|
||||
query: Search query (show name)
|
||||
language: Language for results (default: English)
|
||||
page: Page number for pagination
|
||||
|
||||
Returns:
|
||||
Search results with list of movies and TV shows
|
||||
|
||||
Example:
|
||||
>>> results = await client.search_multi("Suzume no Tojimari")
|
||||
>>> shows = [r for r in results["results"] if r["media_type"] == "tv"]
|
||||
"""
|
||||
return await self._request(
|
||||
"search/multi",
|
||||
{"query": query, "language": language, "page": page}
|
||||
)
|
||||
|
||||
async def get_tv_show_details(
|
||||
self,
|
||||
tv_id: int,
|
||||
@@ -356,3 +400,25 @@ class TMDBClient:
|
||||
"""Clear the request cache."""
|
||||
self._cache.clear()
|
||||
logger.debug("TMDB client cache cleared")
|
||||
|
||||
def clear_negative_cache(self):
|
||||
"""Clear the negative result cache."""
|
||||
self._negative_cache.clear()
|
||||
logger.debug("TMDB negative cache cleared")
|
||||
|
||||
def cleanup_expired_negative_cache(self) -> int:
|
||||
"""Remove expired entries from negative cache.
|
||||
|
||||
Returns:
|
||||
Number of entries removed
|
||||
"""
|
||||
now = time.monotonic()
|
||||
expired_keys = [
|
||||
key for key, timestamp in self._negative_cache.items()
|
||||
if now - timestamp >= self.NEGATIVE_CACHE_TTL
|
||||
]
|
||||
for key in expired_keys:
|
||||
del self._negative_cache[key]
|
||||
if expired_keys:
|
||||
logger.debug("Removed %d expired negative cache entries", len(expired_keys))
|
||||
return len(expired_keys)
|
||||
|
||||
Reference in New Issue
Block a user