fix: use fuzzy title matching in _resolve_key_via_search

- Add _normalize_title() to strip anime suffixes (TV, OVA, Movie, etc.)
- Add _titles_match() using SequenceMatcher for similarity (threshold 0.85)
- Replace exact string match with fuzzy match to fix skipped folders
- Add debug logging for title mismatches and multiple results
- Set LOG_LEVEL=DEBUG in docker-compose.yml
This commit is contained in:
2026-06-05 20:37:06 +02:00
parent e02d65778f
commit 84487d7571
2 changed files with 82 additions and 5 deletions

View File

@@ -38,6 +38,7 @@ services:
condition: service_healthy
environment:
- PYTHONUNBUFFERED=1
- LOG_LEVEL=DEBUG
volumes:
- app-data:/app/data
- app-logs:/app/logs

View File

@@ -74,6 +74,61 @@ class SetupService:
"""
return re.sub(r'\s*\(\d{4}\)\s*$', '', folder_name).strip()
@staticmethod
def _normalize_title(title: str) -> str:
    """Normalize a title for fuzzy matching.

    Lowercases the title, trims surrounding whitespace and removes
    trailing parenthesised media-type markers such as ``(TV)`` or
    ``(OVA)``. Stacked markers (e.g. ``"Foo (TV) (OVA)"``) are all
    removed, regardless of the order in which they appear.

    Args:
        title: The title to normalize

    Returns:
        Normalized title string
    """
    # A single alternation anchored at the end of the string. The outer
    # "+" consumes every trailing marker in one pass, so the result no
    # longer depends on the order in which the markers are listed.
    trailing_markers = (
        r'(?:\s*\((?:TV|Anime|OAD|OVA|Special|Movie|Spin-Off)\))+\s*$'
    )
    normalized = title.lower().strip()
    return re.sub(
        trailing_markers, '', normalized, flags=re.IGNORECASE
    ).strip()
@staticmethod
def _titles_match(title1: str, title2: str, threshold: float = 0.85) -> bool:
    """Check if two titles match using fuzzy comparison.

    Both titles are normalized with ``_normalize_title`` first. They are
    considered a match when the normalized forms are equal, when one
    non-empty form contains the other, or when their ``SequenceMatcher``
    similarity ratio reaches ``threshold``.

    Args:
        title1: First title
        title2: Second title
        threshold: Similarity threshold (0.0 to 1.0)

    Returns:
        True if titles match within threshold
    """
    norm1 = SetupService._normalize_title(title1)
    norm2 = SetupService._normalize_title(title2)

    # Direct match after normalization
    if norm1 == norm2:
        return True

    # Containment check (e.g., "Attack on Titan" in "Attack on Titan (TV)").
    # Both sides must be non-empty: the empty string is a substring of
    # every string, so a blank title would otherwise match anything.
    if norm1 and norm2 and (norm1 in norm2 or norm2 in norm1):
        return True

    # Similarity ratio check using SequenceMatcher
    from difflib import SequenceMatcher
    ratio = SequenceMatcher(None, norm1, norm2).ratio()
    return ratio >= threshold
@staticmethod
async def _resolve_key_via_search(title: str) -> str:
"""Resolve provider key by searching for the title.
@@ -93,11 +148,32 @@ class SetupService:
results = await series_app.search(title)
if len(results) == 1:
result_name = results[0].get('title', '').lower()
if result_name == title.lower():
link = results[0].get('link', '')
if link and '/anime/stream/' in link:
return link.split('/anime/stream/')[-1].split('/')[0]
result_name = results[0].get('title', '')
result_link = results[0].get('link', '')
if SetupService._titles_match(result_name, title):
if result_link and '/anime/stream/' in result_link:
return result_link.split('/anime/stream/')[-1].split('/')[0]
else:
logger.debug(
"Series key resolved but link format unexpected",
folder_title=title,
result_title=result_name,
link=result_link
)
else:
logger.debug(
"Series search result title mismatch",
folder_title=title,
result_title=result_name,
link=result_link
)
elif len(results) > 1:
logger.debug(
"Multiple search results for title, skipping fuzzy match",
title=title,
result_count=len(results)
)
except Exception as e:
logger.warning(
"Provider search failed for folder",