fix: use fuzzy title matching in _resolve_key_via_search
- Add _normalize_title() to strip anime suffixes (TV, OVA, Movie, etc.)
- Add _titles_match() using SequenceMatcher for similarity (threshold 0.85)
- Replace exact string match with fuzzy match to fix skipped folders
- Add debug logging for title mismatches and multiple results
- Set LOG_LEVEL=DEBUG in docker-compose.yml
This commit is contained in:
@@ -74,6 +74,61 @@ class SetupService:
|
||||
"""
|
||||
return re.sub(r'\s*\(\d{4}\)\s*$', '', folder_name).strip()
|
||||
|
||||
@staticmethod
|
||||
def _normalize_title(title: str) -> str:
|
||||
"""Normalize title for fuzzy matching.
|
||||
|
||||
Strips common suffixes and lowercases for comparison.
|
||||
|
||||
Args:
|
||||
title: The title to normalize
|
||||
|
||||
Returns:
|
||||
Normalized title string
|
||||
"""
|
||||
# Remove common anime suffixes (case-insensitive)
|
||||
suffixes = [
|
||||
r'\s*\(TV\)\s*$',
|
||||
r'\s*\(Anime\)\s*$',
|
||||
r'\s*\(OAD\)\s*$',
|
||||
r'\s*\(OVA\)\s*$',
|
||||
r'\s*\(Special\)\s*$',
|
||||
r'\s*\(Movie\)\s*$',
|
||||
r'\s*\(Spin-Off\)\s*$',
|
||||
]
|
||||
normalized = title.lower().strip()
|
||||
for suffix_pattern in suffixes:
|
||||
normalized = re.sub(suffix_pattern, '', normalized, flags=re.IGNORECASE).strip()
|
||||
return normalized
|
||||
|
||||
@staticmethod
|
||||
def _titles_match(title1: str, title2: str, threshold: float = 0.85) -> bool:
|
||||
"""Check if two titles match using fuzzy comparison.
|
||||
|
||||
Args:
|
||||
title1: First title
|
||||
title2: Second title
|
||||
threshold: Similarity threshold (0.0 to 1.0)
|
||||
|
||||
Returns:
|
||||
True if titles match within threshold
|
||||
"""
|
||||
norm1 = SetupService._normalize_title(title1)
|
||||
norm2 = SetupService._normalize_title(title2)
|
||||
|
||||
# Direct match after normalization
|
||||
if norm1 == norm2:
|
||||
return True
|
||||
|
||||
# Containment check (e.g., "Attack on Titan" in "Attack on Titan (TV)")
|
||||
if norm1 in norm2 or norm2 in norm1:
|
||||
return True
|
||||
|
||||
# Similarity ratio check using SequenceMatcher
|
||||
from difflib import SequenceMatcher
|
||||
ratio = SequenceMatcher(None, norm1, norm2).ratio()
|
||||
return ratio >= threshold
|
||||
|
||||
@staticmethod
|
||||
async def _resolve_key_via_search(title: str) -> str:
|
||||
"""Resolve provider key by searching for the title.
|
||||
@@ -93,11 +148,32 @@ class SetupService:
|
||||
results = await series_app.search(title)
|
||||
|
||||
if len(results) == 1:
|
||||
result_name = results[0].get('title', '').lower()
|
||||
if result_name == title.lower():
|
||||
link = results[0].get('link', '')
|
||||
if link and '/anime/stream/' in link:
|
||||
return link.split('/anime/stream/')[-1].split('/')[0]
|
||||
result_name = results[0].get('title', '')
|
||||
result_link = results[0].get('link', '')
|
||||
|
||||
if SetupService._titles_match(result_name, title):
|
||||
if result_link and '/anime/stream/' in result_link:
|
||||
return result_link.split('/anime/stream/')[-1].split('/')[0]
|
||||
else:
|
||||
logger.debug(
|
||||
"Series key resolved but link format unexpected",
|
||||
folder_title=title,
|
||||
result_title=result_name,
|
||||
link=result_link
|
||||
)
|
||||
else:
|
||||
logger.debug(
|
||||
"Series search result title mismatch",
|
||||
folder_title=title,
|
||||
result_title=result_name,
|
||||
link=result_link
|
||||
)
|
||||
elif len(results) > 1:
|
||||
logger.debug(
|
||||
"Multiple search results for title, skipping fuzzy match",
|
||||
title=title,
|
||||
result_count=len(results)
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Provider search failed for folder",
|
||||
|
||||
Reference in New Issue
Block a user