fix: use fuzzy title matching in _resolve_key_via_search
- Add _normalize_title() to strip anime suffixes (TV, OVA, Movie, etc.)
- Add _titles_match() using SequenceMatcher for similarity (threshold 0.85)
- Replace exact string match with fuzzy match to fix skipped folders
- Add debug logging for title mismatches and multiple results
- Set LOG_LEVEL=DEBUG in docker-compose.yml
This commit is contained in:
@@ -38,6 +38,7 @@ services:
|
|||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
environment:
|
environment:
|
||||||
- PYTHONUNBUFFERED=1
|
- PYTHONUNBUFFERED=1
|
||||||
|
- LOG_LEVEL=DEBUG
|
||||||
volumes:
|
volumes:
|
||||||
- app-data:/app/data
|
- app-data:/app/data
|
||||||
- app-logs:/app/logs
|
- app-logs:/app/logs
|
||||||
|
|||||||
@@ -74,6 +74,61 @@ class SetupService:
|
|||||||
"""
|
"""
|
||||||
return re.sub(r'\s*\(\d{4}\)\s*$', '', folder_name).strip()
|
return re.sub(r'\s*\(\d{4}\)\s*$', '', folder_name).strip()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _normalize_title(title: str) -> str:
|
||||||
|
"""Normalize title for fuzzy matching.
|
||||||
|
|
||||||
|
Strips common suffixes and lowercases for comparison.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
title: The title to normalize
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Normalized title string
|
||||||
|
"""
|
||||||
|
# Remove common anime suffixes (case-insensitive)
|
||||||
|
suffixes = [
|
||||||
|
r'\s*\(TV\)\s*$',
|
||||||
|
r'\s*\(Anime\)\s*$',
|
||||||
|
r'\s*\(OAD\)\s*$',
|
||||||
|
r'\s*\(OVA\)\s*$',
|
||||||
|
r'\s*\(Special\)\s*$',
|
||||||
|
r'\s*\(Movie\)\s*$',
|
||||||
|
r'\s*\(Spin-Off\)\s*$',
|
||||||
|
]
|
||||||
|
normalized = title.lower().strip()
|
||||||
|
for suffix_pattern in suffixes:
|
||||||
|
normalized = re.sub(suffix_pattern, '', normalized, flags=re.IGNORECASE).strip()
|
||||||
|
return normalized
|
||||||
|
|
||||||
|
@staticmethod
def _titles_match(title1: str, title2: str, threshold: float = 0.85) -> bool:
    """Check if two titles match using fuzzy comparison.

    Both titles are normalized first. They match when the normalized
    forms are equal, when one contains the other, or when their
    SequenceMatcher similarity ratio reaches ``threshold``. A title that
    normalizes to an empty string never matches anything.

    Args:
        title1: First title
        title2: Second title
        threshold: Similarity threshold (0.0 to 1.0)

    Returns:
        True if titles match within threshold
    """
    norm1 = SetupService._normalize_title(title1)
    norm2 = SetupService._normalize_title(title2)

    # An empty string is a substring of every string, so without this
    # guard a blank title would pass the containment check below and
    # "match" any other title.
    if not norm1 or not norm2:
        return False

    # Direct match after normalization
    if norm1 == norm2:
        return True

    # Containment check: one normalized title fully inside the other.
    # NOTE(review): this also accepts pairs like "naruto" vs
    # "naruto shippuden" - confirm that is intended.
    if norm1 in norm2 or norm2 in norm1:
        return True

    # Similarity ratio check using SequenceMatcher
    from difflib import SequenceMatcher
    ratio = SequenceMatcher(None, norm1, norm2).ratio()
    return ratio >= threshold
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def _resolve_key_via_search(title: str) -> str:
|
async def _resolve_key_via_search(title: str) -> str:
|
||||||
"""Resolve provider key by searching for the title.
|
"""Resolve provider key by searching for the title.
|
||||||
@@ -93,11 +148,32 @@ class SetupService:
|
|||||||
results = await series_app.search(title)
|
results = await series_app.search(title)
|
||||||
|
|
||||||
if len(results) == 1:
|
if len(results) == 1:
|
||||||
result_name = results[0].get('title', '').lower()
|
result_name = results[0].get('title', '')
|
||||||
if result_name == title.lower():
|
result_link = results[0].get('link', '')
|
||||||
link = results[0].get('link', '')
|
|
||||||
if link and '/anime/stream/' in link:
|
if SetupService._titles_match(result_name, title):
|
||||||
return link.split('/anime/stream/')[-1].split('/')[0]
|
if result_link and '/anime/stream/' in result_link:
|
||||||
|
return result_link.split('/anime/stream/')[-1].split('/')[0]
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
"Series key resolved but link format unexpected",
|
||||||
|
folder_title=title,
|
||||||
|
result_title=result_name,
|
||||||
|
link=result_link
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
logger.debug(
|
||||||
|
"Series search result title mismatch",
|
||||||
|
folder_title=title,
|
||||||
|
result_title=result_name,
|
||||||
|
link=result_link
|
||||||
|
)
|
||||||
|
elif len(results) > 1:
|
||||||
|
logger.debug(
|
||||||
|
"Multiple search results for title, skipping fuzzy match",
|
||||||
|
title=title,
|
||||||
|
result_count=len(results)
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Provider search failed for folder",
|
"Provider search failed for folder",
|
||||||
|
|||||||
Reference in New Issue
Block a user