feat(services): add key resolution for orphaned anime folders
- Add key_resolution_service.py to resolve provider keys for folders without key/data files
- Search anime provider and match folder names (case-insensitive, exact match required)
- Only save to DB if exactly one match found; otherwise skip
- Add comprehensive unit tests (28 tests)
- Integrate into scheduler_service after nfo_repair scan
- Update ARCHITECTURE.md documentation
This commit is contained in:
317
src/server/services/key_resolution_service.py
Normal file
317
src/server/services/key_resolution_service.py
Normal file
@@ -0,0 +1,317 @@
|
||||
"""Key resolution service for orphaned anime folders.
|
||||
|
||||
Attempts to resolve provider keys for anime folders that have no key/data
|
||||
file and no database entry, by searching the anime provider and matching
|
||||
folder names to search results.
|
||||
|
||||
This service runs after nfo_repair_service during the daily folder scan.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import structlog
|
||||
|
||||
from src.config.settings import settings as _settings
|
||||
|
||||
# Module-level logger, created with this module's name.
logger = structlog.get_logger(__name__)

# Limit concurrent provider searches to avoid rate-limiting.
# At most two coroutines can hold this semaphore at the same time.
_SEARCH_SEMAPHORE: asyncio.Semaphore = asyncio.Semaphore(2)
|
||||
|
||||
|
||||
def _strip_year_from_folder(folder_name: str) -> str:
|
||||
"""Remove trailing year suffix like ' (2020)' from folder name.
|
||||
|
||||
Args:
|
||||
folder_name: Folder name, e.g. 'Rent-A-Girlfriend (2020)'
|
||||
|
||||
Returns:
|
||||
Name without year, e.g. 'Rent-A-Girlfriend'
|
||||
"""
|
||||
return re.sub(r"\s*\(\d{4}\)\s*$", "", folder_name).strip()
|
||||
|
||||
|
||||
def _extract_year_from_folder(folder_name: str) -> Optional[int]:
|
||||
"""Extract year from folder name like 'Anime Name (2020)'.
|
||||
|
||||
Returns:
|
||||
Year as int or None if not present.
|
||||
"""
|
||||
match = re.search(r"\((\d{4})\)$", folder_name.strip())
|
||||
if match:
|
||||
return int(match.group(1))
|
||||
return None
|
||||
|
||||
|
||||
def _extract_key_from_link(link: str) -> Optional[str]:
|
||||
"""Extract provider key from search result link.
|
||||
|
||||
Args:
|
||||
link: Link like '/anime/stream/rent-a-girlfriend' or full URL.
|
||||
|
||||
Returns:
|
||||
Key slug like 'rent-a-girlfriend' or None.
|
||||
"""
|
||||
if not link:
|
||||
return None
|
||||
if "/anime/stream/" in link:
|
||||
parts = link.split("/anime/stream/")[-1].split("/")
|
||||
key = parts[0].strip()
|
||||
return key if key else None
|
||||
# If link is just a slug
|
||||
if "/" not in link and link.strip():
|
||||
return link.strip()
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_for_comparison(text: str) -> str:
|
||||
"""Normalize text for case-insensitive comparison.
|
||||
|
||||
Strips whitespace, lowercases, and removes common punctuation
|
||||
differences that shouldn't affect matching.
|
||||
|
||||
Args:
|
||||
text: Raw text string.
|
||||
|
||||
Returns:
|
||||
Normalized lowercase string.
|
||||
"""
|
||||
normalized = text.strip().lower()
|
||||
# Remove common punctuation that varies between sources
|
||||
normalized = re.sub(r"[:\-–—]", " ", normalized)
|
||||
# Collapse multiple spaces
|
||||
normalized = re.sub(r"\s+", " ", normalized)
|
||||
return normalized.strip()
|
||||
|
||||
|
||||
async def resolve_key_for_folder(folder_name: str) -> Optional[str]:
    """Attempt to resolve the provider key for a single folder.

    Strategy:
        1. Strip year suffix from folder name to get search query.
        2. Search the anime provider with that query.
        3. If the results whose title matches the folder name
           (case-insensitive) point to exactly ONE distinct key, return
           that key. Repeated results carrying the same key count once.
        4. If zero or multiple distinct keys match, return None (not
           confident enough).

    A failing provider search is logged and treated as "not resolved".

    Args:
        folder_name: The anime folder name, e.g. 'Rent-A-Girlfriend (2020)'.

    Returns:
        The provider key string, or None if resolution is not confident.
    """
    search_query = _strip_year_from_folder(folder_name)
    if not search_query:
        logger.debug("Empty search query after stripping year from '%s'", folder_name)
        return None

    # Hold the semaphore only around the provider call itself.
    async with _SEARCH_SEMAPHORE:
        try:
            loop = asyncio.get_running_loop()
            # _search_provider is synchronous; run it in the default executor
            # so the event loop is not blocked while it runs.
            results = await loop.run_in_executor(None, _search_provider, search_query)
        except Exception as exc:
            logger.warning(
                "Provider search failed for '%s': %s", search_query, exc
            )
            return None

    if not results:
        logger.debug("No search results for folder '%s'", folder_name)
        return None

    # Filter results: find exact name matches (case-insensitive).
    normalized_query = _normalize_for_comparison(search_query)

    # Maps key -> first matching title. Keyed by provider key so that the
    # same series listed more than once is not mistaken for an ambiguity.
    exact_matches: dict[str, str] = {}
    for result in results:
        title = result.get("title") or result.get("name") or ""
        if _normalize_for_comparison(title) != normalized_query:
            continue
        key = _extract_key_from_link(result.get("link", ""))
        if key:
            exact_matches.setdefault(key, title)

    if len(exact_matches) == 1:
        resolved_key, matched_title = next(iter(exact_matches.items()))
        logger.info(
            "Resolved key for folder '%s': key='%s' (matched title: '%s')",
            folder_name,
            resolved_key,
            matched_title,
        )
        return resolved_key

    if len(exact_matches) > 1:
        logger.info(
            "Multiple exact matches for folder '%s' (%d matches), skipping",
            folder_name,
            len(exact_matches),
        )
    else:
        logger.debug(
            "No exact title match for folder '%s' in %d results",
            folder_name,
            len(results),
        )

    return None
|
||||
|
||||
|
||||
def _search_provider(query: str) -> list:
    """Call the anime provider search synchronously.

    The provider factory is imported inside the function, so the import
    only happens when a search is actually performed.

    Args:
        query: Search term.

    Returns:
        List of search result dicts with 'link' and 'title'/'name' fields.
    """
    from src.core.providers.provider_factory import Loaders

    loader = Loaders().GetLoader("aniworld.to")
    return loader.search(query)
|
||||
|
||||
|
||||
async def perform_key_resolution_scan() -> dict[str, int]:
    """Scan all anime folders and resolve missing keys.

    Iterates over all subfolders of the anime directory. For each folder
    that has no corresponding database entry, attempts to resolve the
    provider key via provider search and saves it to the database.

    Returns:
        Dictionary with counts:
        - 'scanned': total folders checked
        - 'resolved': keys successfully resolved and saved
        - 'skipped': folders already in DB, resolution uncertain, or
          resolved key not saved (e.g. key already used by another folder)
        - 'errors': folders that caused errors during resolution
    """
    from src.server.database.connection import get_db_session
    from src.server.database.service import AnimeSeriesService

    stats = {"scanned": 0, "resolved": 0, "skipped": 0, "errors": 0}

    if not _settings.anime_directory:
        logger.warning("Key resolution scan skipped — anime directory not configured")
        return stats

    anime_dir = Path(_settings.anime_directory)
    if not anime_dir.is_dir():
        logger.warning(
            "Key resolution scan skipped — anime directory not found: %s",
            anime_dir,
        )
        return stats

    # Collect folders that need resolution
    folders_to_resolve: list[str] = []

    async with get_db_session() as db:
        for series_dir in sorted(anime_dir.iterdir()):
            if not series_dir.is_dir():
                continue
            folder_name = series_dir.name
            stats["scanned"] += 1

            # Check if already in database
            existing = await AnimeSeriesService.get_by_folder(db, folder_name)
            if existing:
                stats["skipped"] += 1
                continue

            folders_to_resolve.append(folder_name)

    if not folders_to_resolve:
        logger.info("Key resolution scan: all folders already have DB entries")
        return stats

    logger.info(
        "Key resolution scan: %d folders need resolution", len(folders_to_resolve)
    )

    # Resolve keys one by one (provider search is rate-limited)
    for folder_name in folders_to_resolve:
        try:
            key = await resolve_key_for_folder(folder_name)
            # Count a folder as resolved only when the key was actually
            # written; _save_resolved_key declines when the folder or the
            # key is already present in the database.
            if key and await _save_resolved_key(folder_name, key):
                stats["resolved"] += 1
            else:
                stats["skipped"] += 1
        except Exception as exc:
            # One failing folder must not abort the rest of the scan.
            logger.error(
                "Error resolving key for folder '%s': %s",
                folder_name,
                exc,
            )
            stats["errors"] += 1

    logger.info(
        "Key resolution scan complete: scanned=%d, resolved=%d, skipped=%d, errors=%d",
        stats["scanned"],
        stats["resolved"],
        stats["skipped"],
        stats["errors"],
    )
    return stats


async def _save_resolved_key(folder_name: str, key: str) -> bool:
    """Save a resolved key to the database.

    Creates a new AnimeSeries entry with the resolved key and folder name.
    Does NOT write any key/data file to disk.

    Args:
        folder_name: The anime folder name (e.g. 'Rent-A-Girlfriend (2020)').
        key: The resolved provider key (e.g. 'rent-a-girlfriend').

    Returns:
        True if a new entry was created, False if nothing was saved because
        the folder or the key already exists in the database.
    """
    from src.server.database.connection import get_db_session
    from src.server.database.service import AnimeSeriesService

    name = _strip_year_from_folder(folder_name)
    year = _extract_year_from_folder(folder_name)

    async with get_db_session() as db:
        # Double-check: another task might have resolved it concurrently
        existing = await AnimeSeriesService.get_by_folder(db, folder_name)
        if existing:
            logger.debug(
                "Folder '%s' already in DB (resolved concurrently), skipping",
                folder_name,
            )
            return False

        # Also check if a series with this key already exists
        existing_key = await AnimeSeriesService.get_by_key(db, key)
        if existing_key:
            logger.warning(
                "Key '%s' already exists in DB for folder '%s', "
                "cannot assign to folder '%s'",
                key,
                existing_key.folder,
                folder_name,
            )
            return False

        await AnimeSeriesService.create(
            db,
            key=key,
            name=name,
            site="aniworld.to",
            folder=folder_name,
            year=year,
            loading_status="pending",
            episodes_loaded=False,
        )
        logger.info(
            "Saved resolved key '%s' for folder '%s' to database",
            key,
            folder_name,
        )
        return True
|
||||
@@ -316,11 +316,9 @@ class SchedulerService:
|
||||
return
|
||||
|
||||
try:
|
||||
from src.server.database.connection import ( # noqa: PLC0415
|
||||
get_db_session,
|
||||
)
|
||||
from src.server.database.system_settings_service import ( # noqa: PLC0415
|
||||
SystemSettingsService,
|
||||
from src.server.database.connection import get_db_session # noqa: PLC0415
|
||||
from src.server.database.system_settings_service import (
|
||||
SystemSettingsService, # noqa: PLC0415
|
||||
)
|
||||
|
||||
async with get_db_session() as db:
|
||||
@@ -367,8 +365,8 @@ class SchedulerService:
|
||||
async def _broadcast(self, event_type: str, data: dict) -> None:
|
||||
"""Broadcast a WebSocket event to all connected clients."""
|
||||
try:
|
||||
from src.server.services.websocket_service import ( # noqa: PLC0415
|
||||
get_websocket_service,
|
||||
from src.server.services.websocket_service import (
|
||||
get_websocket_service, # noqa: PLC0415
|
||||
)
|
||||
|
||||
ws_service = get_websocket_service()
|
||||
@@ -503,8 +501,8 @@ class SchedulerService:
|
||||
if self._config and self._config.folder_scan_enabled:
|
||||
logger.info("Folder scan is enabled — starting")
|
||||
try:
|
||||
from src.server.services.folder_scan_service import ( # noqa: PLC0415
|
||||
FolderScanService,
|
||||
from src.server.services.folder_scan_service import (
|
||||
FolderScanService, # noqa: PLC0415
|
||||
)
|
||||
|
||||
folder_scan_service = FolderScanService()
|
||||
@@ -519,6 +517,26 @@ class SchedulerService:
|
||||
await self._broadcast(
|
||||
"folder_scan_error", {"error": str(fs_exc)}
|
||||
)
|
||||
|
||||
# Key resolution scan (resolve orphaned folders)
|
||||
try:
|
||||
from src.server.services.key_resolution_service import (
|
||||
perform_key_resolution_scan, # noqa: PLC0415
|
||||
)
|
||||
|
||||
key_stats = await perform_key_resolution_scan()
|
||||
logger.info(
|
||||
"Key resolution scan completed: resolved=%d, skipped=%d, errors=%d",
|
||||
key_stats["resolved"],
|
||||
key_stats["skipped"],
|
||||
key_stats["errors"],
|
||||
)
|
||||
except Exception as kr_exc: # pylint: disable=broad-exception-caught
|
||||
logger.error(
|
||||
"Key resolution scan failed: %s",
|
||||
kr_exc,
|
||||
exc_info=True,
|
||||
)
|
||||
else:
|
||||
logger.debug("Folder scan is disabled — skipping")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user