feat(services): add key resolution for orphaned anime folders

- Add key_resolution_service.py to resolve provider keys for folders without key/data files
- Search anime provider and match folder names (case-insensitive, exact match required)
- Only save to DB if exactly one match found; otherwise skip
- Add comprehensive unit tests (28 tests)
- Integrate into scheduler_service after nfo_repair scan
- Update ARCHITECTURE.md documentation
This commit is contained in:
2026-06-01 20:43:13 +02:00
parent 6dfb24de7e
commit c58b42dfa5
6 changed files with 565 additions and 12 deletions

View File

@@ -1 +1 @@
v1.2.2
v1.3.0

View File

@@ -293,7 +293,7 @@ The FastAPI lifespan function (`src/server/fastapi_app.py`) runs the following s
9. Scheduler service started
+-- Cron-based library rescans configured
+-- Optional: auto-download missing episodes after rescan
+-- Optional: folder maintenance (NFO repair, renaming, poster checks) during scheduled runs
+-- Optional: folder maintenance (NFO repair, key resolution, renaming, poster checks) during scheduled runs
```
### 12.2 Temp Folder Guarantee

View File

@@ -1,6 +1,6 @@
{
"name": "aniworld-web",
"version": "1.2.2",
"version": "1.3.0",
"description": "Aniworld Anime Download Manager - Web Frontend",
"type": "module",
"scripts": {

View File

@@ -0,0 +1,317 @@
"""Key resolution service for orphaned anime folders.
Attempts to resolve provider keys for anime folders that have no key/data
file and no database entry, by searching the anime provider and matching
folder names to search results.
This service runs after nfo_repair_service during the daily folder scan.
"""
from __future__ import annotations
import asyncio
import re
from pathlib import Path
from typing import Optional
import structlog
from src.config.settings import settings as _settings
logger = structlog.get_logger(__name__)
# Limit concurrent provider searches to avoid rate-limiting.
# At most two searches may be in flight at once; resolve_key_for_folder
# acquires this semaphore around each provider search it dispatches.
_SEARCH_SEMAPHORE: asyncio.Semaphore = asyncio.Semaphore(2)
def _strip_year_from_folder(folder_name: str) -> str:
"""Remove trailing year suffix like ' (2020)' from folder name.
Args:
folder_name: Folder name, e.g. 'Rent-A-Girlfriend (2020)'
Returns:
Name without year, e.g. 'Rent-A-Girlfriend'
"""
return re.sub(r"\s*\(\d{4}\)\s*$", "", folder_name).strip()
def _extract_year_from_folder(folder_name: str) -> Optional[int]:
"""Extract year from folder name like 'Anime Name (2020)'.
Returns:
Year as int or None if not present.
"""
match = re.search(r"\((\d{4})\)$", folder_name.strip())
if match:
return int(match.group(1))
return None
def _extract_key_from_link(link: str) -> Optional[str]:
"""Extract provider key from search result link.
Args:
link: Link like '/anime/stream/rent-a-girlfriend' or full URL.
Returns:
Key slug like 'rent-a-girlfriend' or None.
"""
if not link:
return None
if "/anime/stream/" in link:
parts = link.split("/anime/stream/")[-1].split("/")
key = parts[0].strip()
return key if key else None
# If link is just a slug
if "/" not in link and link.strip():
return link.strip()
return None
def _normalize_for_comparison(text: str) -> str:
"""Normalize text for case-insensitive comparison.
Strips whitespace, lowercases, and removes common punctuation
differences that shouldn't affect matching.
Args:
text: Raw text string.
Returns:
Normalized lowercase string.
"""
normalized = text.strip().lower()
# Remove common punctuation that varies between sources
normalized = re.sub(r"[:\-–—]", " ", normalized)
# Collapse multiple spaces
normalized = re.sub(r"\s+", " ", normalized)
return normalized.strip()
async def resolve_key_for_folder(folder_name: str) -> Optional[str]:
    """Attempt to resolve the provider key for a single folder.

    Strategy:
        1. Strip year suffix from folder name to get search query.
        2. Search the anime provider with that query.
        3. If the results whose title matches the folder name
           (case-insensitive, after normalization) all point to exactly ONE
           distinct key, return that key.
        4. If zero or several distinct keys match, return None (not
           confident enough).

    Results that repeat the same key are treated as a single match, so a
    provider listing one series twice does not make the lookup ambiguous.

    Args:
        folder_name: The anime folder name, e.g. 'Rent-A-Girlfriend (2020)'.

    Returns:
        The provider key string, or None if resolution is not confident or
        the provider search failed.
    """
    search_query = _strip_year_from_folder(folder_name)
    if not search_query:
        logger.debug("Empty search query after stripping year from '%s'", folder_name)
        return None

    async with _SEARCH_SEMAPHORE:
        try:
            # The provider search is synchronous; run it in the default
            # executor so the event loop is not blocked.
            loop = asyncio.get_running_loop()
            results = await loop.run_in_executor(None, _search_provider, search_query)
        except Exception as exc:
            # Best effort: a failing provider must not abort the caller.
            logger.warning(
                "Provider search failed for '%s': %s", search_query, exc
            )
            return None

    if not results:
        logger.debug("No search results for folder '%s'", folder_name)
        return None

    # Filter results: find exact name matches (case-insensitive), keyed by
    # provider key so duplicates of the same series collapse to one entry.
    normalized_query = _normalize_for_comparison(search_query)
    matches_by_key: dict[str, str] = {}
    for result in results:
        title = result.get("title") or result.get("name") or ""
        if _normalize_for_comparison(title) != normalized_query:
            continue
        key = _extract_key_from_link(result.get("link", ""))
        if key:
            matches_by_key.setdefault(key, title)

    if len(matches_by_key) == 1:
        resolved_key, matched_title = next(iter(matches_by_key.items()))
        logger.info(
            "Resolved key for folder '%s': key='%s' (matched title: '%s')",
            folder_name,
            resolved_key,
            matched_title,
        )
        return resolved_key

    if len(matches_by_key) > 1:
        logger.info(
            "Multiple exact matches for folder '%s' (%d matches), skipping",
            folder_name,
            len(matches_by_key),
        )
    else:
        logger.debug(
            "No exact title match for folder '%s' in %d results",
            folder_name,
            len(results),
        )
    return None
def _search_provider(query: str) -> list:
    """Run a blocking search against the 'aniworld.to' provider.

    Args:
        query: Search term.

    Returns:
        Whatever the provider loader's ``search`` returns; callers in this
        module treat it as a list of dicts carrying 'link' and
        'title'/'name' fields.
    """
    # Imported lazily, inside the function, as in the rest of this module.
    from src.core.providers.provider_factory import Loaders

    provider_loader = Loaders().GetLoader("aniworld.to")
    hits = provider_loader.search(query)
    return hits
async def perform_key_resolution_scan() -> dict[str, int]:
    """Scan all anime folders and resolve missing keys.

    Iterates over all subfolders of the anime directory. For each folder
    that has no corresponding database entry, attempts to resolve the
    provider key via provider search and saves it to the database.

    A folder only counts as 'resolved' when a database row was actually
    created; if the save step declines (folder or key already present) the
    folder is counted as 'skipped'.

    Returns:
        Dictionary with counts:
            - 'scanned': total folders checked
            - 'resolved': keys successfully resolved and saved
            - 'skipped': folders already in DB, resolution uncertain, or
              save declined because the folder/key already exists
            - 'errors': folders that caused errors during resolution
    """
    from src.server.database.connection import get_db_session
    from src.server.database.service import AnimeSeriesService

    stats = {"scanned": 0, "resolved": 0, "skipped": 0, "errors": 0}

    if not _settings.anime_directory:
        logger.warning("Key resolution scan skipped — anime directory not configured")
        return stats

    anime_dir = Path(_settings.anime_directory)
    if not anime_dir.is_dir():
        logger.warning(
            "Key resolution scan skipped — anime directory not found: %s",
            anime_dir,
        )
        return stats

    # Collect folders that need resolution
    folders_to_resolve: list[str] = []
    async with get_db_session() as db:
        for series_dir in sorted(anime_dir.iterdir()):
            if not series_dir.is_dir():
                continue
            folder_name = series_dir.name
            stats["scanned"] += 1
            # Check if already in database
            existing = await AnimeSeriesService.get_by_folder(db, folder_name)
            if existing:
                stats["skipped"] += 1
                continue
            folders_to_resolve.append(folder_name)

    if not folders_to_resolve:
        logger.info("Key resolution scan: all folders already have DB entries")
        return stats

    logger.info(
        "Key resolution scan: %d folders need resolution", len(folders_to_resolve)
    )

    # Resolve keys one by one (provider search is rate-limited)
    for folder_name in folders_to_resolve:
        try:
            key = await resolve_key_for_folder(folder_name)
            # Count as resolved only if a row was really written.
            if key and await _save_resolved_key(folder_name, key):
                stats["resolved"] += 1
            else:
                stats["skipped"] += 1
        except Exception as exc:
            # One bad folder must not stop the rest of the scan.
            logger.error(
                "Error resolving key for folder '%s': %s",
                folder_name,
                exc,
            )
            stats["errors"] += 1

    logger.info(
        "Key resolution scan complete: scanned=%d, resolved=%d, skipped=%d, errors=%d",
        stats["scanned"],
        stats["resolved"],
        stats["skipped"],
        stats["errors"],
    )
    return stats


async def _save_resolved_key(folder_name: str, key: str) -> bool:
    """Save a resolved key to the database.

    Creates a new AnimeSeries entry with the resolved key and folder name.
    Does NOT write any key/data file to disk.

    Args:
        folder_name: The anime folder name (e.g. 'Rent-A-Girlfriend (2020)').
        key: The resolved provider key (e.g. 'rent-a-girlfriend').

    Returns:
        True if a new entry was created; False if nothing was written
        because the folder or the key already exists in the database.
    """
    from src.server.database.connection import get_db_session
    from src.server.database.service import AnimeSeriesService

    name = _strip_year_from_folder(folder_name)
    year = _extract_year_from_folder(folder_name)

    async with get_db_session() as db:
        # Double-check: another task might have resolved it concurrently
        existing = await AnimeSeriesService.get_by_folder(db, folder_name)
        if existing:
            logger.debug(
                "Folder '%s' already in DB (resolved concurrently), skipping",
                folder_name,
            )
            return False

        # Also check if a series with this key already exists
        existing_key = await AnimeSeriesService.get_by_key(db, key)
        if existing_key:
            logger.warning(
                "Key '%s' already exists in DB for folder '%s', "
                "cannot assign to folder '%s'",
                key,
                existing_key.folder,
                folder_name,
            )
            return False

        await AnimeSeriesService.create(
            db,
            key=key,
            name=name,
            site="aniworld.to",
            folder=folder_name,
            year=year,
            loading_status="pending",
            episodes_loaded=False,
        )
        logger.info(
            "Saved resolved key '%s' for folder '%s' to database",
            key,
            folder_name,
        )
        return True

View File

@@ -316,11 +316,9 @@ class SchedulerService:
return
try:
from src.server.database.connection import ( # noqa: PLC0415
get_db_session,
)
from src.server.database.system_settings_service import ( # noqa: PLC0415
SystemSettingsService,
from src.server.database.connection import get_db_session # noqa: PLC0415
from src.server.database.system_settings_service import (
SystemSettingsService, # noqa: PLC0415
)
async with get_db_session() as db:
@@ -367,8 +365,8 @@ class SchedulerService:
async def _broadcast(self, event_type: str, data: dict) -> None:
"""Broadcast a WebSocket event to all connected clients."""
try:
from src.server.services.websocket_service import ( # noqa: PLC0415
get_websocket_service,
from src.server.services.websocket_service import (
get_websocket_service, # noqa: PLC0415
)
ws_service = get_websocket_service()
@@ -503,8 +501,8 @@ class SchedulerService:
if self._config and self._config.folder_scan_enabled:
logger.info("Folder scan is enabled — starting")
try:
from src.server.services.folder_scan_service import ( # noqa: PLC0415
FolderScanService,
from src.server.services.folder_scan_service import (
FolderScanService, # noqa: PLC0415
)
folder_scan_service = FolderScanService()
@@ -519,6 +517,26 @@ class SchedulerService:
await self._broadcast(
"folder_scan_error", {"error": str(fs_exc)}
)
# Key resolution scan (resolve orphaned folders)
try:
from src.server.services.key_resolution_service import (
perform_key_resolution_scan, # noqa: PLC0415
)
key_stats = await perform_key_resolution_scan()
logger.info(
"Key resolution scan completed: resolved=%d, skipped=%d, errors=%d",
key_stats["resolved"],
key_stats["skipped"],
key_stats["errors"],
)
except Exception as kr_exc: # pylint: disable=broad-exception-caught
logger.error(
"Key resolution scan failed: %s",
kr_exc,
exc_info=True,
)
else:
logger.debug("Folder scan is disabled — skipping")

View File

@@ -0,0 +1,218 @@
"""Unit tests for key_resolution_service."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.server.services.key_resolution_service import (
_extract_key_from_link,
_extract_year_from_folder,
_normalize_for_comparison,
_strip_year_from_folder,
resolve_key_for_folder,
)
class TestStripYearFromFolder:
    """Checks that _strip_year_from_folder drops only a trailing '(YYYY)'."""

    def test_removes_year_suffix(self):
        stripped = _strip_year_from_folder("Rent-A-Girlfriend (2020)")
        assert stripped == "Rent-A-Girlfriend"

    def test_removes_year_suffix_with_spaces(self):
        stripped = _strip_year_from_folder("Attack on Titan (2013)")
        assert stripped == "Attack on Titan"

    def test_no_year_returns_original(self):
        untouched = _strip_year_from_folder("Naruto")
        assert untouched == "Naruto"

    def test_year_in_middle_not_stripped(self):
        # A bare year elsewhere in the name survives; only the suffix goes.
        stripped = _strip_year_from_folder("2024 Anime (2024)")
        assert stripped == "2024 Anime"

    def test_empty_string(self):
        assert not _strip_year_from_folder("")
        assert _strip_year_from_folder("") == ""

    def test_only_year(self):
        stripped = _strip_year_from_folder("(2020)")
        assert stripped == ""
class TestExtractYearFromFolder:
    """Checks that _extract_year_from_folder reads only a trailing '(YYYY)'."""

    def test_extracts_year(self):
        year = _extract_year_from_folder("Rent-A-Girlfriend (2020)")
        assert year == 2020

    def test_no_year_returns_none(self):
        year = _extract_year_from_folder("Naruto")
        assert year is None

    def test_year_in_middle_not_extracted(self):
        # A year that is not a parenthesised suffix must be ignored.
        year = _extract_year_from_folder("2024 Anime")
        assert year is None
class TestExtractKeyFromLink:
    """Checks slug extraction from the link shapes a search result may carry."""

    def test_relative_link(self):
        slug = _extract_key_from_link("/anime/stream/rent-a-girlfriend")
        assert slug == "rent-a-girlfriend"

    def test_full_url(self):
        url = "https://aniworld.to/anime/stream/attack-on-titan"
        assert _extract_key_from_link(url) == "attack-on-titan"

    def test_link_with_trailing_slash(self):
        slug = _extract_key_from_link("/anime/stream/naruto/")
        assert slug == "naruto"

    def test_empty_link(self):
        slug = _extract_key_from_link("")
        assert slug is None

    def test_none_link(self):
        slug = _extract_key_from_link(None)
        assert slug is None

    def test_slug_only(self):
        slug = _extract_key_from_link("one-piece")
        assert slug == "one-piece"
class TestNormalizeForComparison:
    """Tests for _normalize_for_comparison."""

    def test_case_insensitive(self):
        assert _normalize_for_comparison("Rent-A-Girlfriend") == _normalize_for_comparison(
            "rent-a-girlfriend"
        )

    def test_strips_whitespace(self):
        assert _normalize_for_comparison("  Naruto  ") == "naruto"

    def test_normalizes_dashes(self):
        assert _normalize_for_comparison("Rent-A-Girlfriend") == "rent a girlfriend"

    def test_collapses_spaces(self):
        # The input must contain runs of whitespace, otherwise the
        # collapsing step is never exercised.
        assert _normalize_for_comparison("Attack   on  Titan") == "attack on titan"
class TestResolveKeyForFolder:
    """Behavioural tests for resolve_key_for_folder with a mocked provider."""

    # Dotted path of the synchronous search function replaced in every test.
    _SEARCH_TARGET = "src.server.services.key_resolution_service._search_provider"

    @classmethod
    async def _resolve(cls, folder, **patch_kwargs):
        """Run resolve_key_for_folder with the provider search patched.

        ``patch_kwargs`` is forwarded to ``patch`` (``return_value`` or
        ``side_effect``) to control what the fake search does.
        """
        with patch(cls._SEARCH_TARGET, **patch_kwargs):
            return await resolve_key_for_folder(folder)

    @pytest.mark.asyncio
    async def test_single_exact_match_returns_key(self):
        """When provider returns exactly one exact-name match, key is resolved."""
        hits = [
            {"link": "/anime/stream/rent-a-girlfriend", "title": "Rent-A-Girlfriend"},
        ]
        resolved = await self._resolve("Rent-A-Girlfriend (2020)", return_value=hits)
        assert resolved == "rent-a-girlfriend"

    @pytest.mark.asyncio
    async def test_no_results_returns_none(self):
        """When provider returns no results, returns None."""
        resolved = await self._resolve("Unknown Anime (2020)", return_value=[])
        assert resolved is None

    @pytest.mark.asyncio
    async def test_multiple_exact_matches_returns_none(self):
        """When multiple results match the same name exactly, returns None."""
        hits = [
            {"link": "/anime/stream/my-anime", "title": "My Anime"},
            {"link": "/anime/stream/my-anime-2", "title": "My Anime"},
        ]
        resolved = await self._resolve("My Anime (2022)", return_value=hits)
        assert resolved is None

    @pytest.mark.asyncio
    async def test_no_exact_match_returns_none(self):
        """When results exist but none match the folder name, returns None."""
        hits = [
            {"link": "/anime/stream/rent-a-girlfriend-2", "title": "Rent-A-Girlfriend 2nd Season"},
            {"link": "/anime/stream/rent-a-girlfriend-3", "title": "Rent-A-Girlfriend 3rd Season"},
        ]
        resolved = await self._resolve("Rent-A-Girlfriend (2020)", return_value=hits)
        assert resolved is None

    @pytest.mark.asyncio
    async def test_case_insensitive_match(self):
        """Matching is case-insensitive."""
        hits = [
            {"link": "/anime/stream/naruto", "title": "NARUTO"},
        ]
        resolved = await self._resolve("Naruto (2002)", return_value=hits)
        assert resolved == "naruto"

    @pytest.mark.asyncio
    async def test_provider_error_returns_none(self):
        """When provider search raises an exception, returns None gracefully."""
        resolved = await self._resolve(
            "Some Anime (2020)", side_effect=RuntimeError("Network error")
        )
        assert resolved is None

    @pytest.mark.asyncio
    async def test_result_with_name_field_instead_of_title(self):
        """Search results using 'name' field instead of 'title' work."""
        hits = [
            {"link": "/anime/stream/one-piece", "name": "One Piece"},
        ]
        resolved = await self._resolve("One Piece (1999)", return_value=hits)
        assert resolved == "one-piece"

    @pytest.mark.asyncio
    async def test_folder_without_year(self):
        """Folders without year suffix still work."""
        hits = [
            {"link": "/anime/stream/naruto", "title": "Naruto"},
        ]
        resolved = await self._resolve("Naruto", return_value=hits)
        assert resolved == "naruto"

    @pytest.mark.asyncio
    async def test_exact_match_among_partial_matches(self):
        """Only exact matches count, partial matches are ignored."""
        hits = [
            {"link": "/anime/stream/dororo", "title": "Dororo"},
            {"link": "/anime/stream/dororo-to-hyakkimaru", "title": "Dororo to Hyakkimaru"},
        ]
        resolved = await self._resolve("Dororo (2019)", return_value=hits)
        assert resolved == "dororo"