diff --git a/Docker/VERSION b/Docker/VERSION
index cc90463..18fa8e7 100644
--- a/Docker/VERSION
+++ b/Docker/VERSION
@@ -1 +1 @@
-v1.2.2
+v1.3.0
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index d13a6b4..be27dbc 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -293,7 +293,7 @@ The FastAPI lifespan function (`src/server/fastapi_app.py`) runs the following s
 9. Scheduler service started
    +-- Cron-based library rescans configured
    +-- Optional: auto-download missing episodes after rescan
-   +-- Optional: folder maintenance (NFO repair, renaming, poster checks) during scheduled runs
+   +-- Optional: folder maintenance (NFO repair, key resolution, renaming, poster checks) during scheduled runs
 ```
 
 ### 12.2 Temp Folder Guarantee
diff --git a/package.json b/package.json
index d4f7a90..d5ce72e 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "aniworld-web",
-  "version": "1.2.2",
+  "version": "1.3.0",
   "description": "Aniworld Anime Download Manager - Web Frontend",
   "type": "module",
   "scripts": {
diff --git a/src/server/services/key_resolution_service.py b/src/server/services/key_resolution_service.py
new file mode 100644
index 0000000..1c3ed38
--- /dev/null
+++ b/src/server/services/key_resolution_service.py
@@ -0,0 +1,322 @@
+"""Key resolution service for orphaned anime folders.
+
+Attempts to resolve provider keys for anime folders that have no key/data
+file and no database entry, by searching the anime provider and matching
+folder names to search results.
+
+This service runs after nfo_repair_service during the daily folder scan.
+"""
+from __future__ import annotations
+
+import asyncio
+import re
+from pathlib import Path
+from typing import Optional
+
+import structlog
+
+from src.config.settings import settings as _settings
+
+logger = structlog.get_logger(__name__)
+
+# Limit concurrent provider searches to avoid rate-limiting.
+_SEARCH_SEMAPHORE: asyncio.Semaphore = asyncio.Semaphore(2)
+
+
+def _strip_year_from_folder(folder_name: str) -> str:
+    """Remove trailing year suffix like ' (2020)' from folder name.
+
+    Args:
+        folder_name: Folder name, e.g. 'Rent-A-Girlfriend (2020)'
+
+    Returns:
+        Name without year, e.g. 'Rent-A-Girlfriend'
+    """
+    return re.sub(r"\s*\(\d{4}\)\s*$", "", folder_name).strip()
+
+
+def _extract_year_from_folder(folder_name: str) -> Optional[int]:
+    """Extract year from folder name like 'Anime Name (2020)'.
+
+    Returns:
+        Year as int or None if not present.
+    """
+    match = re.search(r"\((\d{4})\)$", folder_name.strip())
+    if match:
+        return int(match.group(1))
+    return None
+
+
+def _extract_key_from_link(link: str) -> Optional[str]:
+    """Extract provider key from search result link.
+
+    Args:
+        link: Link like '/anime/stream/rent-a-girlfriend' or full URL.
+
+    Returns:
+        Key slug like 'rent-a-girlfriend' or None.
+    """
+    if not link:
+        return None
+    if "/anime/stream/" in link:
+        parts = link.split("/anime/stream/")[-1].split("/")
+        key = parts[0].strip()
+        return key if key else None
+    # If link is just a slug
+    if "/" not in link and link.strip():
+        return link.strip()
+    return None
+
+
+def _normalize_for_comparison(text: str) -> str:
+    """Normalize text for case-insensitive comparison.
+
+    Strips whitespace, lowercases, and removes common punctuation
+    differences that shouldn't affect matching.
+
+    Args:
+        text: Raw text string.
+
+    Returns:
+        Normalized lowercase string.
+    """
+    normalized = text.strip().lower()
+    # Remove common punctuation that varies between sources
+    normalized = re.sub(r"[:\-–—]", " ", normalized)
+    # Collapse multiple spaces
+    normalized = re.sub(r"\s+", " ", normalized)
+    return normalized.strip()
+
+
+async def resolve_key_for_folder(folder_name: str) -> Optional[str]:
+    """Attempt to resolve the provider key for a single folder.
+
+    Strategy:
+    1. Strip year suffix from folder name to get search query.
+    2. Search the anime provider with that query.
+    3. If exactly ONE result matches the folder name (case-insensitive),
+       return the key extracted from the result link.
+    4. If zero or multiple matches, return None (not confident enough).
+
+    Args:
+        folder_name: The anime folder name, e.g. 'Rent-A-Girlfriend (2020)'.
+
+    Returns:
+        The provider key string, or None if resolution is not confident.
+    """
+    search_query = _strip_year_from_folder(folder_name)
+    if not search_query:
+        logger.debug("Empty search query after stripping year from '%s'", folder_name)
+        return None
+
+    async with _SEARCH_SEMAPHORE:
+        try:
+            loop = asyncio.get_running_loop()
+            results = await loop.run_in_executor(None, _search_provider, search_query)
+        except Exception as exc:
+            logger.warning(
+                "Provider search failed for '%s': %s", search_query, exc
+            )
+            return None
+
+    if not results:
+        logger.debug("No search results for folder '%s'", folder_name)
+        return None
+
+    # Filter results: find exact name matches (case-insensitive)
+    normalized_query = _normalize_for_comparison(search_query)
+    exact_matches = []
+
+    for result in results:
+        title = result.get("title") or result.get("name") or ""
+        normalized_title = _normalize_for_comparison(title)
+
+        if normalized_title == normalized_query:
+            key = _extract_key_from_link(result.get("link", ""))
+            if key:
+                exact_matches.append((key, title))
+
+    if len(exact_matches) == 1:
+        resolved_key, matched_title = exact_matches[0]
+        logger.info(
+            "Resolved key for folder '%s': key='%s' (matched title: '%s')",
+            folder_name,
+            resolved_key,
+            matched_title,
+        )
+        return resolved_key
+
+    if len(exact_matches) > 1:
+        logger.info(
+            "Multiple exact matches for folder '%s' (%d matches), skipping",
+            folder_name,
+            len(exact_matches),
+        )
+    else:
+        logger.debug(
+            "No exact title match for folder '%s' in %d results",
+            folder_name,
+            len(results),
+        )
+
+    return None
+
+
+def _search_provider(query: str) -> list:
+    """Call the anime provider search synchronously.
+
+    Args:
+        query: Search term.
+
+    Returns:
+        List of search result dicts with 'link' and 'title'/'name' fields.
+    """
+    from src.core.providers.provider_factory import Loaders
+
+    loader = Loaders().GetLoader("aniworld.to")
+    return loader.search(query)
+
+
+async def perform_key_resolution_scan() -> dict[str, int]:
+    """Scan all anime folders and resolve missing keys.
+
+    Iterates over all subfolders of the anime directory. For each folder
+    that has no corresponding database entry, attempts to resolve the
+    provider key via provider search and saves it to the database.
+
+    Returns:
+        Dictionary with counts:
+        - 'scanned': total folders checked
+        - 'resolved': keys successfully resolved and saved
+        - 'skipped': already in DB, resolution uncertain, or save declined
+        - 'errors': folders that caused errors during resolution
+    """
+    from src.server.database.connection import get_db_session
+    from src.server.database.service import AnimeSeriesService
+
+    stats = {"scanned": 0, "resolved": 0, "skipped": 0, "errors": 0}
+
+    if not _settings.anime_directory:
+        logger.warning("Key resolution scan skipped — anime directory not configured")
+        return stats
+
+    anime_dir = Path(_settings.anime_directory)
+    if not anime_dir.is_dir():
+        logger.warning(
+            "Key resolution scan skipped — anime directory not found: %s",
+            anime_dir,
+        )
+        return stats
+
+    # Collect folders that need resolution
+    folders_to_resolve: list[str] = []
+
+    async with get_db_session() as db:
+        for series_dir in sorted(anime_dir.iterdir()):
+            if not series_dir.is_dir():
+                continue
+            folder_name = series_dir.name
+            stats["scanned"] += 1
+
+            # Check if already in database
+            existing = await AnimeSeriesService.get_by_folder(db, folder_name)
+            if existing:
+                stats["skipped"] += 1
+                continue
+
+            folders_to_resolve.append(folder_name)
+
+    if not folders_to_resolve:
+        logger.info("Key resolution scan: all folders already have DB entries")
+        return stats
+
+    logger.info(
+        "Key resolution scan: %d folders need resolution", len(folders_to_resolve)
+    )
+
+    # Resolve keys one by one (provider search is rate-limited)
+    for folder_name in folders_to_resolve:
+        try:
+            key = await resolve_key_for_folder(folder_name)
+            # Count as resolved only when a row was actually written; the
+            # save is declined when the folder or key is already in the DB.
+            if key and await _save_resolved_key(folder_name, key):
+                stats["resolved"] += 1
+            else:
+                stats["skipped"] += 1
+        except Exception as exc:
+            logger.error(
+                "Error resolving key for folder '%s': %s",
+                folder_name,
+                exc,
+            )
+            stats["errors"] += 1
+
+    logger.info(
+        "Key resolution scan complete: scanned=%d, resolved=%d, skipped=%d, errors=%d",
+        stats["scanned"],
+        stats["resolved"],
+        stats["skipped"],
+        stats["errors"],
+    )
+    return stats
+
+
+async def _save_resolved_key(folder_name: str, key: str) -> bool:
+    """Save a resolved key to the database.
+
+    Creates a new AnimeSeries entry with the resolved key and folder name.
+    Does NOT write any key/data file to disk.
+
+    Args:
+        folder_name: The anime folder name (e.g. 'Rent-A-Girlfriend (2020)').
+        key: The resolved provider key (e.g. 'rent-a-girlfriend').
+
+    Returns:
+        True if a new entry was created, False if the folder or the key
+        was already present in the database and nothing was written.
+    """
+    from src.server.database.connection import get_db_session
+    from src.server.database.service import AnimeSeriesService
+
+    name = _strip_year_from_folder(folder_name)
+    year = _extract_year_from_folder(folder_name)
+
+    async with get_db_session() as db:
+        # Double-check: another task might have resolved it concurrently
+        existing = await AnimeSeriesService.get_by_folder(db, folder_name)
+        if existing:
+            logger.debug(
+                "Folder '%s' already in DB (resolved concurrently), skipping",
+                folder_name,
+            )
+            return False
+
+        # Also check if a series with this key already exists
+        existing_key = await AnimeSeriesService.get_by_key(db, key)
+        if existing_key:
+            logger.warning(
+                "Key '%s' already exists in DB for folder '%s', "
+                "cannot assign to folder '%s'",
+                key,
+                existing_key.folder,
+                folder_name,
+            )
+            return False
+
+        await AnimeSeriesService.create(
+            db,
+            key=key,
+            name=name,
+            site="aniworld.to",
+            folder=folder_name,
+            year=year,
+            loading_status="pending",
+            episodes_loaded=False,
+        )
+        logger.info(
+            "Saved resolved key '%s' for folder '%s' to database",
+            key,
+            folder_name,
+        )
+    return True
diff --git a/src/server/services/scheduler_service.py b/src/server/services/scheduler_service.py
index 968c85e..fc34cbd 100644
--- a/src/server/services/scheduler_service.py
+++ b/src/server/services/scheduler_service.py
@@ -316,11 +316,9 @@ class SchedulerService:
             return
 
         try:
-            from src.server.database.connection import (  # noqa: PLC0415
-                get_db_session,
-            )
-            from src.server.database.system_settings_service import (  # noqa: PLC0415
-                SystemSettingsService,
+            from src.server.database.connection import get_db_session  # noqa: PLC0415
+            from src.server.database.system_settings_service import (
+                SystemSettingsService,  # noqa: PLC0415
             )
 
             async with get_db_session() as db:
@@ -367,8 +365,8 @@ class SchedulerService:
     async def _broadcast(self, event_type: str, data: dict) -> None:
         """Broadcast a WebSocket event to all connected clients."""
         try:
-            from src.server.services.websocket_service import (  # noqa: PLC0415
-                get_websocket_service,
+            from src.server.services.websocket_service import (
+                get_websocket_service,  # noqa: PLC0415
             )
 
             ws_service = get_websocket_service()
@@ -503,8 +501,8 @@ class SchedulerService:
         if self._config and self._config.folder_scan_enabled:
             logger.info("Folder scan is enabled — starting")
             try:
-                from src.server.services.folder_scan_service import (  # noqa: PLC0415
-                    FolderScanService,
+                from src.server.services.folder_scan_service import (
+                    FolderScanService,  # noqa: PLC0415
                 )
 
                 folder_scan_service = FolderScanService()
@@ -519,6 +517,26 @@ class SchedulerService:
                 await self._broadcast(
                     "folder_scan_error", {"error": str(fs_exc)}
                 )
+
+            # Key resolution scan (resolve orphaned folders)
+            try:
+                from src.server.services.key_resolution_service import (
+                    perform_key_resolution_scan,  # noqa: PLC0415
+                )
+
+                key_stats = await perform_key_resolution_scan()
+                logger.info(
+                    "Key resolution scan completed: resolved=%d, skipped=%d, errors=%d",
+                    key_stats["resolved"],
+                    key_stats["skipped"],
+                    key_stats["errors"],
+                )
+            except Exception as kr_exc:  # pylint: disable=broad-exception-caught
+                logger.error(
+                    "Key resolution scan failed: %s",
+                    kr_exc,
+                    exc_info=True,
+                )
         else:
             logger.debug("Folder scan is disabled — skipping")
 
diff --git a/tests/unit/test_key_resolution_service.py b/tests/unit/test_key_resolution_service.py
new file mode 100644
index 0000000..1515546
--- /dev/null
+++ b/tests/unit/test_key_resolution_service.py
@@ -0,0 +1,218 @@
+"""Unit tests for key_resolution_service."""
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from src.server.services.key_resolution_service import (
+    _extract_key_from_link,
+    _extract_year_from_folder,
+    _normalize_for_comparison,
+    _strip_year_from_folder,
+    resolve_key_for_folder,
+)
+
+
+class TestStripYearFromFolder:
+    """Tests for _strip_year_from_folder."""
+
+    def test_removes_year_suffix(self):
+        assert _strip_year_from_folder("Rent-A-Girlfriend (2020)") == "Rent-A-Girlfriend"
+
+    def test_removes_year_suffix_with_spaces(self):
+        assert _strip_year_from_folder("Attack on Titan (2013)") == "Attack on Titan"
+
+    def test_no_year_returns_original(self):
+        assert _strip_year_from_folder("Naruto") == "Naruto"
+
+    def test_year_in_middle_not_stripped(self):
+        assert _strip_year_from_folder("2024 Anime (2024)") == "2024 Anime"
+
+    def test_empty_string(self):
+        assert _strip_year_from_folder("") == ""
+
+    def test_only_year(self):
+        assert _strip_year_from_folder("(2020)") == ""
+
+
+class TestExtractYearFromFolder:
+    """Tests for _extract_year_from_folder."""
+
+    def test_extracts_year(self):
+        assert _extract_year_from_folder("Rent-A-Girlfriend (2020)") == 2020
+
+    def test_no_year_returns_none(self):
+        assert _extract_year_from_folder("Naruto") is None
+
+    def test_year_in_middle_not_extracted(self):
+        # Only trailing year is extracted
+        assert _extract_year_from_folder("2024 Anime") is None
+
+
+class TestExtractKeyFromLink:
+    """Tests for _extract_key_from_link."""
+
+    def test_relative_link(self):
+        assert _extract_key_from_link("/anime/stream/rent-a-girlfriend") == "rent-a-girlfriend"
+
+    def test_full_url(self):
+        assert (
+            _extract_key_from_link("https://aniworld.to/anime/stream/attack-on-titan")
+            == "attack-on-titan"
+        )
+
+    def test_link_with_trailing_slash(self):
+        assert _extract_key_from_link("/anime/stream/naruto/") == "naruto"
+
+    def test_empty_link(self):
+        assert _extract_key_from_link("") is None
+
+    def test_none_link(self):
+        assert _extract_key_from_link(None) is None
+
+    def test_slug_only(self):
+        assert _extract_key_from_link("one-piece") == "one-piece"
+
+
+class TestNormalizeForComparison:
+    """Tests for _normalize_for_comparison."""
+
+    def test_case_insensitive(self):
+        assert _normalize_for_comparison("Rent-A-Girlfriend") == _normalize_for_comparison(
+            "rent-a-girlfriend"
+        )
+
+    def test_strips_whitespace(self):
+        assert _normalize_for_comparison(" Naruto ") == "naruto"
+
+    def test_normalizes_dashes(self):
+        assert _normalize_for_comparison("Rent-A-Girlfriend") == "rent a girlfriend"
+
+    def test_collapses_spaces(self):
+        assert _normalize_for_comparison("Attack on Titan") == "attack on titan"
+
+
+class TestResolveKeyForFolder:
+    """Tests for resolve_key_for_folder."""
+
+    @pytest.mark.asyncio
+    async def test_single_exact_match_returns_key(self):
+        """When provider returns exactly one exact-name match, key is resolved."""
+        search_results = [
+            {"link": "/anime/stream/rent-a-girlfriend", "title": "Rent-A-Girlfriend"},
+        ]
+
+        with patch(
+            "src.server.services.key_resolution_service._search_provider",
+            return_value=search_results,
+        ):
+            key = await resolve_key_for_folder("Rent-A-Girlfriend (2020)")
+            assert key == "rent-a-girlfriend"
+
+    @pytest.mark.asyncio
+    async def test_no_results_returns_none(self):
+        """When provider returns no results, returns None."""
+        with patch(
+            "src.server.services.key_resolution_service._search_provider",
+            return_value=[],
+        ):
+            key = await resolve_key_for_folder("Unknown Anime (2020)")
+            assert key is None
+
+    @pytest.mark.asyncio
+    async def test_multiple_exact_matches_returns_none(self):
+        """When multiple results match the same name exactly, returns None."""
+        search_results = [
+            {"link": "/anime/stream/my-anime", "title": "My Anime"},
+            {"link": "/anime/stream/my-anime-2", "title": "My Anime"},
+        ]
+
+        with patch(
+            "src.server.services.key_resolution_service._search_provider",
+            return_value=search_results,
+        ):
+            key = await resolve_key_for_folder("My Anime (2022)")
+            assert key is None
+
+    @pytest.mark.asyncio
+    async def test_no_exact_match_returns_none(self):
+        """When results exist but none match the folder name, returns None."""
+        search_results = [
+            {"link": "/anime/stream/rent-a-girlfriend-2", "title": "Rent-A-Girlfriend 2nd Season"},
+            {"link": "/anime/stream/rent-a-girlfriend-3", "title": "Rent-A-Girlfriend 3rd Season"},
+        ]
+
+        with patch(
+            "src.server.services.key_resolution_service._search_provider",
+            return_value=search_results,
+        ):
+            key = await resolve_key_for_folder("Rent-A-Girlfriend (2020)")
+            assert key is None
+
+    @pytest.mark.asyncio
+    async def test_case_insensitive_match(self):
+        """Matching is case-insensitive."""
+        search_results = [
+            {"link": "/anime/stream/naruto", "title": "NARUTO"},
+        ]
+
+        with patch(
+            "src.server.services.key_resolution_service._search_provider",
+            return_value=search_results,
+        ):
+            key = await resolve_key_for_folder("Naruto (2002)")
+            assert key == "naruto"
+
+    @pytest.mark.asyncio
+    async def test_provider_error_returns_none(self):
+        """When provider search raises an exception, returns None gracefully."""
+        with patch(
+            "src.server.services.key_resolution_service._search_provider",
+            side_effect=RuntimeError("Network error"),
+        ):
+            key = await resolve_key_for_folder("Some Anime (2020)")
+            assert key is None
+
+    @pytest.mark.asyncio
+    async def test_result_with_name_field_instead_of_title(self):
+        """Search results using 'name' field instead of 'title' work."""
+        search_results = [
+            {"link": "/anime/stream/one-piece", "name": "One Piece"},
+        ]
+
+        with patch(
+            "src.server.services.key_resolution_service._search_provider",
+            return_value=search_results,
+        ):
+            key = await resolve_key_for_folder("One Piece (1999)")
+            assert key == "one-piece"
+
+    @pytest.mark.asyncio
+    async def test_folder_without_year(self):
+        """Folders without year suffix still work."""
+        search_results = [
+            {"link": "/anime/stream/naruto", "title": "Naruto"},
+        ]
+
+        with patch(
+            "src.server.services.key_resolution_service._search_provider",
+            return_value=search_results,
+        ):
+            key = await resolve_key_for_folder("Naruto")
+            assert key == "naruto"
+
+    @pytest.mark.asyncio
+    async def test_exact_match_among_partial_matches(self):
+        """Only exact matches count, partial matches are ignored."""
+        search_results = [
+            {"link": "/anime/stream/dororo", "title": "Dororo"},
+            {"link": "/anime/stream/dororo-to-hyakkimaru", "title": "Dororo to Hyakkimaru"},
+        ]
+
+        with patch(
+            "src.server.services.key_resolution_service._search_provider",
+            return_value=search_results,
+        ):
+            key = await resolve_key_for_folder("Dororo (2019)")
+            assert key == "dororo"