From eb2fc3c5ab20b2ec6b565ccb931f6ffc91fb2fb5 Mon Sep 17 00:00:00 2001 From: Lukas Date: Tue, 12 May 2026 20:15:32 +0200 Subject: [PATCH] feat: integrate NFO repair into scheduled folder scan - Add FolderScanService.run_folder_scan() calling perform_nfo_repair_scan() - Remove startup-time NFO repair from fastapi_app lifespan - Update docs/NFO_GUIDE.md: repair now runs as part of daily scan - Update tests to verify integration wiring - Update ARCHITECTURE.md and scheduler_service for scan scheduling --- docs/ARCHITECTURE.md | 1 + docs/NFO_GUIDE.md | 22 +++-- src/server/fastapi_app.py | 5 -- src/server/services/folder_scan_service.py | 85 +++++++++++++++++++ src/server/services/initialization_service.py | 8 +- src/server/services/scheduler_service.py | 22 +++++ tests/integration/test_nfo_repair_startup.py | 39 +++++---- 7 files changed, 144 insertions(+), 38 deletions(-) create mode 100644 src/server/services/folder_scan_service.py diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 51f74fe..90c8409 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -81,6 +81,7 @@ src/server/ | +-- websocket_service.py# WebSocket broadcasting | +-- queue_repository.py # Database persistence | +-- nfo_service.py # NFO metadata management +| +-- folder_scan_service.py # Daily folder maintenance scan +-- models/ # Pydantic models | +-- auth.py # Auth request/response models | +-- config.py # Configuration models diff --git a/docs/NFO_GUIDE.md b/docs/NFO_GUIDE.md index b9b2a33..cb4dc52 100644 --- a/docs/NFO_GUIDE.md +++ b/docs/NFO_GUIDE.md @@ -675,21 +675,25 @@ The XML serialisation lives in `src/core/utils/nfo_generator.py` ## 11. Automatic NFO Repair -Every time the server starts, Aniworld scans all existing `tvshow.nfo` files and -automatically repairs any that are missing required tags. +NFO repair now runs as part of the scheduled daily folder scan rather than on every +startup. 
When the scheduler triggers `FolderScanService.run_folder_scan()`, the first +step is `perform_nfo_repair_scan(background_loader=None)`. Each incomplete NFO is +queued as a background `asyncio` task, so the scan returns quickly while repairs +continue asynchronously. ### How It Works 1. **Scan** — `perform_nfo_repair_scan()` in - `src/server/services/initialization_service.py` is called from the FastAPI - lifespan after `perform_media_scan_if_needed()`. + `src/server/services/initialization_service.py` is called from + `FolderScanService.run_folder_scan()` (`src/server/services/folder_scan_service.py`). 2. **Detect** — `nfo_needs_repair(nfo_path)` from `src/core/services/nfo_repair_service.py` parses each `tvshow.nfo` with `lxml` and checks for the 13 required tags listed below. 3. **Repair** — Series whose NFO is incomplete are queued for background reload - via `BackgroundLoaderService.add_series_loading_task()`. The background - loader re-fetches metadata from TMDB and rewrites the NFO with all tags - populated. + via `asyncio.create_task`. Each task creates its own isolated + `NFOService` / `TMDBClient` so concurrent tasks never share an + `aiohttp` session — this prevents "Connector is closed" errors when many repairs + run in parallel. A semaphore caps TMDB concurrency at 3 to stay within rate limits. 
### Tags Checked (13 required) @@ -734,8 +738,8 @@ This calls `NFOService.update_tvshow_nfo()` directly and overwrites the existing | File | Purpose | | ----------------------------------------------- | ---------------------------------------------------------------------------------------------- | | `src/core/services/nfo_repair_service.py` | `REQUIRED_TAGS`, `parse_nfo_tags`, `find_missing_tags`, `nfo_needs_repair`, `NfoRepairService` | -| `src/server/services/initialization_service.py` | `perform_nfo_repair_scan` startup hook | -| `src/server/fastapi_app.py` | Wires `perform_nfo_repair_scan` into the lifespan | +| `src/server/services/initialization_service.py` | `perform_nfo_repair_scan` — invoked from `FolderScanService` | +| `src/server/services/folder_scan_service.py` | Calls `perform_nfo_repair_scan` during the scheduled daily folder scan | --- diff --git a/src/server/fastapi_app.py b/src/server/fastapi_app.py index add66f6..c4887b8 100644 --- a/src/server/fastapi_app.py +++ b/src/server/fastapi_app.py @@ -242,7 +242,6 @@ async def lifespan(_application: FastAPI): from src.server.services.initialization_service import ( perform_initial_setup, perform_media_scan_if_needed, - perform_nfo_repair_scan, perform_nfo_scan_if_needed, ) @@ -313,10 +312,6 @@ async def lifespan(_application: FastAPI): # Run media scan only on first run await perform_media_scan_if_needed(background_loader) - - # Scan every series NFO on startup and repair any that are - # missing required tags by queuing them for background reload - await perform_nfo_repair_scan(background_loader) else: logger.info( "Download service initialization skipped - " diff --git a/src/server/services/folder_scan_service.py b/src/server/services/folder_scan_service.py new file mode 100644 index 0000000..9d716a7 --- /dev/null +++ b/src/server/services/folder_scan_service.py @@ -0,0 +1,85 @@ +"""Folder scan service for daily maintenance tasks. 
+ +Encapsulates the daily folder-scan logic (orphaned-file detection, +metadata refresh, and missing-episode queuing) so that the scheduler +remains clean and the scan can be tested independently. +""" +from __future__ import annotations + +import asyncio +from pathlib import Path +from typing import Optional + +import structlog + +from src.server.services.initialization_service import perform_nfo_repair_scan + +logger = structlog.get_logger(__name__) + +# Module-level semaphore to limit concurrent TMDB operations to 3. +_TMDB_SEMAPHORE: asyncio.Semaphore = asyncio.Semaphore(3) + + +class FolderScanServiceError(Exception): + """Service-level exception for folder-scan operations.""" + + +class FolderScanService: + """Performs daily maintenance scans over the anime library folder. + + The service is intentionally stateless; a new instance can be created + for every scheduled invocation or test case. + """ + + async def run_folder_scan(self) -> None: + """Execute the daily folder scan. + + Checks prerequisites, logs progress, and delegates to sub-task + helpers. Any unhandled exception is caught and logged so the + scheduler task never crashes. + """ + logger.info("Folder scan started") + + try: + if not self._prerequisites_met(): + return + + # 1.3 — Repair incomplete NFO files in the background. + logger.info("Starting NFO repair scan as part of folder scan") + await perform_nfo_repair_scan(background_loader=None) + logger.info("NFO repair scan queued; repairs will continue in background") + + # Sub-tasks 1.4–1.5 will fill in the actual work here. 
+ logger.info("Folder scan completed") + except Exception as exc: # pylint: disable=broad-exception-caught + logger.error("Folder scan failed", error=str(exc), exc_info=True) + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + def _prerequisites_met(self) -> bool: + """Verify that the environment is ready for a folder scan. + + Returns: + True when ``settings.anime_directory`` exists and + ``settings.tmdb_api_key`` is configured. + """ + from src.config.settings import settings # noqa: PLC0415 + + if not settings.tmdb_api_key: + logger.warning("Folder scan skipped — TMDB API key not configured") + return False + + if not settings.anime_directory: + logger.warning("Folder scan skipped — anime directory not configured") + return False + + anime_dir = Path(settings.anime_directory) + if not anime_dir.is_dir(): + logger.warning( + "Folder scan skipped — anime directory not found: %s", anime_dir + ) + return False + + return True diff --git a/src/server/services/initialization_service.py b/src/server/services/initialization_service.py index ac16334..0a75781 100644 --- a/src/server/services/initialization_service.py +++ b/src/server/services/initialization_service.py @@ -417,10 +417,10 @@ async def _repair_one_series(series_dir: Path, series_name: str) -> None: async def perform_nfo_repair_scan(background_loader=None) -> None: """Scan all series folders and repair incomplete tvshow.nfo files. - Runs on every application startup (not guarded by a run-once DB flag). - Checks each subfolder of ``settings.anime_directory`` for a ``tvshow.nfo`` - and calls ``_repair_one_series`` for every file with absent or empty - required tags. + Called from ``FolderScanService.run_folder_scan()`` during the scheduled + daily folder scan (not on every startup). 
Checks each subfolder of + ``settings.anime_directory`` for a ``tvshow.nfo`` and calls + ``_repair_one_series`` for every file with absent or empty required tags. Each repair task creates its own isolated :class:`NFOService` / :class:`TMDBClient` so concurrent tasks never share an ``aiohttp`` diff --git a/src/server/services/scheduler_service.py b/src/server/services/scheduler_service.py index 5141e4b..ac7d555 100644 --- a/src/server/services/scheduler_service.py +++ b/src/server/services/scheduler_service.py @@ -356,6 +356,28 @@ class SchedulerService: else: logger.debug("Auto-download after rescan is disabled — skipping") + # Folder scan (daily maintenance) + if self._config and self._config.folder_scan_enabled: + logger.info("Folder scan is enabled — starting") + try: + from src.server.services.folder_scan_service import ( # noqa: PLC0415 + FolderScanService, + ) + + folder_scan_service = FolderScanService() + await folder_scan_service.run_folder_scan() + except Exception as fs_exc: # pylint: disable=broad-exception-caught + logger.error( + "Folder scan failed", + error=str(fs_exc), + exc_info=True, + ) + await self._broadcast( + "folder_scan_error", {"error": str(fs_exc)} + ) + else: + logger.debug("Folder scan is disabled — skipping") + except Exception as exc: # pylint: disable=broad-exception-caught logger.error("Scheduled rescan failed", error=str(exc), exc_info=True) await self._broadcast( diff --git a/tests/integration/test_nfo_repair_startup.py b/tests/integration/test_nfo_repair_startup.py index f9f0046..05f5e2f 100644 --- a/tests/integration/test_nfo_repair_startup.py +++ b/tests/integration/test_nfo_repair_startup.py @@ -1,46 +1,45 @@ -"""Integration tests verifying perform_nfo_repair_scan is wired into app startup. +"""Integration tests verifying perform_nfo_repair_scan is wired into folder scan. These tests confirm that: -1. The lifespan calls perform_nfo_repair_scan after perform_media_scan_if_needed. -2. 
Series with incomplete NFO files are queued via the background_loader. +1. FolderScanService.run_folder_scan calls perform_nfo_repair_scan. +2. Series with incomplete NFO files are queued via asyncio.create_task. """ from unittest.mock import AsyncMock, MagicMock, call, patch import pytest -class TestNfoRepairScanCalledOnStartup: - """Verify perform_nfo_repair_scan is invoked during the FastAPI lifespan.""" +class TestNfoRepairScanCalledInFolderScan: + """Verify perform_nfo_repair_scan is invoked from FolderScanService.""" - def test_perform_nfo_repair_scan_imported_in_lifespan(self): - """fastapi_app.py lifespan imports perform_nfo_repair_scan.""" + def test_perform_nfo_repair_scan_imported_in_folder_scan_service(self): + """folder_scan_service.py imports perform_nfo_repair_scan.""" import importlib - import src.server.fastapi_app as app_module - - source = importlib.util.find_spec("src.server.fastapi_app").origin + source = importlib.util.find_spec("src.server.services.folder_scan_service").origin with open(source, "r", encoding="utf-8") as fh: content = fh.read() assert "perform_nfo_repair_scan" in content, ( - "perform_nfo_repair_scan must be imported and called in fastapi_app.py" + "perform_nfo_repair_scan must be imported in folder_scan_service.py" ) - def test_perform_nfo_repair_scan_called_after_media_scan(self): - """perform_nfo_repair_scan must appear after perform_media_scan_if_needed.""" + def test_perform_nfo_repair_scan_called_in_run_folder_scan(self): + """perform_nfo_repair_scan must be called inside run_folder_scan.""" import importlib - source = importlib.util.find_spec("src.server.fastapi_app").origin + source = importlib.util.find_spec("src.server.services.folder_scan_service").origin with open(source, "r", encoding="utf-8") as fh: content = fh.read() - media_scan_pos = content.find("perform_media_scan_if_needed(background_loader)") - repair_scan_pos = content.find("perform_nfo_repair_scan(background_loader)") + run_folder_scan_pos = 
content.find("def run_folder_scan") + # Find the call inside the method body (after the import line) + repair_scan_call_pos = content.find("await perform_nfo_repair_scan(background_loader=None)") - assert media_scan_pos != -1, "perform_media_scan_if_needed call not found" - assert repair_scan_pos != -1, "perform_nfo_repair_scan call not found" - assert repair_scan_pos > media_scan_pos, ( - "perform_nfo_repair_scan must be called AFTER perform_media_scan_if_needed" + assert run_folder_scan_pos != -1, "run_folder_scan method not found" + assert repair_scan_call_pos != -1, "perform_nfo_repair_scan call not found" + assert repair_scan_call_pos > run_folder_scan_pos, ( + "perform_nfo_repair_scan must be called INSIDE run_folder_scan" )