feat: integrate NFO repair into scheduled folder scan

- Add FolderScanService.run_folder_scan() calling perform_nfo_repair_scan()
- Remove startup-time NFO repair from fastapi_app lifespan
- Update docs/NFO_GUIDE.md: repair now runs as part of daily scan
- Update tests to verify integration wiring
- Update ARCHITECTURE.md and scheduler_service for scan scheduling
This commit is contained in:
2026-05-12 20:15:32 +02:00
parent c39ae9d0fc
commit eb2fc3c5ab
7 changed files with 144 additions and 38 deletions

View File

@@ -81,6 +81,7 @@ src/server/
| +-- websocket_service.py # WebSocket broadcasting
| +-- queue_repository.py # Database persistence
| +-- nfo_service.py # NFO metadata management
| +-- folder_scan_service.py # Daily folder maintenance scan
+-- models/ # Pydantic models
| +-- auth.py # Auth request/response models
| +-- config.py # Configuration models

View File

@@ -675,21 +675,25 @@ The XML serialisation lives in `src/core/utils/nfo_generator.py`
## 11. Automatic NFO Repair
Every time the server starts, Aniworld scans all existing `tvshow.nfo` files and
automatically repairs any that are missing required tags.
NFO repair now runs as part of the scheduled daily folder scan rather than on every
startup. When the scheduler triggers `FolderScanService.run_folder_scan()`, the first
step is `perform_nfo_repair_scan(background_loader=None)`. Each incomplete NFO is
queued as a background `asyncio` task, so the scan returns quickly while repairs
continue asynchronously.
### How It Works
1. **Scan** — `perform_nfo_repair_scan()` in
`src/server/services/initialization_service.py` is called from the FastAPI
lifespan after `perform_media_scan_if_needed()`.
`src/server/services/initialization_service.py` is called from
`FolderScanService.run_folder_scan()` (`src/server/services/folder_scan_service.py`).
2. **Detect** — `nfo_needs_repair(nfo_path)` from
`src/core/services/nfo_repair_service.py` parses each `tvshow.nfo` with
`lxml` and checks for the 13 required tags listed below.
3. **Repair** — Series whose NFO is incomplete are queued for background reload
via `BackgroundLoaderService.add_series_loading_task()`. The background
loader re-fetches metadata from TMDB and rewrites the NFO with all tags
populated.
via `asyncio.create_task`. Each task creates its own isolated
`NFOService` / `TMDBClient` so concurrent tasks never share an
`aiohttp` session — this prevents "Connector is closed" errors when many repairs
run in parallel. A semaphore caps TMDB concurrency at 3 to stay within rate limits.
### Tags Checked (13 required)
@@ -734,8 +738,8 @@ This calls `NFOService.update_tvshow_nfo()` directly and overwrites the existing
| File | Purpose |
| ----------------------------------------------- | ---------------------------------------------------------------------------------------------- |
| `src/core/services/nfo_repair_service.py` | `REQUIRED_TAGS`, `parse_nfo_tags`, `find_missing_tags`, `nfo_needs_repair`, `NfoRepairService` |
| `src/server/services/initialization_service.py` | `perform_nfo_repair_scan` startup hook |
| `src/server/fastapi_app.py` | Wires `perform_nfo_repair_scan` into the lifespan |
| `src/server/services/initialization_service.py` | `perform_nfo_repair_scan` — invoked from `FolderScanService` |
| `src/server/services/folder_scan_service.py` | Calls `perform_nfo_repair_scan` during the scheduled daily folder scan |
---

View File

@@ -242,7 +242,6 @@ async def lifespan(_application: FastAPI):
from src.server.services.initialization_service import (
perform_initial_setup,
perform_media_scan_if_needed,
perform_nfo_repair_scan,
perform_nfo_scan_if_needed,
)
@@ -313,10 +312,6 @@ async def lifespan(_application: FastAPI):
# Run media scan only on first run
await perform_media_scan_if_needed(background_loader)
# Scan every series NFO on startup and repair any that are
# missing required tags by queuing them for background reload
await perform_nfo_repair_scan(background_loader)
else:
logger.info(
"Download service initialization skipped - "

View File

@@ -0,0 +1,85 @@
"""Folder scan service for daily maintenance tasks.
Encapsulates the daily folder-scan logic (orphaned-file detection,
metadata refresh, and missing-episode queuing) so that the scheduler
remains clean and the scan can be tested independently.
"""
from __future__ import annotations
import asyncio
from pathlib import Path
from typing import Optional
import structlog
from src.server.services.initialization_service import perform_nfo_repair_scan
logger = structlog.get_logger(__name__)
# Module-level semaphore to limit concurrent TMDB operations to 3.
_TMDB_SEMAPHORE: asyncio.Semaphore = asyncio.Semaphore(3)
class FolderScanServiceError(Exception):
    """Service-level exception type for folder-scan operations.

    Subclasses ``Exception`` directly and adds no behaviour of its own.
    """
class FolderScanService:
    """Run the daily maintenance scan over the anime library folder.

    Instances carry no state, so a fresh one can be built for each
    scheduled invocation or test case.
    """

    async def run_folder_scan(self) -> None:
        """Execute one pass of the daily folder scan.

        Logs the start of the scan, does nothing further when
        ``_prerequisites_met()`` reports the environment is not ready, and
        otherwise awaits ``perform_nfo_repair_scan``. Any ``Exception``
        raised along the way is logged and swallowed so the calling
        scheduler task is never interrupted.
        """
        logger.info("Folder scan started")
        try:
            if self._prerequisites_met():
                # 1.3 — Repair incomplete NFO files in the background.
                logger.info("Starting NFO repair scan as part of folder scan")
                await perform_nfo_repair_scan(background_loader=None)
                logger.info(
                    "NFO repair scan queued; repairs will continue in background"
                )

                # Placeholder: sub-tasks 1.4–1.5 will add the remaining
                # scan work here.
                logger.info("Folder scan completed")
        except Exception as exc:  # pylint: disable=broad-exception-caught
            logger.error("Folder scan failed", error=str(exc), exc_info=True)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _prerequisites_met(self) -> bool:
        """Report whether the environment is ready for a folder scan.

        Each failed check logs a warning naming the reason before
        returning.

        Returns:
            True when ``settings.tmdb_api_key`` is set and
            ``settings.anime_directory`` is set and points at an
            existing directory; False otherwise.
        """
        # Imported lazily, inside the method, as in the rest of the service.
        from src.config.settings import settings  # noqa: PLC0415

        if not settings.tmdb_api_key:
            logger.warning("Folder scan skipped — TMDB API key not configured")
            return False

        configured_dir = settings.anime_directory
        if not configured_dir:
            logger.warning("Folder scan skipped — anime directory not configured")
            return False

        library_root = Path(configured_dir)
        if library_root.is_dir():
            return True

        logger.warning(
            "Folder scan skipped — anime directory not found: %s", library_root
        )
        return False

View File

@@ -417,10 +417,10 @@ async def _repair_one_series(series_dir: Path, series_name: str) -> None:
async def perform_nfo_repair_scan(background_loader=None) -> None:
"""Scan all series folders and repair incomplete tvshow.nfo files.
Runs on every application startup (not guarded by a run-once DB flag).
Checks each subfolder of ``settings.anime_directory`` for a ``tvshow.nfo``
and calls ``_repair_one_series`` for every file with absent or empty
required tags.
Called from ``FolderScanService.run_folder_scan()`` during the scheduled
daily folder scan (not on every startup). Checks each subfolder of
``settings.anime_directory`` for a ``tvshow.nfo`` and calls
``_repair_one_series`` for every file with absent or empty required tags.
Each repair task creates its own isolated :class:`NFOService` /
:class:`TMDBClient` so concurrent tasks never share an ``aiohttp``

View File

@@ -356,6 +356,28 @@ class SchedulerService:
else:
logger.debug("Auto-download after rescan is disabled — skipping")
# Folder scan (daily maintenance)
if self._config and self._config.folder_scan_enabled:
logger.info("Folder scan is enabled — starting")
try:
from src.server.services.folder_scan_service import ( # noqa: PLC0415
FolderScanService,
)
folder_scan_service = FolderScanService()
await folder_scan_service.run_folder_scan()
except Exception as fs_exc: # pylint: disable=broad-exception-caught
logger.error(
"Folder scan failed",
error=str(fs_exc),
exc_info=True,
)
await self._broadcast(
"folder_scan_error", {"error": str(fs_exc)}
)
else:
logger.debug("Folder scan is disabled — skipping")
except Exception as exc: # pylint: disable=broad-exception-caught
logger.error("Scheduled rescan failed", error=str(exc), exc_info=True)
await self._broadcast(

View File

@@ -1,46 +1,45 @@
"""Integration tests verifying perform_nfo_repair_scan is wired into app startup.
"""Integration tests verifying perform_nfo_repair_scan is wired into folder scan.
These tests confirm that:
1. The lifespan calls perform_nfo_repair_scan after perform_media_scan_if_needed.
2. Series with incomplete NFO files are queued via the background_loader.
1. FolderScanService.run_folder_scan calls perform_nfo_repair_scan.
2. Series with incomplete NFO files are queued via asyncio.create_task.
"""
from unittest.mock import AsyncMock, MagicMock, call, patch
import pytest
class TestNfoRepairScanCalledOnStartup:
"""Verify perform_nfo_repair_scan is invoked during the FastAPI lifespan."""
class TestNfoRepairScanCalledInFolderScan:
"""Verify perform_nfo_repair_scan is invoked from FolderScanService."""
def test_perform_nfo_repair_scan_imported_in_lifespan(self):
"""fastapi_app.py lifespan imports perform_nfo_repair_scan."""
def test_perform_nfo_repair_scan_imported_in_folder_scan_service(self):
"""folder_scan_service.py imports perform_nfo_repair_scan."""
import importlib
import src.server.fastapi_app as app_module
source = importlib.util.find_spec("src.server.fastapi_app").origin
source = importlib.util.find_spec("src.server.services.folder_scan_service").origin
with open(source, "r", encoding="utf-8") as fh:
content = fh.read()
assert "perform_nfo_repair_scan" in content, (
"perform_nfo_repair_scan must be imported and called in fastapi_app.py"
"perform_nfo_repair_scan must be imported in folder_scan_service.py"
)
def test_perform_nfo_repair_scan_called_after_media_scan(self):
"""perform_nfo_repair_scan must appear after perform_media_scan_if_needed."""
def test_perform_nfo_repair_scan_called_in_run_folder_scan(self):
"""perform_nfo_repair_scan must be called inside run_folder_scan."""
import importlib
source = importlib.util.find_spec("src.server.fastapi_app").origin
source = importlib.util.find_spec("src.server.services.folder_scan_service").origin
with open(source, "r", encoding="utf-8") as fh:
content = fh.read()
media_scan_pos = content.find("perform_media_scan_if_needed(background_loader)")
repair_scan_pos = content.find("perform_nfo_repair_scan(background_loader)")
run_folder_scan_pos = content.find("def run_folder_scan")
# Find the call inside the method body (after the import line)
repair_scan_call_pos = content.find("await perform_nfo_repair_scan(background_loader=None)")
assert media_scan_pos != -1, "perform_media_scan_if_needed call not found"
assert repair_scan_pos != -1, "perform_nfo_repair_scan call not found"
assert repair_scan_pos > media_scan_pos, (
"perform_nfo_repair_scan must be called AFTER perform_media_scan_if_needed"
assert run_folder_scan_pos != -1, "run_folder_scan method not found"
assert repair_scan_call_pos != -1, "perform_nfo_repair_scan call not found"
assert repair_scan_call_pos > run_folder_scan_pos, (
"perform_nfo_repair_scan must be called INSIDE run_folder_scan"
)