feat(scanner): add DB fallback for series key resolution

When SerieScanner encounters a folder without a local key or data file,
it now optionally falls back to a database lookup by folder name. This
prevents newly-added series from being silently skipped on rescan when
their metadata only lives in the DB.

Changes:
- SerieScanner accepts an optional db_lookup callable
- SeriesApp forwards db_lookup to SerieScanner
- AnimeSeriesService adds get_by_folder_sync() helper
- dependencies.py wires a sync DB lookup into get_series_app()
- Unit tests cover fallback hit, miss, and exception paths
This commit is contained in:
2026-05-14 19:28:43 +02:00
parent 69c2fd01f9
commit e3509f5c8f
5 changed files with 288 additions and 8 deletions

View File

@@ -15,7 +15,7 @@ import os
import re import re
import traceback import traceback
import uuid import uuid
from typing import Iterable, Iterator, Optional from typing import Callable, Iterable, Iterator, Optional
from events import Events from events import Events
@@ -43,12 +43,17 @@ class SerieScanner:
scanner = SerieScanner("/path/to/anime", loader) scanner = SerieScanner("/path/to/anime", loader)
scanner.scan() scanner.scan()
# Results are in scanner.keyDict # Results are in scanner.keyDict
# With DB lookup fallback:
scanner = SerieScanner("/path/to/anime", loader,
db_lookup=lambda folder: my_db.get_by_folder(folder))
""" """
def __init__( def __init__(
self, self,
basePath: str, basePath: str,
loader: Loader, loader: Loader,
db_lookup: Optional[Callable[[str], Optional["Serie"]]] = None,
) -> None: ) -> None:
""" """
Initialize the SerieScanner. Initialize the SerieScanner.
@@ -56,8 +61,12 @@ class SerieScanner:
Args: Args:
basePath: Base directory containing anime series basePath: Base directory containing anime series
loader: Loader instance for fetching series information loader: Loader instance for fetching series information
callback_manager: Optional callback manager for progress updates db_lookup: Optional callable ``(folder_name) -> Serie | None``.
When provided, it is called as a fallback when neither a
``key`` file nor a ``data`` file is found in the folder.
This allows the database to supply the series key for
folders that have never had a local key file.
Raises: Raises:
ValueError: If basePath is invalid or doesn't exist ValueError: If basePath is invalid or doesn't exist
""" """
@@ -75,6 +84,7 @@ class SerieScanner:
self.directory: str = abs_path self.directory: str = abs_path
self.keyDict: dict[str, Serie] = {} self.keyDict: dict[str, Serie] = {}
self.loader: Loader = loader self.loader: Loader = loader
self._db_lookup: Optional[Callable[[str], Optional[Serie]]] = db_lookup
self._current_operation_id: Optional[str] = None self._current_operation_id: Optional[str] = None
self.events = Events() self.events = Events()
@@ -268,6 +278,30 @@ class SerieScanner:
) )
serie = self.__read_data_from_file(folder) serie = self.__read_data_from_file(folder)
if serie is None or not serie.key or not serie.key.strip():
# Fallback: ask the database for a matching series
if self._db_lookup is not None:
try:
serie = self._db_lookup(folder)
if serie:
logger.info(
"DB lookup resolved folder '%s' -> key='%s'",
folder,
serie.key,
)
except Exception as exc:
logger.warning(
"DB lookup failed for folder '%s': %s",
folder,
exc,
)
serie = None
if serie is None or not serie.key or not serie.key.strip():
logger.warning(
"No key or data file found for folder '%s', skipping",
folder,
)
if ( if (
serie is not None serie is not None
and serie.key and serie.key

View File

@@ -14,7 +14,7 @@ import asyncio
import logging import logging
import os import os
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, List, Optional from typing import Any, Callable, Dict, List, Optional
from events import Events from events import Events
@@ -143,12 +143,16 @@ class SeriesApp:
def __init__( def __init__(
self, self,
directory_to_search: str, directory_to_search: str,
db_lookup: Optional[Callable[[str], Optional["Serie"]]] = None,
): ):
""" """
Initialize SeriesApp. Initialize SeriesApp.
Args: Args:
directory_to_search: Base directory for anime series directory_to_search: Base directory for anime series
db_lookup: Optional callable ``(folder_name) -> Serie | None``
passed through to ``SerieScanner`` as a fallback key source
when no local ``key`` or ``data`` file exists.
""" """
self.directory_to_search = directory_to_search self.directory_to_search = directory_to_search
@@ -162,7 +166,7 @@ class SeriesApp:
self.loaders = Loaders() self.loaders = Loaders()
self.loader = self.loaders.GetLoader(key="aniworld.to") self.loader = self.loaders.GetLoader(key="aniworld.to")
self.serie_scanner = SerieScanner( self.serie_scanner = SerieScanner(
directory_to_search, self.loader directory_to_search, self.loader, db_lookup=db_lookup
) )
# Skip automatic loading from data files - series will be loaded # Skip automatic loading from data files - series will be loaded
# from database by the service layer during application setup # from database by the service layer during application setup

View File

@@ -148,7 +148,27 @@ class AnimeSeriesService:
select(AnimeSeries).where(AnimeSeries.key == key) select(AnimeSeries).where(AnimeSeries.key == key)
) )
return result.scalar_one_or_none() return result.scalar_one_or_none()
@staticmethod
def get_by_folder_sync(db: Session, folder: str) -> Optional[AnimeSeries]:
"""Look up an anime series by its filesystem folder name (sync).
Intended as a fallback for ``SerieScanner`` when neither a ``key``
file nor a ``data`` file exists on disk for a given folder.
Args:
db: Synchronous database session (from ``get_sync_session``).
folder: Filesystem folder name to match (e.g.
``"Rooster Fighter (2026)"``).
Returns:
``AnimeSeries`` instance or ``None`` if not found.
"""
result = db.execute(
select(AnimeSeries).where(AnimeSeries.folder == folder)
)
return result.scalar_one_or_none()
@staticmethod @staticmethod
async def get_all( async def get_all(
db: AsyncSession, db: AsyncSession,

View File

@@ -57,6 +57,44 @@ _rate_limit_lock = Lock()
_RATE_LIMIT_WINDOW_SECONDS = 60.0 _RATE_LIMIT_WINDOW_SECONDS = 60.0
def _make_db_lookup():
"""Build a synchronous ``(folder) -> Serie | None`` callable for SerieScanner.
The returned function opens a short-lived sync DB session, queries for a
series whose ``folder`` column matches the given name, and converts the
ORM row to a ``Serie`` domain object. Returns ``None`` when the DB is not
yet initialised or no matching row is found.
"""
from src.core.entities.series import Serie
def _lookup(folder: str) -> Optional["Serie"]:
try:
from src.server.database.connection import get_sync_session
from src.server.database.service import AnimeSeriesService
db = get_sync_session()
try:
row = AnimeSeriesService.get_by_folder_sync(db, folder)
finally:
db.close()
if row is None:
return None
return Serie(
key=row.key,
name=row.name or "",
site=row.site,
folder=row.folder,
episodeDict={},
year=row.year,
)
except RuntimeError:
# DB not initialised yet (e.g. first boot before init_db())
return None
return _lookup
def get_series_app() -> SeriesApp: def get_series_app() -> SeriesApp:
""" """
Dependency to get SeriesApp instance. Dependency to get SeriesApp instance.
@@ -134,7 +172,7 @@ def get_series_app() -> SeriesApp:
), ),
) )
_series_app = SeriesApp(anime_dir) _series_app = SeriesApp(anime_dir, db_lookup=_make_db_lookup())
except Exception as e: except Exception as e:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,

View File

@@ -1,5 +1,6 @@
"""Tests for SerieScanner class - file-based operations.""" """Tests for SerieScanner class - file-based operations."""
import logging
import os import os
import tempfile import tempfile
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
@@ -651,4 +652,187 @@ class TestScanProgressEvents:
error_handler.assert_called_once() error_handler.assert_called_once()
call_data = error_handler.call_args[0][0] call_data = error_handler.call_args[0][0]
assert call_data["recoverable"] is True assert call_data["recoverable"] is True
class TestDbLookupFallback:
"""Tests for the db_lookup callback in SerieScanner."""
def _make_scanner(self, tmp_dir, mock_loader, db_lookup=None):
"""Create a scanner with an optional db_lookup."""
# Create a folder with an mp4 but NO key/data file
folder = os.path.join(tmp_dir, "Rooster Fighter (2026)")
os.makedirs(folder, exist_ok=True)
mp4 = os.path.join(folder, "Rooster Fighter - S01E001 - (German Dub).mp4")
with open(mp4, "w") as f:
f.write("dummy")
return SerieScanner(tmp_dir, mock_loader, db_lookup=db_lookup)
def test_db_lookup_stored_on_init(self, temp_directory, mock_loader):
"""db_lookup callable should be stored as _db_lookup."""
lookup = MagicMock(return_value=None)
scanner = SerieScanner(temp_directory, mock_loader, db_lookup=lookup)
assert scanner._db_lookup is lookup
def test_no_db_lookup_defaults_to_none(self, temp_directory, mock_loader):
"""Without db_lookup, _db_lookup should be None."""
scanner = SerieScanner(temp_directory, mock_loader)
assert scanner._db_lookup is None
def test_db_lookup_called_when_no_files(self, mock_loader):
"""db_lookup is called when neither key nor data file exists."""
import tempfile
with tempfile.TemporaryDirectory() as tmp_dir:
lookup = MagicMock(return_value=None)
scanner = self._make_scanner(tmp_dir, mock_loader, db_lookup=lookup)
with patch.object(scanner, 'get_total_to_scan', return_value=1), \
patch.object(
scanner,
'_SerieScanner__get_missing_episodes_and_season',
return_value=({}, "aniworld.to"),
):
scanner.scan()
lookup.assert_called_once_with("Rooster Fighter (2026)")
def test_db_lookup_not_called_when_key_file_exists(self, mock_loader):
"""db_lookup is NOT called when a key file is present."""
import tempfile
with tempfile.TemporaryDirectory() as tmp_dir:
folder = os.path.join(tmp_dir, "Rooster Fighter (2026)")
os.makedirs(folder, exist_ok=True)
mp4 = os.path.join(folder, "S01E001.mp4")
with open(mp4, "w") as f:
f.write("dummy")
with open(os.path.join(folder, "key"), "w") as f:
f.write("rooster-fighter")
lookup = MagicMock(return_value=None)
scanner = SerieScanner(tmp_dir, mock_loader, db_lookup=lookup)
with patch.object(scanner, 'get_total_to_scan', return_value=1), \
patch.object(
scanner,
'_SerieScanner__get_missing_episodes_and_season',
return_value=({1: []}, "aniworld.to"),
), \
patch.object(
SerieScanner,
'_SerieScanner__read_data_from_file',
return_value=Serie(
key="rooster-fighter", name="", site="aniworld.to",
folder="Rooster Fighter (2026)", episodeDict={},
),
):
scanner.scan()
lookup.assert_not_called()
def test_db_lookup_resolves_serie_and_scans(self, mock_loader):
"""When db_lookup returns a Serie, scanning continues normally."""
import tempfile
with tempfile.TemporaryDirectory() as tmp_dir:
resolved = Serie(
key="rooster-fighter",
name="Rooster Fighter",
site="aniworld.to",
folder="Rooster Fighter (2026)",
episodeDict={},
year=2026,
)
lookup = MagicMock(return_value=resolved)
scanner = self._make_scanner(tmp_dir, mock_loader, db_lookup=lookup)
with patch.object(scanner, 'get_total_to_scan', return_value=1), \
patch.object(
scanner,
'_SerieScanner__get_missing_episodes_and_season',
return_value=({1: [1, 2, 3]}, "aniworld.to"),
), \
patch.object(resolved, 'save_to_file'):
scanner.scan()
assert "rooster-fighter" in scanner.keyDict
assert scanner.keyDict["rooster-fighter"].episodeDict == {1: [1, 2, 3]}
def test_db_lookup_returns_none_folder_skipped(self, mock_loader):
"""When db_lookup returns None, the folder is skipped with a warning."""
import tempfile
with tempfile.TemporaryDirectory() as tmp_dir:
lookup = MagicMock(return_value=None)
scanner = self._make_scanner(tmp_dir, mock_loader, db_lookup=lookup)
with patch.object(scanner, 'get_total_to_scan', return_value=1):
scanner.scan()
assert len(scanner.keyDict) == 0
def test_db_lookup_exception_skips_folder(self, mock_loader):
"""When db_lookup raises, the folder is skipped gracefully."""
import tempfile
with tempfile.TemporaryDirectory() as tmp_dir:
lookup = MagicMock(side_effect=RuntimeError("DB offline"))
scanner = self._make_scanner(tmp_dir, mock_loader, db_lookup=lookup)
with patch.object(scanner, 'get_total_to_scan', return_value=1):
scanner.scan() # should not raise
assert len(scanner.keyDict) == 0
def test_db_lookup_warning_logged_when_no_files(
self, mock_loader, caplog
):
"""A warning is logged for folders without key/data file."""
import tempfile
with tempfile.TemporaryDirectory() as tmp_dir:
scanner = self._make_scanner(tmp_dir, mock_loader, db_lookup=None)
with caplog.at_level(logging.WARNING, logger="src.core.SerieScanner"):
with patch.object(scanner, 'get_total_to_scan', return_value=1):
scanner.scan()
assert any(
"Rooster Fighter (2026)" in record.message
for record in caplog.records
if record.levelname == "WARNING"
)
def test_db_lookup_info_logged_on_resolution(
self, mock_loader, caplog
):
"""An INFO log is emitted when db_lookup resolves a folder."""
import tempfile
with tempfile.TemporaryDirectory() as tmp_dir:
resolved = Serie(
key="rooster-fighter",
name="",
site="aniworld.to",
folder="Rooster Fighter (2026)",
episodeDict={},
)
lookup = MagicMock(return_value=resolved)
scanner = self._make_scanner(tmp_dir, mock_loader, db_lookup=lookup)
with caplog.at_level(logging.INFO, logger="src.core.SerieScanner"), \
patch.object(scanner, 'get_total_to_scan', return_value=1), \
patch.object(
scanner,
'_SerieScanner__get_missing_episodes_and_season',
return_value=({}, "aniworld.to"),
), \
patch.object(resolved, 'save_to_file'):
scanner.scan()
assert any(
"rooster-fighter" in record.message
for record in caplog.records
if record.levelname == "INFO"
)