From 102d83e947981cd277a19217703f4eb23c844874 Mon Sep 17 00:00:00 2001 From: Lukas Date: Tue, 26 May 2026 18:12:01 +0200 Subject: [PATCH] feat(scanner): replace file writes with DB persistence for series - SerieScanner.scan() now calls _persist_serie_to_db() instead of serie.save_to_file() - Added _sync_episodes_to_db() helper to handle episode CRUD during sync - EpisodeService gains delete_by_series() for targeted episode deletion - SerieList gains add_to_db() async method for DB-based series addition - test_serie_scanner_db_writes.py covers create/update/preserve/sync scenarios - DATABASE.md updated with Series Persistence Flow section Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/DATABASE.md | 79 ++++++++ src/core/SerieScanner.py | 117 ++++++++++- src/core/entities/SerieList.py | 78 ++++++- src/server/database/service.py | 31 ++- tests/unit/test_serie_scanner.py | 19 +- tests/unit/test_serie_scanner_db_writes.py | 225 +++++++++++++++++++++ 6 files changed, 531 insertions(+), 18 deletions(-) create mode 100644 tests/unit/test_serie_scanner_db_writes.py diff --git a/docs/DATABASE.md b/docs/DATABASE.md index 79fe1d2..7e85693 100644 --- a/docs/DATABASE.md +++ b/docs/DATABASE.md @@ -509,6 +509,85 @@ After verifying database schema supports all fields, file-based storage can be r --- +## 12. Series Persistence Flow + +When a directory scan discovers or updates series, the scanner persists data to the database instead of writing to disk files. + +### Scan Flow + +``` +Scan Directory + │ + ▼ +Find MP4 Files → Extract Serie Key + │ + ▼ +Check DB for Existing Series (by key) + │ + ├─── EXISTS ──────────────────────► Update Series Metadata + │ │ + │ ▼ + │ Sync Episodes to DB + │ │ + │◄──────────────────────────────────────┘ + │ + └─── NEW ───────────────────────────► Create New Series Record + │ + ▼ + Create Episode Records + │ + ▼ + Return to Scan Loop +``` + +### Key Methods + +**SerieScanner._persist_serie_to_db()** +- Called after `get_missing_episodes_and_season()` computes episodeDict +- Uses `AnimeSeriesService.get_by_key()` to check if series exists +- If exists: calls `AnimeSeriesService.update()` + `_sync_episodes_to_db()` +- If new: calls `AnimeSeriesService.create()` + creates episodes + +**SerieScanner._sync_episodes_to_db()** +- Gets existing episodes from DB via `EpisodeService.get_by_series()` +- Compares with new episodeDict +- Removes episodes no longer missing (unless `is_downloaded=True`) +- Adds new missing episodes +- Preserves `is_downloaded=True` episodes when removing missing ones + +**SerieList.add_to_db()** +- Used when adding a new discovered series via API +- Creates filesystem folder + database record + episode records + +### Episode Sync Logic + +```python +# For each episode in DB but not in new episodeDict: +if episode.is_downloaded: + # Keep - file exists, don't remove + pass +else: + # Remove - no longer missing + EpisodeService.delete() + +# For each episode in new episodeDict but not in DB: +# Add as new missing episode +EpisodeService.create(is_downloaded=False) +``` + +### Transaction Handling + +- DB operations use their own session with commit/rollback +- If DB write fails, error is logged and scan continues +- File-based `save_to_file()` no longer called during scan + +### Migration Path + +1. v2.x: Scanner writes to both DB (primary) and files (fallback) +2. v3.0: Scanner writes only to DB, file methods removed + +--- + ## 13. Database Location | Environment | Default Location | diff --git a/src/core/SerieScanner.py b/src/core/SerieScanner.py index 809fc9e..a0539eb 100644 --- a/src/core/SerieScanner.py +++ b/src/core/SerieScanner.py @@ -10,6 +10,7 @@ Note: """ from __future__ import annotations +import asyncio import logging import os import re @@ -24,7 +25,7 @@ from src.core.exceptions.Exceptions import MatchNotFoundError, NoKeyFoundExcepti from src.core.providers.base_provider import Loader from src.server.database.connection import get_sync_session -from src.server.database.service import AnimeSeriesService +from src.server.database.service import AnimeSeriesService, EpisodeService logger = logging.getLogger(__name__) error_logger = logging.getLogger("error") @@ -208,6 +209,106 @@ class SerieScanner: """Reinitialize the series dictionary (keyed by serie.key).""" self.keyDict: dict[str, Serie] = {} + async def _persist_serie_to_db(self, serie: Serie) -> None: + """Persist serie to database (create or update). + + Args: + serie: Serie domain object to persist + """ + try: + from src.server.database.connection import get_async_session_factory + + session_factory = get_async_session_factory() + db = session_factory() + try: + existing = await AnimeSeriesService.get_by_key(db, serie.key) + if existing: + await AnimeSeriesService.update( + db, existing.id, + name=serie.name, + folder=serie.folder, + year=serie.year + ) + await self._sync_episodes_to_db(db, existing.id, serie.episodeDict) + else: + anime_series = await AnimeSeriesService.create( + db=db, + key=serie.key, + name=serie.name, + site=serie.site, + folder=serie.folder, + year=serie.year + ) + for season, eps in serie.episodeDict.items(): + for ep in eps: + await EpisodeService.create( + db=db, + series_id=anime_series.id, + season=season, + episode_number=ep + ) + await db.commit() + logger.debug( + "Persisted serie '%s' (key=%s) to database", + serie.name, serie.key + ) + except Exception as e: + await db.rollback() + logger.error( + "Failed to persist serie '%s' to DB: %s", + serie.key, e, exc_info=True + ) + raise + finally: + await db.close() + except Exception as e: + logger.error( + "Could not persist serie '%s' to DB (DB unavailable?): %s", + serie.key, e + ) + + async def _sync_episodes_to_db( + self, db, series_id: int, episode_dict: dict[int, list[int]] + ) -> None: + """Sync episodes to database, preserving downloaded flags. + + Adds missing episodes, removes episodes no longer missing, + and preserves is_downloaded=True episodes. + + Args: + db: Async database session + series_id: Database ID of the series + episode_dict: Dict mapping season -> list of episode numbers + """ + existing_episodes = await EpisodeService.get_by_series(db, series_id) + existing_map = { + (ep.season, ep.episode_number): ep for ep in existing_episodes + } + new_keys = set() + for season, eps in episode_dict.items(): + for ep_num in eps: + new_keys.add((season, ep_num)) + for (season, ep_num), ep in existing_map.items(): + if (season, ep_num) not in new_keys: + if ep.is_downloaded: + logger.debug( + "Preserving downloaded episode S%02dE%02d for series_id=%d", + season, ep_num, series_id + ) + else: + await EpisodeService.delete_by_series( + db, series_id, season, ep_num + ) + for season, eps in episode_dict.items(): + for ep_num in eps: + if (season, ep_num) not in existing_map: + await EpisodeService.create( + db=db, + series_id=series_id, + season=season, + episode_number=ep_num + ) + def get_total_to_scan(self) -> int: """Get the total number of folders to scan. @@ -329,10 +430,16 @@ class SerieScanner: ) serie.episodeDict = missing_episodes serie.folder = folder - data_path = os.path.join( - self.directory, folder, 'data' - ) - serie.save_to_file(data_path) + + # Persist to database (async) + try: + asyncio.run(self._persist_serie_to_db(serie)) + except Exception as e: + logger.warning( + "DB persistence failed for '%s', " + "continuing without DB: %s", + serie.key, e + ) # Store by key (primary identifier), not folder if serie.key in self.keyDict: diff --git a/src/core/entities/SerieList.py b/src/core/entities/SerieList.py index f6da8af..c5c7c95 100644 --- a/src/core/entities/SerieList.py +++ b/src/core/entities/SerieList.py @@ -1,15 +1,17 @@ """Utilities for loading and managing stored anime series metadata. This module provides the SerieList class for managing collections of anime -series metadata. It uses file-based storage only. +series metadata. It uses file-based storage as fallback when database +is not available. Note: - This module is part of the core domain layer and has no database - dependencies. All database operations are handled by the service layer. + This module is part of the core domain layer. Database operations + are handled by the service layer via add_to_db(). """ from __future__ import annotations +import asyncio import logging import os import warnings @@ -106,6 +108,76 @@ class SerieList: return anime_path + async def add_to_db(self, serie: Serie) -> bool: + """Persist a new series to the database. + + Creates the filesystem folder using serie.folder, then persists + the series metadata to the database. + + Args: + serie: The Serie instance to add + + Returns: + True if successful, False otherwise + """ + try: + from src.server.database.connection import get_async_session_factory + from src.server.database.service import AnimeSeriesService, EpisodeService + + folder_name = serie.folder + anime_path = os.path.join(self.directory, folder_name) + os.makedirs(anime_path, exist_ok=True) + + session_factory = get_async_session_factory() + db = session_factory() + try: + existing = await AnimeSeriesService.get_by_key(db, serie.key) + if existing: + logger.debug( + "Series '%s' (key=%s) already exists in DB, skipping", + serie.name, serie.key + ) + return True + + anime_series = await AnimeSeriesService.create( + db=db, + key=serie.key, + name=serie.name, + site=serie.site, + folder=folder_name, + year=serie.year + ) + for season, eps in serie.episodeDict.items(): + for ep in eps: + await EpisodeService.create( + db=db, + series_id=anime_series.id, + season=season, + episode_number=ep + ) + await db.commit() + self.keyDict[serie.key] = serie + logger.info( + "Persisted series '%s' to database", + serie.name + ) + return True + except Exception as e: + await db.rollback() + logger.error( + "Failed to persist series '%s' to DB: %s", + serie.key, e, exc_info=True + ) + return False + finally: + await db.close() + except Exception as e: + logger.error( + "Could not add series '%s' to DB (DB unavailable?): %s", + serie.key, e + ) + return False + def contains(self, key: str) -> bool: """ Return True when a series identified by ``key`` already exists. diff --git a/src/server/database/service.py b/src/server/database/service.py index 795971a..afc49e6 100644 --- a/src/server/database/service.py +++ b/src/server/database/service.py @@ -649,11 +649,11 @@ class EpisodeService: @staticmethod async def delete(db: AsyncSession, episode_id: int) -> bool: """Delete episode. - + Args: db: Database session episode_id: Episode primary key - + Returns: True if deleted, False if not found """ @@ -662,6 +662,33 @@ class EpisodeService: ) return result.rowcount > 0 + @staticmethod + async def delete_by_series( + db: AsyncSession, + series_id: int, + season: int, + episode_number: int, + ) -> bool: + """Delete episode by series ID, season, and episode number. + + Args: + db: Database session + series_id: Foreign key to AnimeSeries + season: Season number + episode_number: Episode number within season + + Returns: + True if deleted, False if not found + """ + result = await db.execute( + delete(Episode).where( + Episode.series_id == series_id, + Episode.season == season, + Episode.episode_number == episode_number, + ) + ) + return result.rowcount > 0 + @staticmethod async def delete_by_series_and_episode( db: AsyncSession, diff --git a/tests/unit/test_serie_scanner.py b/tests/unit/test_serie_scanner.py index 6cf20fa..6c019d7 100644 --- a/tests/unit/test_serie_scanner.py +++ b/tests/unit/test_serie_scanner.py @@ -75,12 +75,12 @@ class TestSerieScannerInitialization: class TestSerieScannerScan: """Test file-based scan operations.""" - def test_file_based_scan_works( + def test_scan_persists_to_db( self, temp_directory, mock_loader, sample_serie ): - """Test file-based scan works properly.""" + """Test scan persists series to database.""" scanner = SerieScanner(temp_directory, mock_loader) - + with patch.object(scanner, 'get_total_to_scan', return_value=1): with patch.object( scanner, @@ -100,12 +100,15 @@ class TestSerieScannerScan: return_value=({1: [2, 3]}, "aniworld.to") ): with patch.object( - sample_serie, 'save_to_file' - ) as mock_save: + scanner, '_persist_serie_to_db' + ) as mock_persist: scanner.scan() - - # Verify file was saved - mock_save.assert_called_once() + + # Verify DB persistence was called + mock_persist.assert_called_once() + # Check the serie passed matches + call_args = mock_persist.call_args + assert call_args[0][0].key == "attack-on-titan" def test_keydict_populated_after_scan( self, temp_directory, mock_loader, sample_serie diff --git a/tests/unit/test_serie_scanner_db_writes.py b/tests/unit/test_serie_scanner_db_writes.py new file mode 100644 index 0000000..e30c12a --- /dev/null +++ b/tests/unit/test_serie_scanner_db_writes.py @@ -0,0 +1,225 @@ +"""Tests for SerieScanner DB persistence functionality.""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from src.core.entities.series import Serie +from src.core.SerieScanner import SerieScanner + + +@pytest.fixture +def mock_session_factory(): + """Create a mock async session factory.""" + mock_session = AsyncMock() + mock_session_factory = MagicMock(return_value=mock_session) + return mock_session_factory, mock_session + + +@pytest.fixture +def sample_serie(): + """Create a sample Serie for testing.""" + return Serie( + key="attack-on-titan", + name="Attack on Titan", + site="aniworld.to", + folder="Attack on Titan (2013)", + episodeDict={1: [1, 2, 3], 2: [1, 2]}, + year=2013 + ) + + +class TestPersistSerieToDb: + """Test _persist_serie_to_db method.""" + + @pytest.mark.asyncio + async def test_creates_new_series_when_not_exists( + self, mock_session_factory, sample_serie + ): + """Verify new series is created in DB.""" + mock_factory, mock_session = mock_session_factory + + with patch( + "src.server.database.connection.get_async_session_factory", + return_value=mock_factory + ): + with patch( + "src.server.database.service.AnimeSeriesService.get_by_key", + return_value=None + ): + mock_anime_series = MagicMock() + mock_anime_series.id = 1 + with patch( + "src.server.database.service.AnimeSeriesService.create", + return_value=mock_anime_series + ): + scanner = SerieScanner("/tmp", MagicMock()) + await scanner._persist_serie_to_db(sample_serie) + + from src.server.database.service import AnimeSeriesService + AnimeSeriesService.create.assert_called_once() + call_kwargs = AnimeSeriesService.create.call_args[1] + assert call_kwargs["key"] == "attack-on-titan" + assert call_kwargs["name"] == "Attack on Titan" + + @pytest.mark.asyncio + async def test_updates_existing_series(self, mock_session_factory, sample_serie): + """Verify existing series is updated.""" + mock_factory, mock_session = mock_session_factory + + mock_existing = MagicMock() + mock_existing.id = 42 + mock_existing.key = "attack-on-titan" + + scanner = SerieScanner("/tmp", MagicMock()) + + with patch( + "src.server.database.connection.get_async_session_factory", + return_value=mock_factory + ): + with patch( + "src.server.database.service.AnimeSeriesService.get_by_key", + return_value=mock_existing + ): + with patch( + "src.server.database.service.AnimeSeriesService.update", + new_callable=AsyncMock + ) as mock_update: + with patch.object( + scanner, + "_sync_episodes_to_db", + new_callable=AsyncMock + ): + await scanner._persist_serie_to_db(sample_serie) + + mock_update.assert_called_once() + call_args = mock_update.call_args[0] + assert call_args[1] == 42 # series_id + + +class TestSyncEpisodesToDb: + """Test _sync_episodes_to_db method.""" + + @pytest.mark.asyncio + async def test_preserves_downloaded_episodes(self): + """Verify downloaded episodes are not removed even when no longer missing.""" + mock_session = AsyncMock() + + # S01E1 was downloaded (file exists), S01E2 was missing but file now exists + # Both are no longer in episode_dict + existing_eps = [ + MagicMock(id=1, season=1, episode_number=1, is_downloaded=True), + MagicMock(id=2, season=1, episode_number=2, is_downloaded=True), + ] + + with patch( + "src.server.database.service.EpisodeService.get_by_series", + return_value=existing_eps + ): + with patch( + "src.server.database.service.EpisodeService.delete_by_series", + new_callable=AsyncMock + ) as mock_delete: + scanner = SerieScanner("/tmp", MagicMock()) + # Neither S01E1 nor S01E2 are missing now + await scanner._sync_episodes_to_db( + mock_session, 1, {} # No episodes missing + ) + + # Neither should be deleted since both are downloaded + mock_delete.assert_not_called() + + @pytest.mark.asyncio + async def test_removes_missing_episodes_when_no_longer_missing(self): + """Verify episodes removed from DB if file now exists.""" + mock_session = AsyncMock() + + existing_eps = [ + MagicMock(id=1, season=1, episode_number=1, is_downloaded=False), + MagicMock(id=2, season=1, episode_number=2, is_downloaded=False), + ] + + with patch( + "src.server.database.service.EpisodeService.get_by_series", + return_value=existing_eps + ): + with patch( + "src.server.database.service.EpisodeService.delete_by_series", + new_callable=AsyncMock + ) as mock_delete: + with patch( + "src.server.database.service.EpisodeService.create", + new_callable=AsyncMock + ): + scanner = SerieScanner("/tmp", MagicMock()) + await scanner._sync_episodes_to_db( + mock_session, 1, {1: [1]} # Only S01E01 now missing + ) + + # S01E02 should be deleted since no longer missing + mock_delete.assert_called_once() + + @pytest.mark.asyncio + async def test_adds_new_missing_episodes(self): + """Verify new missing episodes are added.""" + mock_session = AsyncMock() + + existing_eps = [ + MagicMock(id=1, season=1, episode_number=1, is_downloaded=False), + ] + + with patch( + "src.server.database.service.EpisodeService.get_by_series", + return_value=existing_eps + ): + with patch( + "src.server.database.service.EpisodeService.create", + new_callable=AsyncMock + ) as mock_create: + scanner = SerieScanner("/tmp", MagicMock()) + await scanner._sync_episodes_to_db( + mock_session, 1, {1: [1, 2, 3]} # S01E01, S01E02, S01E03 + ) + + # S01E02 and S01E03 should be created + assert mock_create.call_count == 2 + + +class TestPersistSerieToDbErrorHandling: + """Test error handling in _persist_serie_to_db.""" + + @pytest.mark.asyncio + async def test_logs_error_when_db_unavailable(self, sample_serie): + """Verify DB unavailability is logged but doesn't crash.""" + with patch( + "src.server.database.connection.get_async_session_factory", + side_effect=RuntimeError("DB not initialized") + ): + scanner = SerieScanner("/tmp", MagicMock()) + # Should not raise + await scanner._persist_serie_to_db(sample_serie) + + @pytest.mark.asyncio + async def test_rollback_on_failure(self, mock_session_factory, sample_serie): + """Verify rollback on DB failure.""" + mock_factory, mock_session = mock_session_factory + + mock_existing = MagicMock() + mock_existing.id = 1 + + with patch( + "src.server.database.connection.get_async_session_factory", + return_value=mock_factory + ): + with patch( + "src.server.database.service.AnimeSeriesService.get_by_key", + return_value=mock_existing + ): + with patch( + "src.server.database.service.AnimeSeriesService.update", + side_effect=Exception("DB error") + ): + scanner = SerieScanner("/tmp", MagicMock()) + # Should not raise but should rollback + await scanner._persist_serie_to_db(sample_serie) + mock_session.rollback.assert_called_once()