feat(scanner): replace file writes with DB persistence for series

- SerieScanner.scan() now calls _persist_serie_to_db() instead of serie.save_to_file()
- Added _sync_episodes_to_db() helper to handle episode CRUD during sync
- EpisodeService gains delete_by_series() for targeted episode deletion
- SerieList gains add_to_db() async method for DB-based series addition
- test_serie_scanner_db_writes.py covers create/update/preserve/sync scenarios
- DATABASE.md updated with Series Persistence Flow section

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-26 18:12:01 +02:00
parent 841368bf85
commit 102d83e947
6 changed files with 531 additions and 18 deletions

View File

@@ -509,6 +509,85 @@ After verifying database schema supports all fields, file-based storage can be r
---
## 12. Series Persistence Flow
When a directory scan discovers or updates series, the scanner persists data to the database instead of writing to disk files.
### Scan Flow
```
Scan Directory
Find MP4 Files → Extract Serie Key
Check DB for Existing Series (by key)
├─── EXISTS ──────────────────────► Update Series Metadata
│ │
│ ▼
│ Sync Episodes to DB
│ │
│◄──────────────────────────────────────┘
└─── NEW ───────────────────────────► Create New Series Record
Create Episode Records
Return to Scan Loop
```
### Key Methods
**SerieScanner._persist_serie_to_db()**
- Called after `get_missing_episodes_and_season()` computes episodeDict
- Uses `AnimeSeriesService.get_by_key()` to check if series exists
- If exists: calls `AnimeSeriesService.update()` + `_sync_episodes_to_db()`
- If new: calls `AnimeSeriesService.create()` + creates episodes
**SerieScanner._sync_episodes_to_db()**
- Gets existing episodes from DB via `EpisodeService.get_by_series()`
- Compares with new episodeDict
- Removes episodes no longer missing (unless `is_downloaded=True`)
- Adds new missing episodes
- Preserves `is_downloaded=True` episodes when removing missing ones
**SerieList.add_to_db()**
- Used when adding a new discovered series via API
- Creates filesystem folder + database record + episode records
### Episode Sync Logic
```python
# For each episode in DB but not in new episodeDict:
if episode.is_downloaded:
# Keep - file exists, don't remove
pass
else:
# Remove - no longer missing
EpisodeService.delete()
# For each episode in new episodeDict but not in DB:
# Add as new missing episode
EpisodeService.create(is_downloaded=False)
```
### Transaction Handling
- DB operations use their own session with commit/rollback
- If DB write fails, error is logged and scan continues
- File-based `save_to_file()` no longer called during scan
### Migration Path
1. v2.x: Scanner writes to both DB (primary) and files (fallback)
2. v3.0: Scanner writes only to DB, file methods removed
---
## 13. Database Location
| Environment | Default Location |

View File

@@ -10,6 +10,7 @@ Note:
"""
from __future__ import annotations
import asyncio
import logging
import os
import re
@@ -24,7 +25,7 @@ from src.core.exceptions.Exceptions import MatchNotFoundError, NoKeyFoundExcepti
from src.core.providers.base_provider import Loader
from src.server.database.connection import get_sync_session
from src.server.database.service import AnimeSeriesService
from src.server.database.service import AnimeSeriesService, EpisodeService
logger = logging.getLogger(__name__)
error_logger = logging.getLogger("error")
@@ -208,6 +209,106 @@ class SerieScanner:
"""Reinitialize the series dictionary (keyed by serie.key)."""
self.keyDict: dict[str, Serie] = {}
async def _persist_serie_to_db(self, serie: Serie) -> None:
"""Persist serie to database (create or update).
Args:
serie: Serie domain object to persist
"""
try:
from src.server.database.connection import get_async_session_factory
session_factory = get_async_session_factory()
db = session_factory()
try:
existing = await AnimeSeriesService.get_by_key(db, serie.key)
if existing:
await AnimeSeriesService.update(
db, existing.id,
name=serie.name,
folder=serie.folder,
year=serie.year
)
await self._sync_episodes_to_db(db, existing.id, serie.episodeDict)
else:
anime_series = await AnimeSeriesService.create(
db=db,
key=serie.key,
name=serie.name,
site=serie.site,
folder=serie.folder,
year=serie.year
)
for season, eps in serie.episodeDict.items():
for ep in eps:
await EpisodeService.create(
db=db,
series_id=anime_series.id,
season=season,
episode_number=ep
)
await db.commit()
logger.debug(
"Persisted serie '%s' (key=%s) to database",
serie.name, serie.key
)
except Exception as e:
await db.rollback()
logger.error(
"Failed to persist serie '%s' to DB: %s",
serie.key, e, exc_info=True
)
raise
finally:
await db.close()
except Exception as e:
logger.error(
"Could not persist serie '%s' to DB (DB unavailable?): %s",
serie.key, e
)
async def _sync_episodes_to_db(
self, db, series_id: int, episode_dict: dict[int, list[int]]
) -> None:
"""Sync episodes to database, preserving downloaded flags.
Adds missing episodes, removes episodes no longer missing,
and preserves is_downloaded=True episodes.
Args:
db: Async database session
series_id: Database ID of the series
episode_dict: Dict mapping season -> list of episode numbers
"""
existing_episodes = await EpisodeService.get_by_series(db, series_id)
existing_map = {
(ep.season, ep.episode_number): ep for ep in existing_episodes
}
new_keys = set()
for season, eps in episode_dict.items():
for ep_num in eps:
new_keys.add((season, ep_num))
for (season, ep_num), ep in existing_map.items():
if (season, ep_num) not in new_keys:
if ep.is_downloaded:
logger.debug(
"Preserving downloaded episode S%02dE%02d for series_id=%d",
season, ep_num, series_id
)
else:
await EpisodeService.delete_by_series(
db, series_id, season, ep_num
)
for season, eps in episode_dict.items():
for ep_num in eps:
if (season, ep_num) not in existing_map:
await EpisodeService.create(
db=db,
series_id=series_id,
season=season,
episode_number=ep_num
)
def get_total_to_scan(self) -> int:
"""Get the total number of folders to scan.
@@ -329,10 +430,16 @@ class SerieScanner:
)
serie.episodeDict = missing_episodes
serie.folder = folder
data_path = os.path.join(
self.directory, folder, 'data'
)
serie.save_to_file(data_path)
# Persist to database (async)
try:
asyncio.run(self._persist_serie_to_db(serie))
except Exception as e:
logger.warning(
"DB persistence failed for '%s', "
"continuing without DB: %s",
serie.key, e
)
# Store by key (primary identifier), not folder
if serie.key in self.keyDict:

View File

@@ -1,15 +1,17 @@
"""Utilities for loading and managing stored anime series metadata.
This module provides the SerieList class for managing collections of anime
series metadata. It uses file-based storage only.
series metadata. It uses file-based storage as fallback when database
is not available.
Note:
This module is part of the core domain layer and has no database
dependencies. All database operations are handled by the service layer.
This module is part of the core domain layer. Database operations
are handled by the service layer via add_to_db().
"""
from __future__ import annotations
import asyncio
import logging
import os
import warnings
@@ -106,6 +108,76 @@ class SerieList:
return anime_path
async def add_to_db(self, serie: Serie) -> bool:
"""Persist a new series to the database.
Creates the filesystem folder using serie.folder, then persists
the series metadata to the database.
Args:
serie: The Serie instance to add
Returns:
True if successful, False otherwise
"""
try:
from src.server.database.connection import get_async_session_factory
from src.server.database.service import AnimeSeriesService, EpisodeService
folder_name = serie.folder
anime_path = os.path.join(self.directory, folder_name)
os.makedirs(anime_path, exist_ok=True)
session_factory = get_async_session_factory()
db = session_factory()
try:
existing = await AnimeSeriesService.get_by_key(db, serie.key)
if existing:
logger.debug(
"Series '%s' (key=%s) already exists in DB, skipping",
serie.name, serie.key
)
return True
anime_series = await AnimeSeriesService.create(
db=db,
key=serie.key,
name=serie.name,
site=serie.site,
folder=folder_name,
year=serie.year
)
for season, eps in serie.episodeDict.items():
for ep in eps:
await EpisodeService.create(
db=db,
series_id=anime_series.id,
season=season,
episode_number=ep
)
await db.commit()
self.keyDict[serie.key] = serie
logger.info(
"Persisted series '%s' to database",
serie.name
)
return True
except Exception as e:
await db.rollback()
logger.error(
"Failed to persist series '%s' to DB: %s",
serie.key, e, exc_info=True
)
return False
finally:
await db.close()
except Exception as e:
logger.error(
"Could not add series '%s' to DB (DB unavailable?): %s",
serie.key, e
)
return False
def contains(self, key: str) -> bool:
"""
Return True when a series identified by ``key`` already exists.

View File

@@ -649,11 +649,11 @@ class EpisodeService:
@staticmethod
async def delete(db: AsyncSession, episode_id: int) -> bool:
"""Delete episode.
Args:
db: Database session
episode_id: Episode primary key
Returns:
True if deleted, False if not found
"""
@@ -662,6 +662,33 @@ class EpisodeService:
)
return result.rowcount > 0
@staticmethod
async def delete_by_series(
db: AsyncSession,
series_id: int,
season: int,
episode_number: int,
) -> bool:
"""Delete episode by series ID, season, and episode number.
Args:
db: Database session
series_id: Foreign key to AnimeSeries
season: Season number
episode_number: Episode number within season
Returns:
True if deleted, False if not found
"""
result = await db.execute(
delete(Episode).where(
Episode.series_id == series_id,
Episode.season == season,
Episode.episode_number == episode_number,
)
)
return result.rowcount > 0
@staticmethod
async def delete_by_series_and_episode(
db: AsyncSession,

View File

@@ -75,12 +75,12 @@ class TestSerieScannerInitialization:
class TestSerieScannerScan:
"""Test file-based scan operations."""
def test_file_based_scan_works(
def test_scan_persists_to_db(
self, temp_directory, mock_loader, sample_serie
):
"""Test file-based scan works properly."""
"""Test scan persists series to database."""
scanner = SerieScanner(temp_directory, mock_loader)
with patch.object(scanner, 'get_total_to_scan', return_value=1):
with patch.object(
scanner,
@@ -100,12 +100,15 @@ class TestSerieScannerScan:
return_value=({1: [2, 3]}, "aniworld.to")
):
with patch.object(
sample_serie, 'save_to_file'
) as mock_save:
scanner, '_persist_serie_to_db'
) as mock_persist:
scanner.scan()
# Verify file was saved
mock_save.assert_called_once()
# Verify DB persistence was called
mock_persist.assert_called_once()
# Check the serie passed matches
call_args = mock_persist.call_args
assert call_args[0][0].key == "attack-on-titan"
def test_keydict_populated_after_scan(
self, temp_directory, mock_loader, sample_serie

View File

@@ -0,0 +1,225 @@
"""Tests for SerieScanner DB persistence functionality."""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.core.entities.series import Serie
from src.core.SerieScanner import SerieScanner
@pytest.fixture
def mock_session_factory():
"""Create a mock async session factory."""
mock_session = AsyncMock()
mock_session_factory = MagicMock(return_value=mock_session)
return mock_session_factory, mock_session
@pytest.fixture
def sample_serie():
"""Create a sample Serie for testing."""
return Serie(
key="attack-on-titan",
name="Attack on Titan",
site="aniworld.to",
folder="Attack on Titan (2013)",
episodeDict={1: [1, 2, 3], 2: [1, 2]},
year=2013
)
class TestPersistSerieToDb:
"""Test _persist_serie_to_db method."""
@pytest.mark.asyncio
async def test_creates_new_series_when_not_exists(
self, mock_session_factory, sample_serie
):
"""Verify new series is created in DB."""
mock_factory, mock_session = mock_session_factory
with patch(
"src.server.database.connection.get_async_session_factory",
return_value=mock_factory
):
with patch(
"src.server.database.service.AnimeSeriesService.get_by_key",
return_value=None
):
mock_anime_series = MagicMock()
mock_anime_series.id = 1
with patch(
"src.server.database.service.AnimeSeriesService.create",
return_value=mock_anime_series
):
scanner = SerieScanner("/tmp", MagicMock())
await scanner._persist_serie_to_db(sample_serie)
from src.server.database.service import AnimeSeriesService
AnimeSeriesService.create.assert_called_once()
call_kwargs = AnimeSeriesService.create.call_args[1]
assert call_kwargs["key"] == "attack-on-titan"
assert call_kwargs["name"] == "Attack on Titan"
@pytest.mark.asyncio
async def test_updates_existing_series(self, mock_session_factory, sample_serie):
"""Verify existing series is updated."""
mock_factory, mock_session = mock_session_factory
mock_existing = MagicMock()
mock_existing.id = 42
mock_existing.key = "attack-on-titan"
scanner = SerieScanner("/tmp", MagicMock())
with patch(
"src.server.database.connection.get_async_session_factory",
return_value=mock_factory
):
with patch(
"src.server.database.service.AnimeSeriesService.get_by_key",
return_value=mock_existing
):
with patch(
"src.server.database.service.AnimeSeriesService.update",
new_callable=AsyncMock
) as mock_update:
with patch.object(
scanner,
"_sync_episodes_to_db",
new_callable=AsyncMock
):
await scanner._persist_serie_to_db(sample_serie)
mock_update.assert_called_once()
call_args = mock_update.call_args[0]
assert call_args[1] == 42 # series_id
class TestSyncEpisodesToDb:
"""Test _sync_episodes_to_db method."""
@pytest.mark.asyncio
async def test_preserves_downloaded_episodes(self):
"""Verify downloaded episodes are not removed even when no longer missing."""
mock_session = AsyncMock()
# S01E1 was downloaded (file exists), S01E2 was missing but file now exists
# Both are no longer in episode_dict
existing_eps = [
MagicMock(id=1, season=1, episode_number=1, is_downloaded=True),
MagicMock(id=2, season=1, episode_number=2, is_downloaded=True),
]
with patch(
"src.server.database.service.EpisodeService.get_by_series",
return_value=existing_eps
):
with patch(
"src.server.database.service.EpisodeService.delete_by_series",
new_callable=AsyncMock
) as mock_delete:
scanner = SerieScanner("/tmp", MagicMock())
# Neither S01E1 nor S01E2 are missing now
await scanner._sync_episodes_to_db(
mock_session, 1, {} # No episodes missing
)
# Neither should be deleted since both are downloaded
mock_delete.assert_not_called()
@pytest.mark.asyncio
async def test_removes_missing_episodes_when_no_longer_missing(self):
"""Verify episodes removed from DB if file now exists."""
mock_session = AsyncMock()
existing_eps = [
MagicMock(id=1, season=1, episode_number=1, is_downloaded=False),
MagicMock(id=2, season=1, episode_number=2, is_downloaded=False),
]
with patch(
"src.server.database.service.EpisodeService.get_by_series",
return_value=existing_eps
):
with patch(
"src.server.database.service.EpisodeService.delete_by_series",
new_callable=AsyncMock
) as mock_delete:
with patch(
"src.server.database.service.EpisodeService.create",
new_callable=AsyncMock
):
scanner = SerieScanner("/tmp", MagicMock())
await scanner._sync_episodes_to_db(
mock_session, 1, {1: [1]} # Only S01E01 now missing
)
# S01E02 should be deleted since no longer missing
mock_delete.assert_called_once()
@pytest.mark.asyncio
async def test_adds_new_missing_episodes(self):
"""Verify new missing episodes are added."""
mock_session = AsyncMock()
existing_eps = [
MagicMock(id=1, season=1, episode_number=1, is_downloaded=False),
]
with patch(
"src.server.database.service.EpisodeService.get_by_series",
return_value=existing_eps
):
with patch(
"src.server.database.service.EpisodeService.create",
new_callable=AsyncMock
) as mock_create:
scanner = SerieScanner("/tmp", MagicMock())
await scanner._sync_episodes_to_db(
mock_session, 1, {1: [1, 2, 3]} # S01E01, S01E02, S01E03
)
# S01E02 and S01E03 should be created
assert mock_create.call_count == 2
class TestPersistSerieToDbErrorHandling:
"""Test error handling in _persist_serie_to_db."""
@pytest.mark.asyncio
async def test_logs_error_when_db_unavailable(self, sample_serie):
"""Verify DB unavailability is logged but doesn't crash."""
with patch(
"src.server.database.connection.get_async_session_factory",
side_effect=RuntimeError("DB not initialized")
):
scanner = SerieScanner("/tmp", MagicMock())
# Should not raise
await scanner._persist_serie_to_db(sample_serie)
@pytest.mark.asyncio
async def test_rollback_on_failure(self, mock_session_factory, sample_serie):
"""Verify rollback on DB failure."""
mock_factory, mock_session = mock_session_factory
mock_existing = MagicMock()
mock_existing.id = 1
with patch(
"src.server.database.connection.get_async_session_factory",
return_value=mock_factory
):
with patch(
"src.server.database.service.AnimeSeriesService.get_by_key",
return_value=mock_existing
):
with patch(
"src.server.database.service.AnimeSeriesService.update",
side_effect=Exception("DB error")
):
scanner = SerieScanner("/tmp", MagicMock())
# Should not raise but should rollback
await scanner._persist_serie_to_db(sample_serie)
mock_session.rollback.assert_called_once()