Task 4: Update SerieList to use database storage

- Add db_session and skip_load parameters to SerieList.__init__
- Add async load_series_from_db() method for database loading
- Add async add_to_db() method for database storage
- Add async contains_in_db() method for database checks
- Add _convert_from_db() and _convert_to_db_dict() helper methods
- Add deprecation warnings to file-based add() method
- Maintain backward compatibility for file-based operations
- Add comprehensive unit tests (29 tests, all passing)
- Update instructions.md to mark Task 4 complete
This commit is contained in:
Lukas 2025-12-01 19:18:50 +01:00
parent 646385b975
commit 795f83ada5
3 changed files with 606 additions and 25 deletions

View File

@ -187,7 +187,7 @@ async def lifespan(app: FastAPI):
---
### Task 4: Update SerieList to Use Database
### Task 4: Update SerieList to Use Database
**File:** `src/core/entities/SerieList.py`

View File

@ -1,41 +1,119 @@
"""Utilities for loading and managing stored anime series metadata."""
"""Utilities for loading and managing stored anime series metadata.
This module provides the SerieList class for managing collections of anime
series metadata. It supports both file-based and database-backed storage.
The class can operate in two modes:
1. File-based mode (legacy): Reads/writes data files from disk
2. Database mode: Reads/writes to SQLite database via AnimeSeriesService
Database mode is preferred for new code. File-based mode is kept for
backward compatibility with CLI usage.
"""
from __future__ import annotations
import logging
import os
import warnings
from json import JSONDecodeError
from typing import Dict, Iterable, List, Optional
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional
from src.core.entities.series import Serie
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncSession
from src.server.database.models import AnimeSeries
logger = logging.getLogger(__name__)
class SerieList:
"""
Represents the collection of cached series stored on disk.
Represents the collection of cached series stored on disk or database.
Series are identified by their unique 'key' (provider identifier).
The 'folder' is metadata only and not used for lookups.
The class supports two modes of operation:
1. File-based mode (legacy):
Initialize without db_session to use file-based storage.
Series are loaded from 'data' files in the anime directory.
2. Database mode (preferred):
Pass db_session to use database-backed storage via AnimeSeriesService.
Series are loaded from the AnimeSeries table.
Example:
# File-based mode (legacy)
serie_list = SerieList("/path/to/anime")
# Database mode (preferred)
async with get_db_session() as db:
serie_list = SerieList("/path/to/anime", db_session=db)
await serie_list.load_series_from_db(db)
Attributes:
directory: Path to the anime directory
keyDict: Internal dictionary mapping serie.key to Serie objects
_db_session: Optional database session for database mode
"""
def __init__(self, base_path: str) -> None:
def __init__(
self,
base_path: str,
db_session: Optional["AsyncSession"] = None,
skip_load: bool = False
) -> None:
"""Initialize the SerieList.
Args:
base_path: Path to the anime directory
db_session: Optional database session for database mode.
If provided, use load_series_from_db() instead of
the automatic file-based loading.
skip_load: If True, skip automatic loading of series.
Useful when using database mode to allow async loading.
"""
self.directory: str = base_path
# Internal storage using serie.key as the dictionary key
self.keyDict: Dict[str, Serie] = {}
self.load_series()
self._db_session: Optional["AsyncSession"] = db_session
# Only auto-load from files if no db_session and not skipping
if not skip_load and db_session is None:
self.load_series()
def add(self, serie: Serie) -> None:
"""
Persist a new series if it is not already present.
Persist a new series if it is not already present (file-based mode).
Uses serie.key for identification. The serie.folder is used for
filesystem operations only.
.. deprecated:: 2.0.0
Use :meth:`add_to_db` for database-backed storage.
File-based storage will be removed in a future version.
Args:
serie: The Serie instance to add
Note:
This method creates data files on disk. For database storage,
use add_to_db() instead.
"""
if self.contains(serie.key):
return
warnings.warn(
"File-based storage via add() is deprecated. "
"Use add_to_db() for database storage.",
DeprecationWarning,
stacklevel=2
)
data_path = os.path.join(self.directory, serie.folder, "data")
anime_path = os.path.join(self.directory, serie.folder)
os.makedirs(anime_path, exist_ok=True)
@ -44,6 +122,63 @@ class SerieList:
# Store by key, not folder
self.keyDict[serie.key] = serie
async def add_to_db(
    self,
    serie: Serie,
    db: "AsyncSession"
) -> Optional["AnimeSeries"]:
    """
    Store ``serie`` as a new database record unless its key is known.

    Both the existence check and the insert go through
    AnimeSeriesService and are keyed on ``serie.key``. After a
    successful insert the serie is also registered in the in-memory
    ``keyDict``; when the key is already stored, nothing is changed.

    Args:
        serie: The Serie instance to store
        db: Database session used for the lookup and the insert

    Returns:
        Whatever AnimeSeriesService.create() returned for the new
        record, or None when a record with this key already exists

    Example:
        async with get_db_session() as db:
            result = await serie_list.add_to_db(serie, db)
            if result:
                print(f"Added series: {result.name}")
    """
    # Resolved at call time rather than at module import.
    from src.server.database.service import AnimeSeriesService

    # Guard clause: an existing record means there is nothing to insert.
    if await AnimeSeriesService.get_by_key(db, serie.key):
        logger.debug(
            "Series already exists in database: %s (key=%s)",
            serie.name,
            serie.key
        )
        return None

    # Field values handed to the service, in the order it is called with.
    record_fields = {
        "key": serie.key,
        "name": serie.name,
        "site": serie.site,
        "folder": serie.folder,
        "episode_dict": serie.episodeDict,
    }
    created = await AnimeSeriesService.create(db=db, **record_fields)

    # Mirror the new record in the in-memory collection.
    self.keyDict[serie.key] = serie

    logger.info(
        "Added series to database: %s (key=%s)",
        serie.name,
        serie.key
    )
    return created
def contains(self, key: str) -> bool:
"""
Return True when a series identified by ``key`` already exists.
@ -107,6 +242,119 @@ class SerieList:
error,
)
async def load_series_from_db(self, db: "AsyncSession") -> int:
    """
    Replace the in-memory collection with the series stored in the database.

    This is the preferred method for populating the series list
    when using database-backed storage.

    The rows are fetched and converted *before* the in-memory collection
    is touched. If the query or a conversion raises, the previously
    loaded series therefore stay available instead of being wiped.

    Args:
        db: Database session for async operations

    Returns:
        Number of series held in memory after loading

    Raises:
        Exception: Anything raised by AnimeSeriesService.get_all() or by
            the row conversion is propagated unchanged; ``keyDict`` is
            left as it was in that case.

    Example:
        async with get_db_session() as db:
            serie_list = SerieList("/path/to/anime", skip_load=True)
            count = await serie_list.load_series_from_db(db)
            print(f"Loaded {count} series from database")
    """
    # Resolved at call time rather than at module import.
    from src.server.database.service import AnimeSeriesService

    # Fetch first: the existing collection must survive a failing query.
    anime_series_list = await AnimeSeriesService.get_all(db)

    # Build the replacement separately so a conversion error cannot
    # leave keyDict half-filled.
    loaded = {}
    for anime_series in anime_series_list:
        serie = self._convert_from_db(anime_series)
        loaded[serie.key] = serie

    # Swap the contents in place (same dict object, no await in between)
    # so other coroutines never observe an empty intermediate state.
    self.keyDict.clear()
    self.keyDict.update(loaded)

    logger.info(
        "Loaded %d series from database",
        len(self.keyDict)
    )
    return len(self.keyDict)
@staticmethod
def _convert_from_db(anime_series: "AnimeSeries") -> Serie:
    """
    Build a Serie entity from an AnimeSeries database model.

    Each key of the model's ``episode_dict`` is cast to ``int`` and its
    episodes are copied into a new list. An entry whose conversion
    raises ValueError or TypeError is logged as a warning and left out.
    A falsy ``episode_dict`` yields an empty mapping.

    Args:
        anime_series: AnimeSeries model from database

    Returns:
        Serie carrying the model's key, name, site and folder together
        with the converted episode mapping
    """
    seasons: dict[int, list[int]] = {}

    # JSON storage hands the season numbers back as strings.
    stored = anime_series.episode_dict or {}
    for raw_season, raw_episodes in stored.items():
        try:
            season_number = int(raw_season)
            seasons[season_number] = list(raw_episodes)
        except (ValueError, TypeError):
            logger.warning(
                "Invalid season key '%s' in episode_dict for %s",
                raw_season,
                anime_series.key
            )

    return Serie(
        key=anime_series.key,
        name=anime_series.name,
        site=anime_series.site,
        folder=anime_series.folder,
        episodeDict=seasons
    )
@staticmethod
def _convert_to_db_dict(serie: Serie) -> dict:
    """
    Flatten a Serie entity into a plain dictionary of database fields.

    Season keys of ``serie.episodeDict`` are turned into strings and each
    episode collection is copied into a list. An empty or missing
    episode mapping is represented as ``None``.

    Args:
        serie: Serie entity instance

    Returns:
        Dictionary with the entries ``key``, ``name``, ``site``,
        ``folder`` and ``episode_dict``
    """
    episodes = serie.episodeDict
    if episodes:
        # String keys, because the mapping is stored as JSON.
        stored_episodes = {
            str(season): list(numbers)
            for season, numbers in episodes.items()
        }
    else:
        stored_episodes = None

    return dict(
        key=serie.key,
        name=serie.name,
        site=serie.site,
        folder=serie.folder,
        episode_dict=stored_episodes,
    )
async def contains_in_db(self, key: str, db: "AsyncSession") -> bool:
    """
    Report whether the database holds a series with the given key.

    The lookup is delegated to AnimeSeriesService.get_by_key(); the
    in-memory collection is not consulted.

    Args:
        key: The unique provider identifier for the series
        db: Database session for async operations

    Returns:
        True when the lookup returned something other than None
    """
    # Resolved at call time rather than at module import.
    from src.server.database.service import AnimeSeriesService

    return (await AnimeSeriesService.get_by_key(db, key)) is not None
def GetMissingEpisode(self) -> List[Serie]:
"""Return all series that still contain missing episodes."""
return [

View File

@ -2,6 +2,8 @@
import os
import tempfile
import warnings
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
@ -28,6 +30,25 @@ def sample_serie():
)
@pytest.fixture
def mock_db_session():
    """Provide an AsyncMock that stands in for an async database session."""
    return AsyncMock()
@pytest.fixture
def mock_anime_series():
    """Provide a MagicMock shaped like an AnimeSeries database model."""
    row = MagicMock()
    # configure_mock() sets plain attributes; passing ``name`` to the
    # MagicMock constructor would name the mock itself instead.
    row.configure_mock(
        key="test-series",
        name="Test Series",
        site="https://aniworld.to/anime/stream/test-series",
        folder="Test Series (2020)",
        episode_dict={"1": [1, 2, 3], "2": [1, 2]},
    )
    return row
class TestSerieListKeyBasedStorage:
"""Test SerieList uses key for internal storage."""
@ -40,7 +61,9 @@ class TestSerieListKeyBasedStorage:
def test_add_stores_by_key(self, temp_directory, sample_serie):
"""Test add() stores series by key."""
serie_list = SerieList(temp_directory)
serie_list.add(sample_serie)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
serie_list.add(sample_serie)
# Verify stored by key, not folder
assert sample_serie.key in serie_list.keyDict
@ -49,7 +72,9 @@ class TestSerieListKeyBasedStorage:
def test_contains_checks_by_key(self, temp_directory, sample_serie):
"""Test contains() checks by key."""
serie_list = SerieList(temp_directory)
serie_list.add(sample_serie)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
serie_list.add(sample_serie)
assert serie_list.contains(sample_serie.key)
assert not serie_list.contains("nonexistent-key")
@ -60,11 +85,13 @@ class TestSerieListKeyBasedStorage:
"""Test add() prevents duplicates based on key."""
serie_list = SerieList(temp_directory)
# Add same serie twice
serie_list.add(sample_serie)
initial_count = len(serie_list.keyDict)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
# Add same serie twice
serie_list.add(sample_serie)
initial_count = len(serie_list.keyDict)
serie_list.add(sample_serie)
serie_list.add(sample_serie)
# Should still have only one entry
assert len(serie_list.keyDict) == initial_count
@ -75,7 +102,9 @@ class TestSerieListKeyBasedStorage:
):
"""Test get_by_key() retrieves series correctly."""
serie_list = SerieList(temp_directory)
serie_list.add(sample_serie)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
serie_list.add(sample_serie)
result = serie_list.get_by_key(sample_serie.key)
assert result is not None
@ -94,9 +123,11 @@ class TestSerieListKeyBasedStorage:
):
"""Test get_by_folder() provides backward compatibility."""
serie_list = SerieList(temp_directory)
serie_list.add(sample_serie)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
serie_list.add(sample_serie)
result = serie_list.get_by_folder(sample_serie.folder)
result = serie_list.get_by_folder(sample_serie.folder)
assert result is not None
assert result.key == sample_serie.key
assert result.folder == sample_serie.folder
@ -105,13 +136,14 @@ class TestSerieListKeyBasedStorage:
"""Test get_by_folder() returns None for nonexistent folder."""
serie_list = SerieList(temp_directory)
result = serie_list.get_by_folder("Nonexistent Folder")
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
result = serie_list.get_by_folder("Nonexistent Folder")
assert result is None
def test_get_all_returns_all_series(self, temp_directory, sample_serie):
"""Test get_all() returns all series from keyDict."""
serie_list = SerieList(temp_directory)
serie_list.add(sample_serie)
serie2 = Serie(
key="naruto",
@ -120,7 +152,11 @@ class TestSerieListKeyBasedStorage:
folder="Naruto (2002)",
episodeDict={1: [1, 2]}
)
serie_list.add(serie2)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
serie_list.add(sample_serie)
serie_list.add(serie2)
all_series = serie_list.get_all()
assert len(all_series) == 2
@ -151,8 +187,10 @@ class TestSerieListKeyBasedStorage:
episodeDict={}
)
serie_list.add(serie_with_episodes)
serie_list.add(serie_without_episodes)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
serie_list.add(serie_with_episodes)
serie_list.add(serie_without_episodes)
missing = serie_list.get_missing_episodes()
assert len(missing) == 1
@ -184,8 +222,10 @@ class TestSerieListPublicAPI:
"""Test that all public methods work correctly after refactoring."""
serie_list = SerieList(temp_directory)
# Test add
serie_list.add(sample_serie)
# Test add (suppress deprecation warning for test)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
serie_list.add(sample_serie)
# Test contains
assert serie_list.contains(sample_serie.key)
@ -200,4 +240,297 @@ class TestSerieListPublicAPI:
# Test new helper methods
assert serie_list.get_by_key(sample_serie.key) is not None
assert serie_list.get_by_folder(sample_serie.folder) is not None
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
assert serie_list.get_by_folder(sample_serie.folder) is not None
class TestSerieListDatabaseMode:
    """Cover the database-mode constructor flags and the conversion helpers."""

    def test_init_with_db_session_skips_file_load(
        self, temp_directory, mock_db_session
    ):
        """A db_session passed to the constructor suppresses file loading."""
        # Put a data file on disk that the constructor must ignore.
        folder_name = "Test Folder"
        series_dir = os.path.join(temp_directory, folder_name)
        os.makedirs(series_dir, exist_ok=True)
        on_disk = Serie(
            key="test-key",
            name="Test",
            site="https://test.com",
            folder=folder_name,
            episodeDict={}
        )
        on_disk.save_to_file(os.path.join(series_dir, "data"))

        serie_list = SerieList(temp_directory, db_session=mock_db_session)

        # Nothing was read from disk.
        assert len(serie_list.keyDict) == 0

    def test_init_with_skip_load(self, temp_directory):
        """skip_load=True leaves the collection empty after construction."""
        assert len(SerieList(temp_directory, skip_load=True).keyDict) == 0

    def test_convert_from_db_basic(self, mock_anime_series):
        """_convert_from_db copies the scalar fields and int-casts seasons."""
        converted = SerieList._convert_from_db(mock_anime_series)

        for field in ("key", "name", "site", "folder"):
            assert getattr(converted, field) == getattr(
                mock_anime_series, field
            )

        # String season keys from the model arrive as ints on the entity.
        expected_seasons = {1: [1, 2, 3], 2: [1, 2]}
        for season, episodes in expected_seasons.items():
            assert season in converted.episodeDict
            assert converted.episodeDict[season] == episodes

    def test_convert_from_db_empty_episode_dict(self, mock_anime_series):
        """A None episode_dict on the model becomes an empty mapping."""
        mock_anime_series.episode_dict = None

        converted = SerieList._convert_from_db(mock_anime_series)

        assert converted.episodeDict == {}

    def test_convert_from_db_handles_invalid_season_keys(
        self, mock_anime_series
    ):
        """Non-integer season keys are dropped; valid ones are kept."""
        mock_anime_series.episode_dict = {
            "1": [1, 2],
            "invalid": [3, 4],  # cannot be cast to int
            "2": [5, 6]
        }

        converted = SerieList._convert_from_db(mock_anime_series)

        # The convertible keys survive ...
        assert 1 in converted.episodeDict
        assert 2 in converted.episodeDict
        # ... and the unconvertible one is absent.
        assert "invalid" not in converted.episodeDict

    def test_convert_to_db_dict(self, sample_serie):
        """_convert_to_db_dict mirrors the fields and stringifies seasons."""
        as_dict = SerieList._convert_to_db_dict(sample_serie)

        for field in ("key", "name", "site", "folder"):
            assert as_dict[field] == getattr(sample_serie, field)

        # Season keys are strings in the database representation.
        assert "1" in as_dict["episode_dict"]
        assert as_dict["episode_dict"]["1"] == [1, 2, 3]

    def test_convert_to_db_dict_empty_episode_dict(self):
        """An empty episodeDict is represented as None."""
        empty_serie = Serie(
            key="test",
            name="Test",
            site="https://test.com",
            folder="Test",
            episodeDict={}
        )

        as_dict = SerieList._convert_to_db_dict(empty_serie)

        assert as_dict["episode_dict"] is None
class TestSerieListDatabaseAsync:
    """Cover the async, service-backed methods of SerieList."""

    # Patch target: the service class the methods import at call time.
    _SERVICE_PATH = 'src.server.database.service.AnimeSeriesService'

    @pytest.mark.asyncio
    async def test_load_series_from_db(
        self, temp_directory, mock_db_session, mock_anime_series
    ):
        """load_series_from_db fills the collection from the service."""
        serie_list = SerieList(temp_directory, skip_load=True)

        with patch(self._SERVICE_PATH) as service:
            # The service hands back exactly one row.
            service.get_all = AsyncMock(return_value=[mock_anime_series])

            loaded_count = await serie_list.load_series_from_db(
                mock_db_session
            )

            assert loaded_count == 1
            assert mock_anime_series.key in serie_list.keyDict

    @pytest.mark.asyncio
    async def test_load_series_from_db_clears_existing(
        self, temp_directory, mock_db_session, mock_anime_series
    ):
        """Entries present before loading are gone afterwards."""
        serie_list = SerieList(temp_directory, skip_load=True)
        # Seed an entry that the database does not know about.
        serie_list.keyDict["old-key"] = MagicMock()

        with patch(self._SERVICE_PATH) as service:
            service.get_all = AsyncMock(return_value=[mock_anime_series])

            await serie_list.load_series_from_db(mock_db_session)

            # Only the database content remains.
            assert "old-key" not in serie_list.keyDict
            assert mock_anime_series.key in serie_list.keyDict

    @pytest.mark.asyncio
    async def test_add_to_db_creates_new_series(
        self, temp_directory, mock_db_session, sample_serie
    ):
        """add_to_db inserts an unknown series and caches it in memory."""
        serie_list = SerieList(temp_directory, skip_load=True)
        created_row = MagicMock()
        created_row.id = 1

        with patch(self._SERVICE_PATH) as service:
            # Lookup misses, so the insert path is taken.
            service.get_by_key = AsyncMock(return_value=None)
            service.create = AsyncMock(return_value=created_row)

            outcome = await serie_list.add_to_db(
                sample_serie, mock_db_session
            )

            assert outcome is created_row
            service.create.assert_called_once()
            # The in-memory collection is updated as well.
            assert sample_serie.key in serie_list.keyDict

    @pytest.mark.asyncio
    async def test_add_to_db_skips_existing(
        self, temp_directory, mock_db_session, sample_serie
    ):
        """add_to_db returns None and does not insert a known series."""
        serie_list = SerieList(temp_directory, skip_load=True)

        with patch(self._SERVICE_PATH) as service:
            # Lookup hits, so no insert may happen.
            service.get_by_key = AsyncMock(return_value=MagicMock())

            outcome = await serie_list.add_to_db(
                sample_serie, mock_db_session
            )

            assert outcome is None
            service.create.assert_not_called()

    @pytest.mark.asyncio
    async def test_contains_in_db_returns_true_when_exists(
        self, temp_directory, mock_db_session
    ):
        """contains_in_db is True when the lookup finds a record."""
        serie_list = SerieList(temp_directory, skip_load=True)

        with patch(self._SERVICE_PATH) as service:
            service.get_by_key = AsyncMock(return_value=MagicMock())

            found = await serie_list.contains_in_db(
                "test-key", mock_db_session
            )

            assert found is True

    @pytest.mark.asyncio
    async def test_contains_in_db_returns_false_when_not_exists(
        self, temp_directory, mock_db_session
    ):
        """contains_in_db is False when the lookup returns None."""
        serie_list = SerieList(temp_directory, skip_load=True)

        with patch(self._SERVICE_PATH) as service:
            service.get_by_key = AsyncMock(return_value=None)

            found = await serie_list.contains_in_db(
                "nonexistent", mock_db_session
            )

            assert found is False
class TestSerieListDeprecationWarnings:
    """Test deprecation warnings are raised for file-based methods.

    The recorded warnings are filtered down to DeprecationWarning before
    they are counted. With ``simplefilter("always")`` every category is
    recorded, so an unrelated warning emitted during the call would
    otherwise break the "exactly one" assertion.
    """

    def test_add_raises_deprecation_warning(
        self, temp_directory, sample_serie
    ):
        """Test add() raises exactly one deprecation warning."""
        serie_list = SerieList(temp_directory, skip_load=True)

        with warnings.catch_warnings(record=True) as recorded:
            warnings.simplefilter("always")
            serie_list.add(sample_serie)

        # Only deprecation warnings are relevant to this test.
        deprecations = [
            entry for entry in recorded
            if issubclass(entry.category, DeprecationWarning)
        ]
        assert len(deprecations) == 1
        assert "add_to_db()" in str(deprecations[0].message)

    def test_get_by_folder_raises_deprecation_warning(
        self, temp_directory, sample_serie
    ):
        """Test get_by_folder() raises exactly one deprecation warning."""
        serie_list = SerieList(temp_directory, skip_load=True)
        # Seed the collection directly so add() cannot emit its own warning.
        serie_list.keyDict[sample_serie.key] = sample_serie

        with warnings.catch_warnings(record=True) as recorded:
            warnings.simplefilter("always")
            serie_list.get_by_folder(sample_serie.folder)

        # Only deprecation warnings are relevant to this test.
        deprecations = [
            entry for entry in recorded
            if issubclass(entry.category, DeprecationWarning)
        ]
        assert len(deprecations) == 1
        assert "get_by_key()" in str(deprecations[0].message)
class TestSerieListBackwardCompatibility:
    """Check that the legacy file-based workflow keeps working."""

    def test_file_based_mode_still_works(
        self, temp_directory, sample_serie
    ):
        """Without a db_session, add() writes a data file and caches."""
        serie_list = SerieList(temp_directory)

        # add() is deprecated; silence the warning for this check.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            serie_list.add(sample_serie)

        # The data file exists on disk ...
        expected_file = os.path.join(
            temp_directory, sample_serie.folder, "data"
        )
        assert os.path.isfile(expected_file)

        # ... and the series is available in memory.
        assert serie_list.contains(sample_serie.key)

    def test_load_from_file_still_works(
        self, temp_directory, sample_serie
    ):
        """A data file written beforehand is picked up on construction."""
        # Write the data file where the loader looks for it.
        series_dir = os.path.join(temp_directory, sample_serie.folder)
        os.makedirs(series_dir, exist_ok=True)
        sample_serie.save_to_file(os.path.join(series_dir, "data"))

        # A fresh list loads it automatically.
        serie_list = SerieList(temp_directory)

        assert serie_list.contains(sample_serie.key)
        reloaded = serie_list.get_by_key(sample_serie.key)
        assert reloaded.name == sample_serie.name