Add database loading methods to SerieList

- Add load_all_from_db() for bulk loading series from DB
- Add _load_single_series_from_db() for loading single series by folder
- Add invalidate_cache() to clear in-memory cache
- Add tests for all new methods

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-26 18:26:25 +02:00
parent 102d83e947
commit 53d6da5dac
2 changed files with 818 additions and 392 deletions

View File

@@ -1,392 +1,527 @@
"""Utilities for loading and managing stored anime series metadata.
This module provides the SerieList class for managing collections of anime
series metadata. It uses file-based storage as fallback when database
is not available.
Note:
This module is part of the core domain layer. Database operations
are handled by the service layer via add_to_db().
"""
from __future__ import annotations
import asyncio
import logging
import os
import warnings
from json import JSONDecodeError
from typing import Dict, Iterable, List, Optional
from src.core.entities.series import Serie
logger = logging.getLogger(__name__)
class SerieList:
"""
Represents the collection of cached series stored on disk.
Series are identified by their unique 'key' (provider identifier).
The 'folder' is metadata only and not used for lookups.
This class manages in-memory series data loaded from filesystem.
It has no database dependencies - all persistence is handled by
the service layer.
Example:
# File-based mode
serie_list = SerieList("/path/to/anime")
series = serie_list.get_all()
Attributes:
directory: Path to the anime directory
keyDict: Internal dictionary mapping serie.key to Serie objects
"""
def __init__(
self,
base_path: str,
skip_load: bool = False
) -> None:
"""Initialize the SerieList.
Args:
base_path: Path to the anime directory
skip_load: If True, skip automatic loading of series from files.
Useful when planning to load from database instead.
"""
self.directory: str = base_path
# Internal storage using serie.key as the dictionary key
self.keyDict: Dict[str, Serie] = {}
# Only auto-load from files if not skipping
if not skip_load:
self.load_series()
def add(self, serie: Serie, use_sanitized_folder: bool = True) -> str:
"""
Persist a new series if it is not already present (file-based mode).
Uses serie.key for identification. Creates the filesystem folder
using either the sanitized display name (default) or the existing
folder property.
Args:
serie: The Serie instance to add
use_sanitized_folder: If True (default), use serie.sanitized_folder
for the filesystem folder name based on display name.
If False, use serie.folder as-is for backward compatibility.
Returns:
str: The folder path that was created/used
Note:
This method creates data files on disk. For database storage,
use add_to_db() instead.
"""
if self.contains(serie.key):
# Return existing folder path
existing = self.keyDict[serie.key]
return os.path.join(self.directory, existing.folder)
# Determine folder name to use
if use_sanitized_folder:
folder_name = serie.sanitized_folder
# Update the serie's folder property to match what we create
serie.folder = folder_name
else:
folder_name = serie.folder
data_path = os.path.join(self.directory, folder_name, "data")
anime_path = os.path.join(self.directory, folder_name)
os.makedirs(anime_path, exist_ok=True)
if not os.path.isfile(data_path):
serie.save_to_file(data_path)
# Store by key, not folder
self.keyDict[serie.key] = serie
return anime_path
async def add_to_db(self, serie: Serie) -> bool:
"""Persist a new series to the database.
Creates the filesystem folder using serie.folder, then persists
the series metadata to the database.
Args:
serie: The Serie instance to add
Returns:
True if successful, False otherwise
"""
try:
from src.server.database.connection import get_async_session_factory
from src.server.database.service import AnimeSeriesService, EpisodeService
folder_name = serie.folder
anime_path = os.path.join(self.directory, folder_name)
os.makedirs(anime_path, exist_ok=True)
session_factory = get_async_session_factory()
db = session_factory()
try:
existing = await AnimeSeriesService.get_by_key(db, serie.key)
if existing:
logger.debug(
"Series '%s' (key=%s) already exists in DB, skipping",
serie.name, serie.key
)
return True
anime_series = await AnimeSeriesService.create(
db=db,
key=serie.key,
name=serie.name,
site=serie.site,
folder=folder_name,
year=serie.year
)
for season, eps in serie.episodeDict.items():
for ep in eps:
await EpisodeService.create(
db=db,
series_id=anime_series.id,
season=season,
episode_number=ep
)
await db.commit()
self.keyDict[serie.key] = serie
logger.info(
"Persisted series '%s' to database",
serie.name
)
return True
except Exception as e:
await db.rollback()
logger.error(
"Failed to persist series '%s' to DB: %s",
serie.key, e, exc_info=True
)
return False
finally:
await db.close()
except Exception as e:
logger.error(
"Could not add series '%s' to DB (DB unavailable?): %s",
serie.key, e
)
return False
def contains(self, key: str) -> bool:
"""
Return True when a series identified by ``key`` already exists.
Args:
key: The unique provider identifier for the series
Returns:
True if the series exists in the collection
"""
return key in self.keyDict
def load_series(self) -> None:
"""Populate the in-memory map with metadata discovered on disk."""
logger.info("Scanning anime folders in %s", self.directory)
try:
entries: Iterable[str] = os.listdir(self.directory)
except OSError as error:
logger.error(
"Unable to scan directory %s: %s",
self.directory,
error,
)
return
nfo_stats = {"total": 0, "with_nfo": 0, "without_nfo": 0}
media_stats = {
"with_poster": 0,
"without_poster": 0,
"with_logo": 0,
"without_logo": 0,
"with_fanart": 0,
"without_fanart": 0
}
for anime_folder in entries:
anime_path = os.path.join(self.directory, anime_folder, "data")
if os.path.isfile(anime_path):
logger.debug("Found data file for folder %s", anime_folder)
serie = self._load_data(anime_folder, anime_path)
if serie:
nfo_stats["total"] += 1
# Check for NFO file
nfo_file_path = os.path.join(
self.directory, anime_folder, "tvshow.nfo"
)
if os.path.isfile(nfo_file_path):
serie.nfo_path = nfo_file_path
nfo_stats["with_nfo"] += 1
else:
nfo_stats["without_nfo"] += 1
logger.debug(
"Series '%s' (key: %s) is missing tvshow.nfo",
serie.name,
serie.key
)
# Check for media files
folder_path = os.path.join(self.directory, anime_folder)
poster_path = os.path.join(folder_path, "poster.jpg")
if os.path.isfile(poster_path):
media_stats["with_poster"] += 1
else:
media_stats["without_poster"] += 1
logger.debug(
"Series '%s' (key: %s) is missing poster.jpg",
serie.name,
serie.key
)
logo_path = os.path.join(folder_path, "logo.png")
if os.path.isfile(logo_path):
media_stats["with_logo"] += 1
else:
media_stats["without_logo"] += 1
logger.debug(
"Series '%s' (key: %s) is missing logo.png",
serie.name,
serie.key
)
fanart_path = os.path.join(folder_path, "fanart.jpg")
if os.path.isfile(fanart_path):
media_stats["with_fanart"] += 1
else:
media_stats["without_fanart"] += 1
logger.debug(
"Series '%s' (key: %s) is missing fanart.jpg",
serie.name,
serie.key
)
continue
logger.warning(
"Skipping folder %s because no metadata file was found",
anime_folder,
)
# Log summary statistics
if nfo_stats["total"] > 0:
logger.info(
"NFO scan complete: %d series total, %d with NFO, %d without NFO",
nfo_stats["total"],
nfo_stats["with_nfo"],
nfo_stats["without_nfo"]
)
logger.info(
"Media scan complete: Poster (%d/%d), Logo (%d/%d), Fanart (%d/%d)",
media_stats["with_poster"],
nfo_stats["total"],
media_stats["with_logo"],
nfo_stats["total"],
media_stats["with_fanart"],
nfo_stats["total"]
)
def _load_data(self, anime_folder: str, data_path: str) -> Optional[Serie]:
"""
Load a single series metadata file into the in-memory collection.
Args:
anime_folder: The folder name (for logging only)
data_path: Path to the metadata file
Returns:
Serie: The loaded Serie object, or None if loading failed
"""
try:
serie = Serie.load_from_file(data_path)
# Store by key, not folder
self.keyDict[serie.key] = serie
logger.debug(
"Successfully loaded metadata for %s (key: %s)",
anime_folder,
serie.key
)
return serie
except (OSError, JSONDecodeError, KeyError, ValueError) as error:
logger.error(
"Failed to load metadata for folder %s from %s: %s",
anime_folder,
data_path,
error,
)
return None
def GetMissingEpisode(self) -> List[Serie]:
"""Return all series that still contain missing episodes."""
return [
serie
for serie in self.keyDict.values()
if serie.episodeDict
]
def get_missing_episodes(self) -> List[Serie]:
"""PEP8-friendly alias for :meth:`GetMissingEpisode`."""
return self.GetMissingEpisode()
def GetList(self) -> List[Serie]:
"""Return all series instances stored in the list."""
return list(self.keyDict.values())
def get_all(self) -> List[Serie]:
"""PEP8-friendly alias for :meth:`GetList`."""
return self.GetList()
def get_by_key(self, key: str) -> Optional[Serie]:
"""
Get a series by its unique provider key.
This is the primary method for series lookup.
Args:
key: The unique provider identifier (e.g., "attack-on-titan")
Returns:
The Serie instance if found, None otherwise
"""
return self.keyDict.get(key)
def get_by_folder(self, folder: str) -> Optional[Serie]:
"""
Get a series by its folder name.
.. deprecated:: 2.0.0
Use :meth:`get_by_key` instead. Folder-based lookups will be
removed in version 3.0.0. The `folder` field is metadata only
and should not be used for identification.
This method is provided for backward compatibility only.
Prefer using get_by_key() for new code.
Args:
folder: The filesystem folder name (e.g., "Attack on Titan (2013)")
Returns:
The Serie instance if found, None otherwise
"""
warnings.warn(
"get_by_folder() is deprecated and will be removed in v3.0.0. "
"Use get_by_key() instead. The 'folder' field is metadata only.",
DeprecationWarning,
stacklevel=2
)
for serie in self.keyDict.values():
if serie.folder == folder:
return serie
return None
"""Utilities for loading and managing stored anime series metadata.
This module provides the SerieList class for managing collections of anime
series metadata. It supports loading from both filesystem (legacy) and
database (primary).
Note:
This module is part of the core domain layer. Database operations
are handled by the service layer via add_to_db().
"""
from __future__ import annotations
import logging
import os
import warnings
from json import JSONDecodeError
from typing import Dict, Iterable, List, Optional
from src.core.entities.series import Serie
logger = logging.getLogger(__name__)
class SerieList:
"""
Represents the collection of cached series stored on disk.
Series are identified by their unique 'key' (provider identifier).
The 'folder' is metadata only and not used for lookups.
This class manages in-memory series data loaded from filesystem.
It has no database dependencies - all persistence is handled by
the service layer.
Example:
# File-based mode
serie_list = SerieList("/path/to/anime")
series = serie_list.get_all()
Attributes:
directory: Path to the anime directory
keyDict: Internal dictionary mapping serie.key to Serie objects
"""
def __init__(
self,
base_path: str,
skip_load: bool = False
) -> None:
"""Initialize the SerieList.
Args:
base_path: Path to the anime directory
skip_load: If True, skip automatic loading of series from files.
Useful when planning to load from database instead.
"""
self.directory: str = base_path
# Internal storage using serie.key as the dictionary key
self.keyDict: Dict[str, Serie] = {}
# Only auto-load from files if not skipping
if not skip_load:
self.load_series()
def add(self, serie: Serie, use_sanitized_folder: bool = True) -> str:
"""
Persist a new series if it is not already present (file-based mode).
Uses serie.key for identification. Creates the filesystem folder
using either the sanitized display name (default) or the existing
folder property.
Args:
serie: The Serie instance to add
use_sanitized_folder: If True (default), use serie.sanitized_folder
for the filesystem folder name based on display name.
If False, use serie.folder as-is for backward compatibility.
Returns:
str: The folder path that was created/used
Note:
This method creates data files on disk. For database storage,
use add_to_db() instead.
"""
if self.contains(serie.key):
# Return existing folder path
existing = self.keyDict[serie.key]
return os.path.join(self.directory, existing.folder)
# Determine folder name to use
if use_sanitized_folder:
folder_name = serie.sanitized_folder
# Update the serie's folder property to match what we create
serie.folder = folder_name
else:
folder_name = serie.folder
data_path = os.path.join(self.directory, folder_name, "data")
anime_path = os.path.join(self.directory, folder_name)
os.makedirs(anime_path, exist_ok=True)
if not os.path.isfile(data_path):
serie.save_to_file(data_path)
# Store by key, not folder
self.keyDict[serie.key] = serie
return anime_path
async def add_to_db(self, serie: Serie) -> bool:
"""Persist a new series to the database.
Creates the filesystem folder using serie.folder, then persists
the series metadata to the database.
Args:
serie: The Serie instance to add
Returns:
True if successful, False otherwise
"""
try:
from src.server.database.connection import get_async_session_factory
from src.server.database.service import AnimeSeriesService, EpisodeService
folder_name = serie.folder
anime_path = os.path.join(self.directory, folder_name)
os.makedirs(anime_path, exist_ok=True)
session_factory = get_async_session_factory()
db = session_factory()
try:
existing = await AnimeSeriesService.get_by_key(db, serie.key)
if existing:
logger.debug(
"Series '%s' (key=%s) already exists in DB, skipping",
serie.name, serie.key
)
return True
anime_series = await AnimeSeriesService.create(
db=db,
key=serie.key,
name=serie.name,
site=serie.site,
folder=folder_name,
year=serie.year
)
for season, eps in serie.episodeDict.items():
for ep in eps:
await EpisodeService.create(
db=db,
series_id=anime_series.id,
season=season,
episode_number=ep
)
await db.commit()
self.keyDict[serie.key] = serie
logger.info(
"Persisted series '%s' to database",
serie.name
)
return True
except Exception as e:
await db.rollback()
logger.error(
"Failed to persist series '%s' to DB: %s",
serie.key, e, exc_info=True
)
return False
finally:
await db.close()
except Exception as e:
logger.error(
"Could not add series '%s' to DB (DB unavailable?): %s",
serie.key, e
)
return False
def contains(self, key: str) -> bool:
"""
Return True when a series identified by ``key`` already exists.
Args:
key: The unique provider identifier for the series
Returns:
True if the series exists in the collection
"""
return key in self.keyDict
def load_series(self) -> None:
"""Populate the in-memory map with metadata discovered on disk."""
logger.info("Scanning anime folders in %s", self.directory)
try:
entries: Iterable[str] = os.listdir(self.directory)
except OSError as error:
logger.error(
"Unable to scan directory %s: %s",
self.directory,
error,
)
return
nfo_stats = {"total": 0, "with_nfo": 0, "without_nfo": 0}
media_stats = {
"with_poster": 0,
"without_poster": 0,
"with_logo": 0,
"without_logo": 0,
"with_fanart": 0,
"without_fanart": 0
}
for anime_folder in entries:
anime_path = os.path.join(self.directory, anime_folder, "data")
if os.path.isfile(anime_path):
logger.debug("Found data file for folder %s", anime_folder)
serie = self._load_data(anime_folder, anime_path)
if serie:
nfo_stats["total"] += 1
# Check for NFO file
nfo_file_path = os.path.join(
self.directory, anime_folder, "tvshow.nfo"
)
if os.path.isfile(nfo_file_path):
serie.nfo_path = nfo_file_path
nfo_stats["with_nfo"] += 1
else:
nfo_stats["without_nfo"] += 1
logger.debug(
"Series '%s' (key: %s) is missing tvshow.nfo",
serie.name,
serie.key
)
# Check for media files
folder_path = os.path.join(self.directory, anime_folder)
poster_path = os.path.join(folder_path, "poster.jpg")
if os.path.isfile(poster_path):
media_stats["with_poster"] += 1
else:
media_stats["without_poster"] += 1
logger.debug(
"Series '%s' (key: %s) is missing poster.jpg",
serie.name,
serie.key
)
logo_path = os.path.join(folder_path, "logo.png")
if os.path.isfile(logo_path):
media_stats["with_logo"] += 1
else:
media_stats["without_logo"] += 1
logger.debug(
"Series '%s' (key: %s) is missing logo.png",
serie.name,
serie.key
)
fanart_path = os.path.join(folder_path, "fanart.jpg")
if os.path.isfile(fanart_path):
media_stats["with_fanart"] += 1
else:
media_stats["without_fanart"] += 1
logger.debug(
"Series '%s' (key: %s) is missing fanart.jpg",
serie.name,
serie.key
)
continue
logger.warning(
"Skipping folder %s because no metadata file was found",
anime_folder,
)
# Log summary statistics
if nfo_stats["total"] > 0:
logger.info(
"NFO scan complete: %d series total, %d with NFO, %d without NFO",
nfo_stats["total"],
nfo_stats["with_nfo"],
nfo_stats["without_nfo"]
)
logger.info(
"Media scan complete: Poster (%d/%d), Logo (%d/%d), Fanart (%d/%d)",
media_stats["with_poster"],
nfo_stats["total"],
media_stats["with_logo"],
nfo_stats["total"],
media_stats["with_fanart"],
nfo_stats["total"]
)
def _load_data(self, anime_folder: str, data_path: str) -> Optional[Serie]:
"""
Load a single series metadata file into the in-memory collection.
Args:
anime_folder: The folder name (for logging only)
data_path: Path to the metadata file
Returns:
Serie: The loaded Serie object, or None if loading failed
"""
try:
serie = Serie.load_from_file(data_path)
# Store by key, not folder
self.keyDict[serie.key] = serie
logger.debug(
"Successfully loaded metadata for %s (key: %s)",
anime_folder,
serie.key
)
return serie
except (OSError, JSONDecodeError, KeyError, ValueError) as error:
logger.error(
"Failed to load metadata for folder %s from %s: %s",
anime_folder,
data_path,
error,
)
return None
def GetMissingEpisode(self) -> List[Serie]:
"""Return all series that still contain missing episodes."""
return [
serie
for serie in self.keyDict.values()
if serie.episodeDict
]
def get_missing_episodes(self) -> List[Serie]:
"""PEP8-friendly alias for :meth:`GetMissingEpisode`."""
return self.GetMissingEpisode()
def GetList(self) -> List[Serie]:
"""Return all series instances stored in the list."""
return list(self.keyDict.values())
def get_all(self) -> List[Serie]:
"""PEP8-friendly alias for :meth:`GetList`."""
return self.GetList()
def get_by_key(self, key: str) -> Optional[Serie]:
"""
Get a series by its unique provider key.
This is the primary method for series lookup.
Args:
key: The unique provider identifier (e.g., "attack-on-titan")
Returns:
The Serie instance if found, None otherwise
"""
return self.keyDict.get(key)
def get_by_folder(self, folder: str) -> Optional[Serie]:
"""
Get a series by its folder name.
.. deprecated:: 2.0.0
Use :meth:`get_by_key` instead. Folder-based lookups will be
removed in version 3.0.0. The `folder` field is metadata only
and should not be used for identification.
This method is provided for backward compatibility only.
Prefer using get_by_key() for new code.
Args:
folder: The filesystem folder name (e.g., "Attack on Titan (2013)")
Returns:
The Serie instance if found, None otherwise
"""
warnings.warn(
"get_by_folder() is deprecated and will be removed in v3.0.0. "
"Use get_by_key() instead. The 'folder' field is metadata only.",
DeprecationWarning,
stacklevel=2
)
for serie in self.keyDict.values():
if serie.folder == folder:
return serie
return None
async def load_all_from_db(self) -> int:
"""Load all series from database into in-memory cache.
Retrieves all anime series from the database with their episodes
and populates the in-memory keyDict for fast access.
This method replaces file-based loading. Use after initialization
when database is ready.
Returns:
int: Number of series loaded into cache
"""
from src.server.database.connection import get_async_session_factory
from src.server.database.service import AnimeSeriesService
try:
session_factory = get_async_session_factory()
db = session_factory()
try:
anime_series_list = await AnimeSeriesService.get_all(
db, with_episodes=True
)
count = 0
for anime_series in anime_series_list:
episode_dict: Dict[int, List[int]] = {}
if anime_series.episodes:
for ep in anime_series.episodes:
if ep.season not in episode_dict:
episode_dict[ep.season] = []
episode_dict[ep.season].append(ep.episode_number)
serie = Serie(
key=anime_series.key,
name=anime_series.name,
site=anime_series.site,
folder=anime_series.folder,
episodeDict=episode_dict,
year=anime_series.year
)
self.keyDict[serie.key] = serie
count += 1
logger.info(
"Loaded %d series from database into in-memory cache",
count
)
return count
finally:
await db.close()
except RuntimeError:
logger.warning(
"Database not available, skipping DB load"
)
return 0
async def _load_single_series_from_db(
self,
anime_folder: str
) -> Optional[Serie]:
"""Load a single series from database by folder name.
Looks up a series in the database by its folder name and adds
it to the in-memory cache.
Args:
anime_folder: The filesystem folder name to look up
Returns:
Serie if found and loaded, None otherwise
"""
from src.server.database.connection import get_async_session_factory
from src.server.database.service import AnimeSeriesService
try:
session_factory = get_async_session_factory()
db = session_factory()
try:
anime_series = await AnimeSeriesService.get_by_folder(
db, anime_folder
)
if not anime_series:
logger.debug(
"Series with folder '%s' not found in DB",
anime_folder
)
return None
episode_dict: Dict[int, List[int]] = {}
if anime_series.episodes:
for ep in anime_series.episodes:
if ep.season not in episode_dict:
episode_dict[ep.season] = []
episode_dict[ep.season].append(ep.episode_number)
serie = Serie(
key=anime_series.key,
name=anime_series.name,
site=anime_series.site,
folder=anime_series.folder,
episodeDict=episode_dict,
year=anime_series.year
)
self.keyDict[serie.key] = serie
logger.debug(
"Loaded series '%s' (key=%s) from DB",
serie.name, serie.key
)
return serie
finally:
await db.close()
except RuntimeError:
logger.warning(
"Database not available, cannot load series '%s'",
anime_folder
)
return None
def invalidate_cache(self) -> None:
"""Clear the in-memory cache.
Use after database modifications to force reload from DB
on next access.
"""
self.keyDict.clear()
logger.debug("SerieList in-memory cache invalidated")
def reload(self) -> None:
"""Reload series from filesystem (legacy mode).
Warning:
This method uses file-based loading and should only be
used as fallback when database is not available.
"""
self.load_series()