Task 5: Update SerieScanner to use database storage
- Add db_session parameter to SerieScanner.__init__
- Add async scan_async() method for database-backed scanning
- Add _save_serie_to_db() helper for creating/updating series
- Add _update_serie_in_db() helper for updating existing series
- Add deprecation warning to file-based scan() method
- Maintain backward compatibility for CLI usage
- Add comprehensive unit tests (15 tests, all passing)
- Update instructions.md to mark Task 5 complete
This commit is contained in:
@@ -3,14 +3,23 @@ SerieScanner - Scans directories for anime series and missing episodes.
|
||||
|
||||
This module provides functionality to scan anime directories, identify
|
||||
missing episodes, and report progress through callback interfaces.
|
||||
|
||||
The scanner supports two modes of operation:
|
||||
1. File-based mode (legacy): Saves scan results to data files
|
||||
2. Database mode (preferred): Saves scan results to SQLite database
|
||||
|
||||
Database mode is preferred for new code. File-based mode is kept for
|
||||
backward compatibility with CLI usage.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import traceback
|
||||
import uuid
|
||||
from typing import Callable, Iterable, Iterator, Optional
|
||||
import warnings
|
||||
from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Optional
|
||||
|
||||
from src.core.entities.series import Serie
|
||||
from src.core.exceptions.Exceptions import MatchNotFoundError, NoKeyFoundException
|
||||
@@ -24,6 +33,10 @@ from src.core.interfaces.callbacks import (
|
||||
)
|
||||
from src.core.providers.base_provider import Loader
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from src.server.database.models import AnimeSeries
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
error_logger = logging.getLogger("error")
|
||||
no_key_found_logger = logging.getLogger("series.nokey")
|
||||
@@ -34,13 +47,28 @@ class SerieScanner:
|
||||
Scans directories for anime series and identifies missing episodes.
|
||||
|
||||
Supports progress callbacks for real-time scanning updates.
|
||||
|
||||
The scanner supports two modes:
|
||||
1. File-based (legacy): Set db_session=None, saves to data files
|
||||
2. Database mode: Provide db_session, saves to SQLite database
|
||||
|
||||
Example:
|
||||
# File-based mode (legacy)
|
||||
scanner = SerieScanner("/path/to/anime", loader)
|
||||
scanner.scan()
|
||||
|
||||
# Database mode (preferred)
|
||||
async with get_db_session() as db:
|
||||
scanner = SerieScanner("/path/to/anime", loader, db_session=db)
|
||||
await scanner.scan_async()
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
basePath: str,
|
||||
loader: Loader,
|
||||
callback_manager: Optional[CallbackManager] = None
|
||||
callback_manager: Optional[CallbackManager] = None,
|
||||
db_session: Optional["AsyncSession"] = None
|
||||
) -> None:
|
||||
"""
|
||||
Initialize the SerieScanner.
|
||||
@@ -49,6 +77,8 @@ class SerieScanner:
|
||||
basePath: Base directory containing anime series
|
||||
loader: Loader instance for fetching series information
|
||||
callback_manager: Optional callback manager for progress updates
|
||||
db_session: Optional database session for database mode.
|
||||
If provided, scan_async() should be used instead of scan().
|
||||
|
||||
Raises:
|
||||
ValueError: If basePath is invalid or doesn't exist
|
||||
@@ -71,6 +101,7 @@ class SerieScanner:
|
||||
callback_manager or CallbackManager()
|
||||
)
|
||||
self._current_operation_id: Optional[str] = None
|
||||
self._db_session: Optional["AsyncSession"] = db_session
|
||||
|
||||
logger.info("Initialized SerieScanner with base path: %s", abs_path)
|
||||
|
||||
@@ -97,7 +128,14 @@ class SerieScanner:
|
||||
callback: Optional[Callable[[str, int], None]] = None
|
||||
) -> None:
|
||||
"""
|
||||
Scan directories for anime series and missing episodes.
|
||||
Scan directories for anime series and missing episodes (file-based).
|
||||
|
||||
This method saves results to data files. For database storage,
|
||||
use scan_async() instead.
|
||||
|
||||
.. deprecated:: 2.0.0
|
||||
Use :meth:`scan_async` for database-backed storage.
|
||||
File-based storage will be removed in a future version.
|
||||
|
||||
Args:
|
||||
callback: Optional legacy callback function (folder, count)
|
||||
@@ -105,6 +143,12 @@ class SerieScanner:
|
||||
Raises:
|
||||
Exception: If scan fails critically
|
||||
"""
|
||||
warnings.warn(
|
||||
"File-based scan() is deprecated. Use scan_async() for "
|
||||
"database storage.",
|
||||
DeprecationWarning,
|
||||
stacklevel=2
|
||||
)
|
||||
# Generate unique operation ID
|
||||
self._current_operation_id = str(uuid.uuid4())
|
||||
|
||||
@@ -291,6 +335,304 @@ class SerieScanner:
|
||||
|
||||
raise
|
||||
|
||||
async def scan_async(
    self,
    db: Optional["AsyncSession"] = None,
    callback: Optional[Callable[[str, int], None]] = None
) -> None:
    """
    Scan directories for anime series and save to database.

    This is the preferred method for scanning when using database
    storage. Results are saved to the database instead of files.

    Errors raised while processing a single folder are logged, reported
    through the callback manager as recoverable, and the scan moves on
    to the next folder. Any other failure is reported as
    non-recoverable, a failed completion is emitted, and the exception
    is re-raised.

    Args:
        db: Database session for async operations. When omitted, the
            session given to the constructor as ``db_session`` is used.
        callback: Optional legacy callback function (folder, count)

    Raises:
        ValueError: If no database session is available (``db`` is None
            and no ``db_session`` was passed to the constructor)
        Exception: If scan fails critically

    Example:
        async with get_db_session() as db:
            # Session supplied per call
            scanner = SerieScanner("/path/to/anime", loader)
            await scanner.scan_async(db)

            # Session supplied once at construction time
            scanner = SerieScanner("/path/to/anime", loader, db_session=db)
            await scanner.scan_async()
    """
    # Prefer the explicit argument; otherwise fall back to the session
    # stored by __init__. Failing here, before any notification is sent,
    # avoids a scan that "succeeds" while every folder errors on a None
    # session inside the per-folder handler.
    session = db if db is not None else self._db_session
    if session is None:
        raise ValueError(
            "scan_async() requires a database session: pass 'db' or "
            "construct SerieScanner with 'db_session'"
        )

    # Generate unique operation ID
    self._current_operation_id = str(uuid.uuid4())

    logger.info("Starting async scan for missing episodes (database mode)")

    # Notify scan starting
    self._callback_manager.notify_progress(
        ProgressContext(
            operation_type=OperationType.SCAN,
            operation_id=self._current_operation_id,
            phase=ProgressPhase.STARTING,
            current=0,
            total=0,
            percentage=0.0,
            message="Initializing scan (database mode)"
        )
    )

    try:
        # Get total items to process
        total_to_scan = self.get_total_to_scan()
        logger.info("Total folders to scan: %d", total_to_scan)

        result = self.__find_mp4_files()
        counter = 0
        # Counts every series handed to _save_serie_to_db, including
        # ones the helper reports as unchanged (returns None).
        saved_to_db = 0

        for folder, mp4_files in result:
            try:
                counter += 1

                # Calculate progress; guard against an empty base folder
                if total_to_scan > 0:
                    percentage = (counter / total_to_scan) * 100
                else:
                    percentage = 0.0

                # Notify progress
                self._callback_manager.notify_progress(
                    ProgressContext(
                        operation_type=OperationType.SCAN,
                        operation_id=self._current_operation_id,
                        phase=ProgressPhase.IN_PROGRESS,
                        current=counter,
                        total=total_to_scan,
                        percentage=percentage,
                        message=f"Scanning: {folder}",
                        details=f"Found {len(mp4_files)} episodes"
                    )
                )

                # Call legacy callback if provided
                if callback:
                    callback(folder, counter)

                serie = self.__read_data_from_file(folder)
                if (
                    serie is not None
                    and serie.key
                    and serie.key.strip()
                ):
                    # Get missing episodes from provider.
                    # NOTE(review): this is called without await, so it
                    # runs synchronously inside the coroutine; if it does
                    # network I/O it blocks the event loop - confirm.
                    missing_episodes, _site = (
                        self.__get_missing_episodes_and_season(
                            serie.key, mp4_files
                        )
                    )
                    serie.episodeDict = missing_episodes
                    serie.folder = folder

                    # Save to database instead of file
                    await self._save_serie_to_db(serie, session)
                    saved_to_db += 1

                    # Store by key in memory cache. The database write
                    # above has already happened, so a duplicate key is
                    # only kept out of the in-memory dict.
                    if serie.key in self.keyDict:
                        logger.error(
                            "Duplicate series found with key '%s' "
                            "(folder: '%s')",
                            serie.key,
                            folder
                        )
                    else:
                        self.keyDict[serie.key] = serie
                        logger.debug(
                            "Stored series with key '%s' (folder: '%s')",
                            serie.key,
                            folder
                        )

            except NoKeyFoundException as nkfe:
                error_msg = f"Error processing folder '{folder}': {nkfe}"
                logger.error(error_msg)
                self._callback_manager.notify_error(
                    ErrorContext(
                        operation_type=OperationType.SCAN,
                        operation_id=self._current_operation_id,
                        error=nkfe,
                        message=error_msg,
                        recoverable=True,
                        metadata={"folder": folder, "key": None}
                    )
                )
            except Exception as e:
                # Deliberate best-effort: one broken folder must not
                # abort the whole scan, so report it and move on.
                error_msg = (
                    f"Folder: '{folder}' - Unexpected error: {e}"
                )
                error_logger.error(
                    "%s\n%s",
                    error_msg,
                    traceback.format_exc()
                )
                self._callback_manager.notify_error(
                    ErrorContext(
                        operation_type=OperationType.SCAN,
                        operation_id=self._current_operation_id,
                        error=e,
                        message=error_msg,
                        recoverable=True,
                        metadata={"folder": folder, "key": None}
                    )
                )

        # Notify scan completion
        self._callback_manager.notify_completion(
            CompletionContext(
                operation_type=OperationType.SCAN,
                operation_id=self._current_operation_id,
                success=True,
                message=f"Scan completed. Processed {counter} folders.",
                statistics={
                    "total_folders": counter,
                    "series_found": len(self.keyDict),
                    "saved_to_db": saved_to_db
                }
            )
        )

        logger.info(
            "Async scan completed. Processed %d folders, "
            "found %d series, saved %d to database",
            counter,
            len(self.keyDict),
            saved_to_db
        )

    except Exception as e:
        error_msg = f"Critical async scan error: {e}"
        logger.error("%s\n%s", error_msg, traceback.format_exc())

        self._callback_manager.notify_error(
            ErrorContext(
                operation_type=OperationType.SCAN,
                operation_id=self._current_operation_id,
                error=e,
                message=error_msg,
                recoverable=False
            )
        )

        self._callback_manager.notify_completion(
            CompletionContext(
                operation_type=OperationType.SCAN,
                operation_id=self._current_operation_id,
                success=False,
                message=error_msg
            )
        )

        raise
|
||||
|
||||
async def _save_serie_to_db(
    self,
    serie: Serie,
    db: "AsyncSession"
) -> Optional["AnimeSeries"]:
    """
    Save or update a series in the database.

    Looks the series up by ``serie.key``. Creates a new record if none
    exists; otherwise updates the stored episode_dict and folder when
    either of them differs from the scanned values.

    Args:
        serie: Serie instance to save
        db: Database session for async operations

    Returns:
        Created or updated AnimeSeries instance, or None if unchanged
    """
    # Imported inside the function rather than at module level;
    # presumably so file-based/CLI usage does not have to load the
    # server database layer - TODO confirm.
    from src.server.database.service import AnimeSeriesService

    # Check if series already exists
    existing = await AnimeSeriesService.get_by_key(db, serie.key)

    if not existing:
        # Create new series
        anime_series = await AnimeSeriesService.create(
            db=db,
            key=serie.key,
            name=serie.name,
            site=serie.site,
            folder=serie.folder,
            episode_dict=serie.episodeDict,
        )
        logger.info(
            "Created series in database: %s (key=%s)",
            serie.name,
            serie.key
        )
        return anime_series

    # The update below writes both fields, so both must take part in the
    # change check; comparing only episode_dict would leave a stale
    # folder in the database when a series directory is renamed.
    # NOTE(review): if episode_dict round-trips through JSON, its keys
    # may come back as strings and never equal an int-keyed episodeDict;
    # verify against the model.
    episodes_changed = existing.episode_dict != serie.episodeDict
    folder_changed = existing.folder != serie.folder

    if not (episodes_changed or folder_changed):
        logger.debug(
            "Series unchanged in database: %s (key=%s)",
            serie.name,
            serie.key
        )
        return None

    updated = await AnimeSeriesService.update(
        db,
        existing.id,
        episode_dict=serie.episodeDict,
        folder=serie.folder
    )
    logger.info(
        "Updated series in database: %s (key=%s)",
        serie.name,
        serie.key
    )
    return updated
|
||||
|
||||
async def _update_serie_in_db(
    self,
    serie: Serie,
    db: "AsyncSession"
) -> Optional["AnimeSeries"]:
    """
    Overwrite the stored fields of a series that is already in the database.

    The record is located by ``serie.key``; its name, site, folder and
    episode_dict are then replaced with the values carried by ``serie``.
    Nothing is created when the key is unknown.

    Args:
        serie: Serie instance providing the new values
        db: Database session for async operations

    Returns:
        Updated AnimeSeries instance, or None if not found
    """
    from src.server.database.service import AnimeSeriesService

    record = await AnimeSeriesService.get_by_key(db, serie.key)

    if record:
        record_id = record.id
        # Collect the replacement values first, then push them in one call.
        new_values = {
            "name": serie.name,
            "site": serie.site,
            "folder": serie.folder,
            "episode_dict": serie.episodeDict,
        }
        refreshed = await AnimeSeriesService.update(
            db, record_id, **new_values
        )
        logger.info(
            "Updated series in database: %s (key=%s)",
            serie.name,
            serie.key
        )
        return refreshed

    # Unknown key: report it and leave the database untouched.
    logger.warning(
        "Cannot update non-existent series: %s (key=%s)",
        serie.name,
        serie.key
    )
    return None
|
||||
|
||||
def __find_mp4_files(self) -> Iterator[tuple[str, list[str]]]:
|
||||
"""Find all .mp4 files in the directory structure."""
|
||||
logger.info("Scanning for .mp4 files")
|
||||
|
||||
Reference in New Issue
Block a user