diff --git a/instructions.md b/instructions.md index 9878da4..8894a11 100644 --- a/instructions.md +++ b/instructions.md @@ -222,7 +222,7 @@ async def lifespan(app: FastAPI): --- -### Task 5: Update SerieScanner to Use Database ⬜ +### Task 5: Update SerieScanner to Use Database ✅ **File:** `src/core/SerieScanner.py` diff --git a/src/core/SerieScanner.py b/src/core/SerieScanner.py index f5acbf1..248e64c 100644 --- a/src/core/SerieScanner.py +++ b/src/core/SerieScanner.py @@ -3,14 +3,23 @@ SerieScanner - Scans directories for anime series and missing episodes. This module provides functionality to scan anime directories, identify missing episodes, and report progress through callback interfaces. + +The scanner supports two modes of operation: + 1. File-based mode (legacy): Saves scan results to data files + 2. Database mode (preferred): Saves scan results to SQLite database + +Database mode is preferred for new code. File-based mode is kept for +backward compatibility with CLI usage. """ +from __future__ import annotations import logging import os import re import traceback import uuid -from typing import Callable, Iterable, Iterator, Optional +import warnings +from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Optional from src.core.entities.series import Serie from src.core.exceptions.Exceptions import MatchNotFoundError, NoKeyFoundException @@ -24,6 +33,10 @@ from src.core.interfaces.callbacks import ( ) from src.core.providers.base_provider import Loader +if TYPE_CHECKING: + from sqlalchemy.ext.asyncio import AsyncSession + from src.server.database.models import AnimeSeries + logger = logging.getLogger(__name__) error_logger = logging.getLogger("error") no_key_found_logger = logging.getLogger("series.nokey") @@ -34,13 +47,28 @@ class SerieScanner: Scans directories for anime series and identifies missing episodes. Supports progress callbacks for real-time scanning updates. + + The scanner supports two modes: + 1. File-based (legacy): Set db_session=None, saves to data files + 2. Database mode: Provide db_session, saves to SQLite database + + Example: + # File-based mode (legacy) + scanner = SerieScanner("/path/to/anime", loader) + scanner.scan() + + # Database mode (preferred) + async with get_db_session() as db: + scanner = SerieScanner("/path/to/anime", loader, db_session=db) + await scanner.scan_async() """ def __init__( self, basePath: str, loader: Loader, - callback_manager: Optional[CallbackManager] = None + callback_manager: Optional[CallbackManager] = None, + db_session: Optional["AsyncSession"] = None ) -> None: """ Initialize the SerieScanner. @@ -49,6 +77,8 @@ class SerieScanner: basePath: Base directory containing anime series loader: Loader instance for fetching series information callback_manager: Optional callback manager for progress updates + db_session: Optional database session for database mode. + If provided, scan_async() should be used instead of scan(). Raises: ValueError: If basePath is invalid or doesn't exist @@ -71,6 +101,7 @@ class SerieScanner: callback_manager or CallbackManager() ) self._current_operation_id: Optional[str] = None + self._db_session: Optional["AsyncSession"] = db_session logger.info("Initialized SerieScanner with base path: %s", abs_path) @@ -97,7 +128,14 @@ class SerieScanner: callback: Optional[Callable[[str, int], None]] = None ) -> None: """ - Scan directories for anime series and missing episodes. + Scan directories for anime series and missing episodes (file-based). + + This method saves results to data files. For database storage, + use scan_async() instead. + + .. deprecated:: 2.0.0 + Use :meth:`scan_async` for database-backed storage. + File-based storage will be removed in a future version. Args: callback: Optional legacy callback function (folder, count) @@ -105,6 +143,12 @@ class SerieScanner: Raises: Exception: If scan fails critically """ + warnings.warn( + "File-based scan() is deprecated. Use scan_async() for " + "database storage.", + DeprecationWarning, + stacklevel=2 + ) # Generate unique operation ID self._current_operation_id = str(uuid.uuid4()) @@ -291,6 +335,304 @@ class SerieScanner: raise + async def scan_async( + self, + db: "AsyncSession", + callback: Optional[Callable[[str, int], None]] = None + ) -> None: + """ + Scan directories for anime series and save to database. + + This is the preferred method for scanning when using database + storage. Results are saved to the database instead of files. + + Args: + db: Database session for async operations + callback: Optional legacy callback function (folder, count) + + Raises: + Exception: If scan fails critically + + Example: + async with get_db_session() as db: + scanner = SerieScanner("/path/to/anime", loader) + await scanner.scan_async(db) + """ + # Generate unique operation ID + self._current_operation_id = str(uuid.uuid4()) + + logger.info("Starting async scan for missing episodes (database mode)") + + # Notify scan starting + self._callback_manager.notify_progress( + ProgressContext( + operation_type=OperationType.SCAN, + operation_id=self._current_operation_id, + phase=ProgressPhase.STARTING, + current=0, + total=0, + percentage=0.0, + message="Initializing scan (database mode)" + ) + ) + + try: + # Get total items to process + total_to_scan = self.get_total_to_scan() + logger.info("Total folders to scan: %d", total_to_scan) + + result = self.__find_mp4_files() + counter = 0 + saved_to_db = 0 + + for folder, mp4_files in result: + try: + counter += 1 + + # Calculate progress + if total_to_scan > 0: + percentage = (counter / total_to_scan) * 100 + else: + percentage = 0.0 + + # Notify progress + self._callback_manager.notify_progress( + ProgressContext( + operation_type=OperationType.SCAN, + operation_id=self._current_operation_id, + phase=ProgressPhase.IN_PROGRESS, + current=counter, + total=total_to_scan, + percentage=percentage, + message=f"Scanning: {folder}", + details=f"Found {len(mp4_files)} episodes" + ) + ) + + # Call legacy callback if provided + if callback: + callback(folder, counter) + + serie = self.__read_data_from_file(folder) + if ( + serie is not None + and serie.key + and serie.key.strip() + ): + # Get missing episodes from provider + missing_episodes, _site = ( + self.__get_missing_episodes_and_season( + serie.key, mp4_files + ) + ) + serie.episodeDict = missing_episodes + serie.folder = folder + + # Save to database instead of file + await self._save_serie_to_db(serie, db) + saved_to_db += 1 + + # Store by key in memory cache + if serie.key in self.keyDict: + logger.error( + "Duplicate series found with key '%s' " + "(folder: '%s')", + serie.key, + folder + ) + else: + self.keyDict[serie.key] = serie + logger.debug( + "Stored series with key '%s' (folder: '%s')", + serie.key, + folder + ) + + except NoKeyFoundException as nkfe: + error_msg = f"Error processing folder '{folder}': {nkfe}" + logger.error(error_msg) + self._callback_manager.notify_error( + ErrorContext( + operation_type=OperationType.SCAN, + operation_id=self._current_operation_id, + error=nkfe, + message=error_msg, + recoverable=True, + metadata={"folder": folder, "key": None} + ) + ) + except Exception as e: + error_msg = ( + f"Folder: '{folder}' - Unexpected error: {e}" + ) + error_logger.error( + "%s\n%s", + error_msg, + traceback.format_exc() + ) + self._callback_manager.notify_error( + ErrorContext( + operation_type=OperationType.SCAN, + operation_id=self._current_operation_id, + error=e, + message=error_msg, + recoverable=True, + metadata={"folder": folder, "key": None} + ) + ) + continue + + # Notify scan completion + self._callback_manager.notify_completion( + CompletionContext( + operation_type=OperationType.SCAN, + operation_id=self._current_operation_id, + success=True, + message=f"Scan completed. Processed {counter} folders.", + statistics={ + "total_folders": counter, + "series_found": len(self.keyDict), + "saved_to_db": saved_to_db + } + ) + ) + + logger.info( + "Async scan completed. Processed %d folders, " + "found %d series, saved %d to database", + counter, + len(self.keyDict), + saved_to_db + ) + + except Exception as e: + error_msg = f"Critical async scan error: {e}" + logger.error("%s\n%s", error_msg, traceback.format_exc()) + + self._callback_manager.notify_error( + ErrorContext( + operation_type=OperationType.SCAN, + operation_id=self._current_operation_id, + error=e, + message=error_msg, + recoverable=False + ) + ) + + self._callback_manager.notify_completion( + CompletionContext( + operation_type=OperationType.SCAN, + operation_id=self._current_operation_id, + success=False, + message=error_msg + ) + ) + + raise + + async def _save_serie_to_db( + self, + serie: Serie, + db: "AsyncSession" + ) -> Optional["AnimeSeries"]: + """ + Save or update a series in the database. + + Creates a new record if the series doesn't exist, or updates + the episode_dict if it has changed. + + Args: + serie: Serie instance to save + db: Database session for async operations + + Returns: + Created or updated AnimeSeries instance, or None if unchanged + """ + from src.server.database.service import AnimeSeriesService + + # Check if series already exists + existing = await AnimeSeriesService.get_by_key(db, serie.key) + + if existing: + # Update episode_dict if changed + if existing.episode_dict != serie.episodeDict: + updated = await AnimeSeriesService.update( + db, + existing.id, + episode_dict=serie.episodeDict, + folder=serie.folder + ) + logger.info( + "Updated series in database: %s (key=%s)", + serie.name, + serie.key + ) + return updated + else: + logger.debug( + "Series unchanged in database: %s (key=%s)", + serie.name, + serie.key + ) + return None + else: + # Create new series + anime_series = await AnimeSeriesService.create( + db=db, + key=serie.key, + name=serie.name, + site=serie.site, + folder=serie.folder, + episode_dict=serie.episodeDict, + ) + logger.info( + "Created series in database: %s (key=%s)", + serie.name, + serie.key + ) + return anime_series + + async def _update_serie_in_db( + self, + serie: Serie, + db: "AsyncSession" + ) -> Optional["AnimeSeries"]: + """ + Update an existing series in the database. + + Args: + serie: Serie instance to update + db: Database session for async operations + + Returns: + Updated AnimeSeries instance, or None if not found + """ + from src.server.database.service import AnimeSeriesService + + existing = await AnimeSeriesService.get_by_key(db, serie.key) + if not existing: + logger.warning( + "Cannot update non-existent series: %s (key=%s)", + serie.name, + serie.key + ) + return None + + updated = await AnimeSeriesService.update( + db, + existing.id, + name=serie.name, + site=serie.site, + folder=serie.folder, + episode_dict=serie.episodeDict, + ) + logger.info( + "Updated series in database: %s (key=%s)", + serie.name, + serie.key + ) + return updated + def __find_mp4_files(self) -> Iterator[tuple[str, list[str]]]: """Find all .mp4 files in the directory structure.""" logger.info("Scanning for .mp4 files") diff --git a/tests/unit/test_serie_scanner.py b/tests/unit/test_serie_scanner.py new file mode 100644 index 0000000..da79863 --- /dev/null +++ b/tests/unit/test_serie_scanner.py @@ -0,0 +1,421 @@ +"""Tests for SerieScanner class - database and file-based operations.""" + +import os +import tempfile +import warnings +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from src.core.entities.series import Serie +from src.core.SerieScanner import SerieScanner + + +@pytest.fixture +def temp_directory(): + """Create a temporary directory with subdirectories for testing.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create an anime folder with an mp4 file + anime_folder = os.path.join(tmpdir, "Attack on Titan (2013)") + os.makedirs(anime_folder, exist_ok=True) + + # Create a dummy mp4 file + mp4_path = os.path.join( + anime_folder, "Attack on Titan - S01E001 - (German Dub).mp4" + ) + with open(mp4_path, "w") as f: + f.write("dummy mp4") + + yield tmpdir + + +@pytest.fixture +def mock_loader(): + """Create a mock Loader instance.""" + loader = MagicMock() + loader.get_season_episode_count = MagicMock(return_value={1: 25}) + loader.is_language = MagicMock(return_value=True) + return loader + + +@pytest.fixture +def mock_db_session(): + """Create a mock async database session.""" + session = AsyncMock() + return session + + +@pytest.fixture +def sample_serie(): + """Create a sample Serie for testing.""" + return Serie( + key="attack-on-titan", + name="Attack on Titan", + site="aniworld.to", + folder="Attack on Titan (2013)", + episodeDict={1: [2, 3, 4]} + ) + + +class TestSerieScannerInitialization: + """Test SerieScanner initialization.""" + + def test_init_success(self, temp_directory, mock_loader): + """Test successful initialization.""" + scanner = SerieScanner(temp_directory, mock_loader) + + assert scanner.directory == os.path.abspath(temp_directory) + assert scanner.loader == mock_loader + assert scanner.keyDict == {} + + def test_init_with_db_session( + self, temp_directory, mock_loader, mock_db_session + ): + """Test initialization with database session.""" + scanner = SerieScanner( + temp_directory, + mock_loader, + db_session=mock_db_session + ) + + assert scanner._db_session == mock_db_session + + def test_init_empty_path_raises_error(self, mock_loader): + """Test initialization with empty path raises ValueError.""" + with pytest.raises(ValueError, match="empty"): + SerieScanner("", mock_loader) + + def test_init_nonexistent_path_raises_error(self, mock_loader): + """Test initialization with non-existent path raises ValueError.""" + with pytest.raises(ValueError, match="does not exist"): + SerieScanner("/nonexistent/path", mock_loader) + + +class TestSerieScannerScanDeprecation: + """Test scan() deprecation warning.""" + + def test_scan_raises_deprecation_warning( + self, temp_directory, mock_loader + ): + """Test that scan() raises a deprecation warning.""" + scanner = SerieScanner(temp_directory, mock_loader) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + # Mock the internal methods to avoid actual scanning + with patch.object(scanner, 'get_total_to_scan', return_value=0): + with patch.object( + scanner, '_SerieScanner__find_mp4_files', + return_value=iter([]) + ): + scanner.scan() + + # Check deprecation warning was raised + assert len(w) >= 1 + deprecation_warnings = [ + warning for warning in w + if issubclass(warning.category, DeprecationWarning) + ] + assert len(deprecation_warnings) >= 1 + assert "scan_async()" in str(deprecation_warnings[0].message) + + +class TestSerieScannerAsyncScan: + """Test async database scanning methods.""" + + @pytest.mark.asyncio + async def test_scan_async_saves_to_database( + self, temp_directory, mock_loader, mock_db_session, sample_serie + ): + """Test scan_async saves results to database.""" + scanner = SerieScanner(temp_directory, mock_loader) + + # Mock the internal methods + with patch.object(scanner, 'get_total_to_scan', return_value=1): + with patch.object( + scanner, + '_SerieScanner__find_mp4_files', + return_value=iter([ + ("Attack on Titan (2013)", ["S01E001.mp4"]) + ]) + ): + with patch.object( + scanner, + '_SerieScanner__read_data_from_file', + return_value=sample_serie + ): + with patch.object( + scanner, + '_SerieScanner__get_missing_episodes_and_season', + return_value=({1: [2, 3]}, "aniworld.to") + ): + with patch( + 'src.server.database.service.AnimeSeriesService' + ) as mock_service: + mock_service.get_by_key = AsyncMock( + return_value=None + ) + mock_created = MagicMock() + mock_created.id = 1 + mock_service.create = AsyncMock( + return_value=mock_created + ) + + await scanner.scan_async(mock_db_session) + + # Verify database create was called + mock_service.create.assert_called_once() + + @pytest.mark.asyncio + async def test_scan_async_updates_existing_series( + self, temp_directory, mock_loader, mock_db_session, sample_serie + ): + """Test scan_async updates existing series in database.""" + scanner = SerieScanner(temp_directory, mock_loader) + + # Mock existing series in database + existing = MagicMock() + existing.id = 1 + existing.episode_dict = {1: [5, 6]} # Different from sample_serie + + with patch.object(scanner, 'get_total_to_scan', return_value=1): + with patch.object( + scanner, + '_SerieScanner__find_mp4_files', + return_value=iter([ + ("Attack on Titan (2013)", ["S01E001.mp4"]) + ]) + ): + with patch.object( + scanner, + '_SerieScanner__read_data_from_file', + return_value=sample_serie + ): + with patch.object( + scanner, + '_SerieScanner__get_missing_episodes_and_season', + return_value=({1: [2, 3]}, "aniworld.to") + ): + with patch( + 'src.server.database.service.AnimeSeriesService' + ) as mock_service: + mock_service.get_by_key = AsyncMock( + return_value=existing + ) + mock_service.update = AsyncMock( + return_value=existing + ) + + await scanner.scan_async(mock_db_session) + + # Verify database update was called + mock_service.update.assert_called_once() + + @pytest.mark.asyncio + async def test_scan_async_handles_errors_gracefully( + self, temp_directory, mock_loader, mock_db_session + ): + """Test scan_async handles folder processing errors gracefully.""" + scanner = SerieScanner(temp_directory, mock_loader) + + with patch.object(scanner, 'get_total_to_scan', return_value=1): + with patch.object( + scanner, + '_SerieScanner__find_mp4_files', + return_value=iter([ + ("Error Folder", ["S01E001.mp4"]) + ]) + ): + with patch.object( + scanner, + '_SerieScanner__read_data_from_file', + side_effect=Exception("Test error") + ): + # Should not raise, should continue + await scanner.scan_async(mock_db_session) + + +class TestSerieScannerDatabaseHelpers: + """Test database helper methods.""" + + @pytest.mark.asyncio + async def test_save_serie_to_db_creates_new( + self, temp_directory, mock_loader, mock_db_session, sample_serie + ): + """Test _save_serie_to_db creates new series.""" + scanner = SerieScanner(temp_directory, mock_loader) + + with patch( + 'src.server.database.service.AnimeSeriesService' + ) as mock_service: + mock_service.get_by_key = AsyncMock(return_value=None) + mock_created = MagicMock() + mock_created.id = 1 + mock_service.create = AsyncMock(return_value=mock_created) + + result = await scanner._save_serie_to_db( + sample_serie, mock_db_session + ) + + assert result is mock_created + mock_service.create.assert_called_once() + + @pytest.mark.asyncio + async def test_save_serie_to_db_updates_existing( + self, temp_directory, mock_loader, mock_db_session, sample_serie + ): + """Test _save_serie_to_db updates existing series.""" + scanner = SerieScanner(temp_directory, mock_loader) + + existing = MagicMock() + existing.id = 1 + existing.episode_dict = {1: [5, 6]} # Different episodes + + with patch( + 'src.server.database.service.AnimeSeriesService' + ) as mock_service: + mock_service.get_by_key = AsyncMock(return_value=existing) + mock_service.update = AsyncMock(return_value=existing) + + result = await scanner._save_serie_to_db( + sample_serie, mock_db_session + ) + + assert result is existing + mock_service.update.assert_called_once() + + @pytest.mark.asyncio + async def test_save_serie_to_db_skips_unchanged( + self, temp_directory, mock_loader, mock_db_session, sample_serie + ): + """Test _save_serie_to_db skips update if unchanged.""" + scanner = SerieScanner(temp_directory, mock_loader) + + existing = MagicMock() + existing.id = 1 + existing.episode_dict = sample_serie.episodeDict # Same episodes + + with patch( + 'src.server.database.service.AnimeSeriesService' + ) as mock_service: + mock_service.get_by_key = AsyncMock(return_value=existing) + + result = await scanner._save_serie_to_db( + sample_serie, mock_db_session + ) + + assert result is None + mock_service.update.assert_not_called() + + @pytest.mark.asyncio + async def test_update_serie_in_db_updates_existing( + self, temp_directory, mock_loader, mock_db_session, sample_serie + ): + """Test _update_serie_in_db updates existing series.""" + scanner = SerieScanner(temp_directory, mock_loader) + + existing = MagicMock() + existing.id = 1 + + with patch( + 'src.server.database.service.AnimeSeriesService' + ) as mock_service: + mock_service.get_by_key = AsyncMock(return_value=existing) + mock_service.update = AsyncMock(return_value=existing) + + result = await scanner._update_serie_in_db( + sample_serie, mock_db_session + ) + + assert result is existing + mock_service.update.assert_called_once() + + @pytest.mark.asyncio + async def test_update_serie_in_db_returns_none_if_not_found( + self, temp_directory, mock_loader, mock_db_session, sample_serie + ): + """Test _update_serie_in_db returns None if series not found.""" + scanner = SerieScanner(temp_directory, mock_loader) + + with patch( + 'src.server.database.service.AnimeSeriesService' + ) as mock_service: + mock_service.get_by_key = AsyncMock(return_value=None) + + result = await scanner._update_serie_in_db( + sample_serie, mock_db_session + ) + + assert result is None + + +class TestSerieScannerBackwardCompatibility: + """Test backward compatibility of file-based operations.""" + + def test_file_based_scan_still_works( + self, temp_directory, mock_loader, sample_serie + ): + """Test file-based scan still works with deprecation warning.""" + scanner = SerieScanner(temp_directory, mock_loader) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + + with patch.object(scanner, 'get_total_to_scan', return_value=1): + with patch.object( + scanner, + '_SerieScanner__find_mp4_files', + return_value=iter([ + ("Attack on Titan (2013)", ["S01E001.mp4"]) + ]) + ): + with patch.object( + scanner, + '_SerieScanner__read_data_from_file', + return_value=sample_serie + ): + with patch.object( + scanner, + '_SerieScanner__get_missing_episodes_and_season', + return_value=({1: [2, 3]}, "aniworld.to") + ): + with patch.object( + sample_serie, 'save_to_file' + ) as mock_save: + scanner.scan() + + # Verify file was saved + mock_save.assert_called_once() + + def test_keydict_populated_after_scan( + self, temp_directory, mock_loader, sample_serie + ): + """Test keyDict is populated after scan.""" + scanner = SerieScanner(temp_directory, mock_loader) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + + with patch.object(scanner, 'get_total_to_scan', return_value=1): + with patch.object( + scanner, + '_SerieScanner__find_mp4_files', + return_value=iter([ + ("Attack on Titan (2013)", ["S01E001.mp4"]) + ]) + ): + with patch.object( + scanner, + '_SerieScanner__read_data_from_file', + return_value=sample_serie + ): + with patch.object( + scanner, + '_SerieScanner__get_missing_episodes_and_season', + return_value=({1: [2, 3]}, "aniworld.to") + ): + with patch.object(sample_serie, 'save_to_file'): + scanner.scan() + + assert sample_serie.key in scanner.keyDict