NoDataFile #1

Merged
lukas.pupkalipinski merged 70 commits from NoDataFile into main 2026-01-09 18:42:18 +01:00
3 changed files with 767 additions and 4 deletions
Showing only changes of commit 46ca4c9aac - Show all commits

View File

@ -222,7 +222,7 @@ async def lifespan(app: FastAPI):
---
### Task 5: Update SerieScanner to Use Database
### Task 5: Update SerieScanner to Use Database
**File:** `src/core/SerieScanner.py`

View File

@ -3,14 +3,23 @@ SerieScanner - Scans directories for anime series and missing episodes.
This module provides functionality to scan anime directories, identify
missing episodes, and report progress through callback interfaces.
The scanner supports two modes of operation:
1. File-based mode (legacy): Saves scan results to data files
2. Database mode (preferred): Saves scan results to SQLite database
Database mode is preferred for new code. File-based mode is kept for
backward compatibility with CLI usage.
"""
from __future__ import annotations
import logging
import os
import re
import traceback
import uuid
from typing import Callable, Iterable, Iterator, Optional
import warnings
from typing import TYPE_CHECKING, Callable, Iterable, Iterator, Optional
from src.core.entities.series import Serie
from src.core.exceptions.Exceptions import MatchNotFoundError, NoKeyFoundException
@ -24,6 +33,10 @@ from src.core.interfaces.callbacks import (
)
from src.core.providers.base_provider import Loader
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncSession
from src.server.database.models import AnimeSeries
logger = logging.getLogger(__name__)
error_logger = logging.getLogger("error")
no_key_found_logger = logging.getLogger("series.nokey")
@ -34,13 +47,28 @@ class SerieScanner:
Scans directories for anime series and identifies missing episodes.
Supports progress callbacks for real-time scanning updates.
The scanner supports two modes:
1. File-based (legacy): Set db_session=None, saves to data files
2. Database mode: Provide db_session, saves to SQLite database
Example:
# File-based mode (legacy)
scanner = SerieScanner("/path/to/anime", loader)
scanner.scan()
# Database mode (preferred)
async with get_db_session() as db:
scanner = SerieScanner("/path/to/anime", loader, db_session=db)
await scanner.scan_async()
"""
def __init__(
self,
basePath: str,
loader: Loader,
callback_manager: Optional[CallbackManager] = None
callback_manager: Optional[CallbackManager] = None,
db_session: Optional["AsyncSession"] = None
) -> None:
"""
Initialize the SerieScanner.
@ -49,6 +77,8 @@ class SerieScanner:
basePath: Base directory containing anime series
loader: Loader instance for fetching series information
callback_manager: Optional callback manager for progress updates
db_session: Optional database session for database mode.
If provided, scan_async() should be used instead of scan().
Raises:
ValueError: If basePath is invalid or doesn't exist
@ -71,6 +101,7 @@ class SerieScanner:
callback_manager or CallbackManager()
)
self._current_operation_id: Optional[str] = None
self._db_session: Optional["AsyncSession"] = db_session
logger.info("Initialized SerieScanner with base path: %s", abs_path)
@ -97,7 +128,14 @@ class SerieScanner:
callback: Optional[Callable[[str, int], None]] = None
) -> None:
"""
Scan directories for anime series and missing episodes.
Scan directories for anime series and missing episodes (file-based).
This method saves results to data files. For database storage,
use scan_async() instead.
.. deprecated:: 2.0.0
Use :meth:`scan_async` for database-backed storage.
File-based storage will be removed in a future version.
Args:
callback: Optional legacy callback function (folder, count)
@ -105,6 +143,12 @@ class SerieScanner:
Raises:
Exception: If scan fails critically
"""
warnings.warn(
"File-based scan() is deprecated. Use scan_async() for "
"database storage.",
DeprecationWarning,
stacklevel=2
)
# Generate unique operation ID
self._current_operation_id = str(uuid.uuid4())
@ -291,6 +335,304 @@ class SerieScanner:
raise
async def scan_async(
self,
db: "AsyncSession",
callback: Optional[Callable[[str, int], None]] = None
) -> None:
"""
Scan directories for anime series and save to database.
This is the preferred method for scanning when using database
storage. Results are saved to the database instead of files.
Args:
db: Database session for async operations
callback: Optional legacy callback function (folder, count)
Raises:
Exception: If scan fails critically
Example:
async with get_db_session() as db:
scanner = SerieScanner("/path/to/anime", loader)
await scanner.scan_async(db)
"""
# Generate unique operation ID
self._current_operation_id = str(uuid.uuid4())
logger.info("Starting async scan for missing episodes (database mode)")
# Notify scan starting
self._callback_manager.notify_progress(
ProgressContext(
operation_type=OperationType.SCAN,
operation_id=self._current_operation_id,
phase=ProgressPhase.STARTING,
current=0,
total=0,
percentage=0.0,
message="Initializing scan (database mode)"
)
)
try:
# Get total items to process
total_to_scan = self.get_total_to_scan()
logger.info("Total folders to scan: %d", total_to_scan)
result = self.__find_mp4_files()
counter = 0
saved_to_db = 0
for folder, mp4_files in result:
try:
counter += 1
# Calculate progress
if total_to_scan > 0:
percentage = (counter / total_to_scan) * 100
else:
percentage = 0.0
# Notify progress
self._callback_manager.notify_progress(
ProgressContext(
operation_type=OperationType.SCAN,
operation_id=self._current_operation_id,
phase=ProgressPhase.IN_PROGRESS,
current=counter,
total=total_to_scan,
percentage=percentage,
message=f"Scanning: {folder}",
details=f"Found {len(mp4_files)} episodes"
)
)
# Call legacy callback if provided
if callback:
callback(folder, counter)
serie = self.__read_data_from_file(folder)
if (
serie is not None
and serie.key
and serie.key.strip()
):
# Get missing episodes from provider
missing_episodes, _site = (
self.__get_missing_episodes_and_season(
serie.key, mp4_files
)
)
serie.episodeDict = missing_episodes
serie.folder = folder
# Save to database instead of file
await self._save_serie_to_db(serie, db)
saved_to_db += 1
# Store by key in memory cache
if serie.key in self.keyDict:
logger.error(
"Duplicate series found with key '%s' "
"(folder: '%s')",
serie.key,
folder
)
else:
self.keyDict[serie.key] = serie
logger.debug(
"Stored series with key '%s' (folder: '%s')",
serie.key,
folder
)
except NoKeyFoundException as nkfe:
error_msg = f"Error processing folder '{folder}': {nkfe}"
logger.error(error_msg)
self._callback_manager.notify_error(
ErrorContext(
operation_type=OperationType.SCAN,
operation_id=self._current_operation_id,
error=nkfe,
message=error_msg,
recoverable=True,
metadata={"folder": folder, "key": None}
)
)
except Exception as e:
error_msg = (
f"Folder: '{folder}' - Unexpected error: {e}"
)
error_logger.error(
"%s\n%s",
error_msg,
traceback.format_exc()
)
self._callback_manager.notify_error(
ErrorContext(
operation_type=OperationType.SCAN,
operation_id=self._current_operation_id,
error=e,
message=error_msg,
recoverable=True,
metadata={"folder": folder, "key": None}
)
)
continue
# Notify scan completion
self._callback_manager.notify_completion(
CompletionContext(
operation_type=OperationType.SCAN,
operation_id=self._current_operation_id,
success=True,
message=f"Scan completed. Processed {counter} folders.",
statistics={
"total_folders": counter,
"series_found": len(self.keyDict),
"saved_to_db": saved_to_db
}
)
)
logger.info(
"Async scan completed. Processed %d folders, "
"found %d series, saved %d to database",
counter,
len(self.keyDict),
saved_to_db
)
except Exception as e:
error_msg = f"Critical async scan error: {e}"
logger.error("%s\n%s", error_msg, traceback.format_exc())
self._callback_manager.notify_error(
ErrorContext(
operation_type=OperationType.SCAN,
operation_id=self._current_operation_id,
error=e,
message=error_msg,
recoverable=False
)
)
self._callback_manager.notify_completion(
CompletionContext(
operation_type=OperationType.SCAN,
operation_id=self._current_operation_id,
success=False,
message=error_msg
)
)
raise
async def _save_serie_to_db(
self,
serie: Serie,
db: "AsyncSession"
) -> Optional["AnimeSeries"]:
"""
Save or update a series in the database.
Creates a new record if the series doesn't exist, or updates
the episode_dict if it has changed.
Args:
serie: Serie instance to save
db: Database session for async operations
Returns:
Created or updated AnimeSeries instance, or None if unchanged
"""
from src.server.database.service import AnimeSeriesService
# Check if series already exists
existing = await AnimeSeriesService.get_by_key(db, serie.key)
if existing:
# Update episode_dict if changed
if existing.episode_dict != serie.episodeDict:
updated = await AnimeSeriesService.update(
db,
existing.id,
episode_dict=serie.episodeDict,
folder=serie.folder
)
logger.info(
"Updated series in database: %s (key=%s)",
serie.name,
serie.key
)
return updated
else:
logger.debug(
"Series unchanged in database: %s (key=%s)",
serie.name,
serie.key
)
return None
else:
# Create new series
anime_series = await AnimeSeriesService.create(
db=db,
key=serie.key,
name=serie.name,
site=serie.site,
folder=serie.folder,
episode_dict=serie.episodeDict,
)
logger.info(
"Created series in database: %s (key=%s)",
serie.name,
serie.key
)
return anime_series
async def _update_serie_in_db(
self,
serie: Serie,
db: "AsyncSession"
) -> Optional["AnimeSeries"]:
"""
Update an existing series in the database.
Args:
serie: Serie instance to update
db: Database session for async operations
Returns:
Updated AnimeSeries instance, or None if not found
"""
from src.server.database.service import AnimeSeriesService
existing = await AnimeSeriesService.get_by_key(db, serie.key)
if not existing:
logger.warning(
"Cannot update non-existent series: %s (key=%s)",
serie.name,
serie.key
)
return None
updated = await AnimeSeriesService.update(
db,
existing.id,
name=serie.name,
site=serie.site,
folder=serie.folder,
episode_dict=serie.episodeDict,
)
logger.info(
"Updated series in database: %s (key=%s)",
serie.name,
serie.key
)
return updated
def __find_mp4_files(self) -> Iterator[tuple[str, list[str]]]:
"""Find all .mp4 files in the directory structure."""
logger.info("Scanning for .mp4 files")

View File

@ -0,0 +1,421 @@
"""Tests for SerieScanner class - database and file-based operations."""
import os
import tempfile
import warnings
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.core.entities.series import Serie
from src.core.SerieScanner import SerieScanner
@pytest.fixture
def temp_directory():
"""Create a temporary directory with subdirectories for testing."""
with tempfile.TemporaryDirectory() as tmpdir:
# Create an anime folder with an mp4 file
anime_folder = os.path.join(tmpdir, "Attack on Titan (2013)")
os.makedirs(anime_folder, exist_ok=True)
# Create a dummy mp4 file
mp4_path = os.path.join(
anime_folder, "Attack on Titan - S01E001 - (German Dub).mp4"
)
with open(mp4_path, "w") as f:
f.write("dummy mp4")
yield tmpdir
@pytest.fixture
def mock_loader():
"""Create a mock Loader instance."""
loader = MagicMock()
loader.get_season_episode_count = MagicMock(return_value={1: 25})
loader.is_language = MagicMock(return_value=True)
return loader
@pytest.fixture
def mock_db_session():
"""Create a mock async database session."""
session = AsyncMock()
return session
@pytest.fixture
def sample_serie():
"""Create a sample Serie for testing."""
return Serie(
key="attack-on-titan",
name="Attack on Titan",
site="aniworld.to",
folder="Attack on Titan (2013)",
episodeDict={1: [2, 3, 4]}
)
class TestSerieScannerInitialization:
"""Test SerieScanner initialization."""
def test_init_success(self, temp_directory, mock_loader):
"""Test successful initialization."""
scanner = SerieScanner(temp_directory, mock_loader)
assert scanner.directory == os.path.abspath(temp_directory)
assert scanner.loader == mock_loader
assert scanner.keyDict == {}
def test_init_with_db_session(
self, temp_directory, mock_loader, mock_db_session
):
"""Test initialization with database session."""
scanner = SerieScanner(
temp_directory,
mock_loader,
db_session=mock_db_session
)
assert scanner._db_session == mock_db_session
def test_init_empty_path_raises_error(self, mock_loader):
"""Test initialization with empty path raises ValueError."""
with pytest.raises(ValueError, match="empty"):
SerieScanner("", mock_loader)
def test_init_nonexistent_path_raises_error(self, mock_loader):
"""Test initialization with non-existent path raises ValueError."""
with pytest.raises(ValueError, match="does not exist"):
SerieScanner("/nonexistent/path", mock_loader)
class TestSerieScannerScanDeprecation:
"""Test scan() deprecation warning."""
def test_scan_raises_deprecation_warning(
self, temp_directory, mock_loader
):
"""Test that scan() raises a deprecation warning."""
scanner = SerieScanner(temp_directory, mock_loader)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
# Mock the internal methods to avoid actual scanning
with patch.object(scanner, 'get_total_to_scan', return_value=0):
with patch.object(
scanner, '_SerieScanner__find_mp4_files',
return_value=iter([])
):
scanner.scan()
# Check deprecation warning was raised
assert len(w) >= 1
deprecation_warnings = [
warning for warning in w
if issubclass(warning.category, DeprecationWarning)
]
assert len(deprecation_warnings) >= 1
assert "scan_async()" in str(deprecation_warnings[0].message)
class TestSerieScannerAsyncScan:
"""Test async database scanning methods."""
@pytest.mark.asyncio
async def test_scan_async_saves_to_database(
self, temp_directory, mock_loader, mock_db_session, sample_serie
):
"""Test scan_async saves results to database."""
scanner = SerieScanner(temp_directory, mock_loader)
# Mock the internal methods
with patch.object(scanner, 'get_total_to_scan', return_value=1):
with patch.object(
scanner,
'_SerieScanner__find_mp4_files',
return_value=iter([
("Attack on Titan (2013)", ["S01E001.mp4"])
])
):
with patch.object(
scanner,
'_SerieScanner__read_data_from_file',
return_value=sample_serie
):
with patch.object(
scanner,
'_SerieScanner__get_missing_episodes_and_season',
return_value=({1: [2, 3]}, "aniworld.to")
):
with patch(
'src.server.database.service.AnimeSeriesService'
) as mock_service:
mock_service.get_by_key = AsyncMock(
return_value=None
)
mock_created = MagicMock()
mock_created.id = 1
mock_service.create = AsyncMock(
return_value=mock_created
)
await scanner.scan_async(mock_db_session)
# Verify database create was called
mock_service.create.assert_called_once()
@pytest.mark.asyncio
async def test_scan_async_updates_existing_series(
self, temp_directory, mock_loader, mock_db_session, sample_serie
):
"""Test scan_async updates existing series in database."""
scanner = SerieScanner(temp_directory, mock_loader)
# Mock existing series in database
existing = MagicMock()
existing.id = 1
existing.episode_dict = {1: [5, 6]} # Different from sample_serie
with patch.object(scanner, 'get_total_to_scan', return_value=1):
with patch.object(
scanner,
'_SerieScanner__find_mp4_files',
return_value=iter([
("Attack on Titan (2013)", ["S01E001.mp4"])
])
):
with patch.object(
scanner,
'_SerieScanner__read_data_from_file',
return_value=sample_serie
):
with patch.object(
scanner,
'_SerieScanner__get_missing_episodes_and_season',
return_value=({1: [2, 3]}, "aniworld.to")
):
with patch(
'src.server.database.service.AnimeSeriesService'
) as mock_service:
mock_service.get_by_key = AsyncMock(
return_value=existing
)
mock_service.update = AsyncMock(
return_value=existing
)
await scanner.scan_async(mock_db_session)
# Verify database update was called
mock_service.update.assert_called_once()
@pytest.mark.asyncio
async def test_scan_async_handles_errors_gracefully(
self, temp_directory, mock_loader, mock_db_session
):
"""Test scan_async handles folder processing errors gracefully."""
scanner = SerieScanner(temp_directory, mock_loader)
with patch.object(scanner, 'get_total_to_scan', return_value=1):
with patch.object(
scanner,
'_SerieScanner__find_mp4_files',
return_value=iter([
("Error Folder", ["S01E001.mp4"])
])
):
with patch.object(
scanner,
'_SerieScanner__read_data_from_file',
side_effect=Exception("Test error")
):
# Should not raise, should continue
await scanner.scan_async(mock_db_session)
class TestSerieScannerDatabaseHelpers:
"""Test database helper methods."""
@pytest.mark.asyncio
async def test_save_serie_to_db_creates_new(
self, temp_directory, mock_loader, mock_db_session, sample_serie
):
"""Test _save_serie_to_db creates new series."""
scanner = SerieScanner(temp_directory, mock_loader)
with patch(
'src.server.database.service.AnimeSeriesService'
) as mock_service:
mock_service.get_by_key = AsyncMock(return_value=None)
mock_created = MagicMock()
mock_created.id = 1
mock_service.create = AsyncMock(return_value=mock_created)
result = await scanner._save_serie_to_db(
sample_serie, mock_db_session
)
assert result is mock_created
mock_service.create.assert_called_once()
@pytest.mark.asyncio
async def test_save_serie_to_db_updates_existing(
self, temp_directory, mock_loader, mock_db_session, sample_serie
):
"""Test _save_serie_to_db updates existing series."""
scanner = SerieScanner(temp_directory, mock_loader)
existing = MagicMock()
existing.id = 1
existing.episode_dict = {1: [5, 6]} # Different episodes
with patch(
'src.server.database.service.AnimeSeriesService'
) as mock_service:
mock_service.get_by_key = AsyncMock(return_value=existing)
mock_service.update = AsyncMock(return_value=existing)
result = await scanner._save_serie_to_db(
sample_serie, mock_db_session
)
assert result is existing
mock_service.update.assert_called_once()
@pytest.mark.asyncio
async def test_save_serie_to_db_skips_unchanged(
self, temp_directory, mock_loader, mock_db_session, sample_serie
):
"""Test _save_serie_to_db skips update if unchanged."""
scanner = SerieScanner(temp_directory, mock_loader)
existing = MagicMock()
existing.id = 1
existing.episode_dict = sample_serie.episodeDict # Same episodes
with patch(
'src.server.database.service.AnimeSeriesService'
) as mock_service:
mock_service.get_by_key = AsyncMock(return_value=existing)
result = await scanner._save_serie_to_db(
sample_serie, mock_db_session
)
assert result is None
mock_service.update.assert_not_called()
@pytest.mark.asyncio
async def test_update_serie_in_db_updates_existing(
self, temp_directory, mock_loader, mock_db_session, sample_serie
):
"""Test _update_serie_in_db updates existing series."""
scanner = SerieScanner(temp_directory, mock_loader)
existing = MagicMock()
existing.id = 1
with patch(
'src.server.database.service.AnimeSeriesService'
) as mock_service:
mock_service.get_by_key = AsyncMock(return_value=existing)
mock_service.update = AsyncMock(return_value=existing)
result = await scanner._update_serie_in_db(
sample_serie, mock_db_session
)
assert result is existing
mock_service.update.assert_called_once()
@pytest.mark.asyncio
async def test_update_serie_in_db_returns_none_if_not_found(
self, temp_directory, mock_loader, mock_db_session, sample_serie
):
"""Test _update_serie_in_db returns None if series not found."""
scanner = SerieScanner(temp_directory, mock_loader)
with patch(
'src.server.database.service.AnimeSeriesService'
) as mock_service:
mock_service.get_by_key = AsyncMock(return_value=None)
result = await scanner._update_serie_in_db(
sample_serie, mock_db_session
)
assert result is None
class TestSerieScannerBackwardCompatibility:
"""Test backward compatibility of file-based operations."""
def test_file_based_scan_still_works(
self, temp_directory, mock_loader, sample_serie
):
"""Test file-based scan still works with deprecation warning."""
scanner = SerieScanner(temp_directory, mock_loader)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
with patch.object(scanner, 'get_total_to_scan', return_value=1):
with patch.object(
scanner,
'_SerieScanner__find_mp4_files',
return_value=iter([
("Attack on Titan (2013)", ["S01E001.mp4"])
])
):
with patch.object(
scanner,
'_SerieScanner__read_data_from_file',
return_value=sample_serie
):
with patch.object(
scanner,
'_SerieScanner__get_missing_episodes_and_season',
return_value=({1: [2, 3]}, "aniworld.to")
):
with patch.object(
sample_serie, 'save_to_file'
) as mock_save:
scanner.scan()
# Verify file was saved
mock_save.assert_called_once()
def test_keydict_populated_after_scan(
self, temp_directory, mock_loader, sample_serie
):
"""Test keyDict is populated after scan."""
scanner = SerieScanner(temp_directory, mock_loader)
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
with patch.object(scanner, 'get_total_to_scan', return_value=1):
with patch.object(
scanner,
'_SerieScanner__find_mp4_files',
return_value=iter([
("Attack on Titan (2013)", ["S01E001.mp4"])
])
):
with patch.object(
scanner,
'_SerieScanner__read_data_from_file',
return_value=sample_serie
):
with patch.object(
scanner,
'_SerieScanner__get_missing_episodes_and_season',
return_value=({1: [2, 3]}, "aniworld.to")
):
with patch.object(sample_serie, 'save_to_file'):
scanner.scan()
assert sample_serie.key in scanner.keyDict