feat: scan anime folders to populate AnimeSeries DB

- Add _scan_folders_to_database() - iterates anime_directory subdirs
- Extract title/year from folder names via (YYYY) pattern
- Resolve provider key via search when exactly one result is returned and its name matches the folder title (case-insensitive)
- Create AnimeSeries records for new folders only
- Add corresponding unit tests

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-06-04 21:34:10 +02:00
parent 830f6b4c93
commit 2b5c969a83
2 changed files with 396 additions and 0 deletions

View File

@@ -1,17 +1,22 @@
"""Centralized initialization service for application startup and setup."""
import asyncio
import os
import re
from pathlib import Path
from typing import Callable, Optional
import structlog
from src.config.settings import settings
from src.server.database.service import AnimeSeriesService
from src.server.services.anime_service import sync_legacy_series_to_db
from src.server.services.legacy_file_migration import migrate_series_from_files_to_db
# Module-level structlog logger used by the initialization helpers below.
logger = structlog.get_logger(__name__)
# Provider site URL; stored in the `site` field of AnimeSeries records that
# the folder scan creates.
# NOTE(review): the constant is spelled ANIMEWORLD while the URL points at
# aniworld.to — confirm the naming is intentional before relying on it.
ANIMEWORLD_URL = "https://aniworld.to"
async def _check_scan_status(
check_method: Callable,
@@ -299,6 +304,110 @@ async def _load_series_into_memory(progress_service=None) -> None:
)
# Four-digit year in parentheses, anywhere in the folder name.
_FOLDER_YEAR_RE = re.compile(r'\((\d{4})\)')
# Trailing "(YYYY)" suffix, including surrounding whitespace, to strip
# from the folder name when deriving the title.
_FOLDER_YEAR_SUFFIX_RE = re.compile(r'\s*\(\d{4}\)\s*$')


def _parse_folder_name(folder_name: str) -> tuple:
    """Split a folder name into ``(title, year)``.

    The year is taken from the first ``(YYYY)`` group found anywhere in the
    name; only a *trailing* ``(YYYY)`` suffix is removed to form the title.
    If stripping the suffix leaves nothing (e.g. a folder literally named
    ``"(2013)"``), the full folder name is used as the title so that a
    record is never created with an empty name.

    Args:
        folder_name: Name of the series folder (no path components).

    Returns:
        tuple: ``(title, year)`` where ``year`` is an ``int`` or ``None``.
    """
    year_match = _FOLDER_YEAR_RE.search(folder_name)
    year = int(year_match.group(1)) if year_match else None
    title = _FOLDER_YEAR_SUFFIX_RE.sub('', folder_name).strip()
    return title or folder_name, year


async def _scan_folders_to_database(progress_service=None) -> int:
    """Scan anime folders and create AnimeSeries DB records.

    This function runs during initial setup only. It:
    1. Iterates subdirectories of ``settings.anime_directory``
    2. Skips folders that already have an AnimeSeries record
    3. Extracts title/year from folder names (year via (YYYY) pattern)
    4. Uses provider search to resolve the key field, but only when the
       search returns exactly one result whose name equals the title
       (case-insensitive); otherwise the key is left as an empty string
    5. Creates AnimeSeries records for new folders

    Any unexpected error aborts the scan; it is logged and the number of
    records created up to that point is returned. A failing provider search
    for a single folder is only logged as a warning and does not abort.

    Args:
        progress_service: Optional ProgressService for progress updates.
            Currently accepted for signature parity with the other
            initialization steps; this function does not report progress.

    Returns:
        int: Number of new series created
    """
    from src.server.database.connection import get_db_session
    from src.server.utils.dependencies import get_series_app

    logger.info("Scanning anime folders for new series...")

    anime_directory = settings.anime_directory
    if not anime_directory or not os.path.isdir(anime_directory):
        logger.info(
            "Anime directory not configured or does not exist, skipping folder scan"
        )
        return 0

    # The setting may be a plain string path; normalise to Path so that
    # iterdir() works for both str and Path values.
    anime_root = Path(anime_directory)

    created_count = 0
    skipped_existing = 0

    try:
        series_app = get_series_app()

        async with get_db_session() as db:
            for folder in anime_root.iterdir():
                if not folder.is_dir():
                    continue

                folder_name = folder.name

                # Skip if series already exists in DB
                existing = await AnimeSeriesService.get_by_folder(db, folder_name)
                if existing:
                    skipped_existing += 1
                    continue

                title, year = _parse_folder_name(folder_name)

                # Try to resolve key via provider search. Only an
                # unambiguous, exactly-named hit is trusted.
                resolved_key = ""
                try:
                    results = await series_app.search(title)
                    if len(results) == 1:
                        candidate = results[0]
                        # Tolerate results whose name/key is missing or None.
                        candidate_name = (candidate.get('name') or '').lower()
                        if candidate_name == title.lower():
                            resolved_key = candidate.get('key') or ''
                except Exception as exc:
                    # Best effort: the record is still created without a key.
                    logger.warning(
                        "Provider search failed for folder",
                        folder=folder_name,
                        error=str(exc)
                    )

                # Create AnimeSeries record
                await AnimeSeriesService.create(
                    db=db,
                    key=resolved_key,
                    name=title,
                    site=ANIMEWORLD_URL,
                    folder=folder_name,
                    year=year,
                )
                created_count += 1
                logger.debug(
                    "Created series from folder",
                    folder=folder_name,
                    title=title,
                    year=year,
                    key=resolved_key or "(unresolved)"
                )

    except Exception as exc:
        logger.error(
            "Folder scan failed",
            error=str(exc),
            exc_info=True
        )
        return created_count

    logger.info(
        "Folder scan complete",
        created=created_count,
        skipped_existing=skipped_existing
    )
    return created_count
async def _validate_anime_directory(progress_service=None) -> bool:
"""Validate that anime directory is configured.
@@ -373,6 +482,11 @@ async def perform_initial_setup(progress_service=None):
# Perform the actual initialization
try:
# Scan folders and create AnimeSeries records first
folder_scan_count = await _scan_folders_to_database(progress_service)
if folder_scan_count > 0:
logger.info("Created %d series from anime folders", folder_scan_count)
# Next, run legacy file migration if needed (independent of initial scan)
is_legacy_migration_done = await _check_legacy_migration_status()
if not is_legacy_migration_done:

View File

@@ -23,6 +23,7 @@ from src.server.services.initialization_service import (
_mark_media_scan_completed,
_mark_nfo_scan_completed,
_mark_scan_completed,
_scan_folders_to_database,
_sync_anime_folders,
_validate_anime_directory,
perform_initial_setup,
@@ -738,3 +739,284 @@ class TestInitializationIntegration:
result2 = await perform_initial_setup()
assert result2 is False
class TestScanFoldersToDatabase:
    """Test folder scanning and AnimeSeries creation.

    Every test builds a real directory tree under ``tmp_path`` and runs
    ``_scan_folders_to_database`` with settings, the series app, the DB
    session and ``AnimeSeriesService`` replaced by mocks (see ``_run_scan``).
    """

    @staticmethod
    def _make_anime_dir(tmp_path, *folder_names):
        """Create ``tmp_path/anime`` containing one subfolder per name."""
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        for folder_name in folder_names:
            (anime_dir / folder_name).mkdir()
        return anime_dir

    @staticmethod
    async def _run_scan(
        anime_dir,
        search_results=None,
        existing=None,
        search_side_effect=None,
    ):
        """Run the scan with all collaborators mocked.

        Args:
            anime_dir: Value assigned to ``settings.anime_directory``.
            search_results: List returned by the mocked provider search
                (defaults to an empty list).
            existing: Value returned by ``AnimeSeriesService.get_by_folder``;
                ``None`` means "no record yet".
            search_side_effect: Optional exception raised by the mocked
                provider search instead of returning results.

        Returns:
            tuple: ``(result, mock_create)`` — the scan's return value and
            the mock standing in for ``AnimeSeriesService.create``.
        """
        mock_series_app = AsyncMock()
        mock_series_app.search.return_value = (
            [] if search_results is None else search_results
        )
        if search_side_effect is not None:
            mock_series_app.search.side_effect = search_side_effect

        # get_db_session() is used as ``async with``; MagicMock supports the
        # async context-manager protocol, yielding mock_db on entry.
        mock_db = AsyncMock()
        mock_get_db = MagicMock()
        mock_get_db.__aenter__.return_value = mock_db
        mock_get_db.__aexit__.return_value = None

        with patch(
            'src.server.services.initialization_service.settings'
        ) as mock_settings, \
             patch(
                 'src.server.utils.dependencies.get_series_app',
                 return_value=mock_series_app
             ), \
             patch(
                 'src.server.database.connection.get_db_session',
                 return_value=mock_get_db
             ), \
             patch(
                 'src.server.services.initialization_service.AnimeSeriesService.get_by_folder',
                 new_callable=AsyncMock, return_value=existing
             ), \
             patch(
                 'src.server.services.initialization_service.AnimeSeriesService.create',
                 new_callable=AsyncMock
             ) as mock_create:
            mock_settings.anime_directory = anime_dir
            result = await _scan_folders_to_database()

        return result, mock_create

    @pytest.mark.asyncio
    async def test_scan_folders_extracts_year(self, tmp_path):
        """Folder 'Attack on Titan (2013)' → title='Attack on Titan', year=2013."""
        anime_dir = self._make_anime_dir(tmp_path, "Attack on Titan (2013)")

        result, mock_create = await self._run_scan(anime_dir)

        assert result == 1
        mock_create.assert_called_once()
        call_kwargs = mock_create.call_args.kwargs
        assert call_kwargs['name'] == "Attack on Titan"
        assert call_kwargs['year'] == 2013

    @pytest.mark.asyncio
    async def test_scan_folders_no_year(self, tmp_path):
        """Folder 'OnePiece' → title='OnePiece', year=None."""
        anime_dir = self._make_anime_dir(tmp_path, "OnePiece")

        result, mock_create = await self._run_scan(anime_dir)

        assert result == 1
        call_kwargs = mock_create.call_args.kwargs
        assert call_kwargs['name'] == "OnePiece"
        assert call_kwargs['year'] is None

    @pytest.mark.asyncio
    async def test_scan_folders_single_match_uses_key(self, tmp_path):
        """Search returns 1 match with same name → use its key."""
        anime_dir = self._make_anime_dir(tmp_path, "Attack on Titan (2013)")

        result, mock_create = await self._run_scan(
            anime_dir,
            search_results=[
                {'key': 'attack-on-titan', 'name': 'Attack on Titan'}
            ],
        )

        assert result == 1
        call_kwargs = mock_create.call_args.kwargs
        assert call_kwargs['key'] == 'attack-on-titan'

    @pytest.mark.asyncio
    async def test_scan_folders_single_match_different_name_leaves_key_empty(
        self, tmp_path
    ):
        """Search returns 1 match with a different name → key=''."""
        anime_dir = self._make_anime_dir(tmp_path, "Attack on Titan (2013)")

        result, mock_create = await self._run_scan(
            anime_dir,
            search_results=[
                {
                    'key': 'attack-on-titan-final-season',
                    'name': 'Attack on Titan Final Season',
                }
            ],
        )

        assert result == 1
        call_kwargs = mock_create.call_args.kwargs
        assert call_kwargs['key'] == ''

    @pytest.mark.asyncio
    async def test_scan_folders_no_match_leaves_key_empty(self, tmp_path):
        """Search returns 0 results → key=''."""
        anime_dir = self._make_anime_dir(tmp_path, "Unknown Series (2020)")

        result, mock_create = await self._run_scan(anime_dir)

        assert result == 1
        call_kwargs = mock_create.call_args.kwargs
        assert call_kwargs['key'] == ''

    @pytest.mark.asyncio
    async def test_scan_folders_multiple_matches_leaves_key_empty(self, tmp_path):
        """Search returns >1 results → key=''."""
        anime_dir = self._make_anime_dir(tmp_path, "Attack on Titan (2013)")

        result, mock_create = await self._run_scan(
            anime_dir,
            search_results=[
                {'key': 'attack-on-titan', 'name': 'Attack on Titan'},
                {'key': 'attack-on-titan-clone', 'name': 'Attack on Titan Clone'}
            ],
        )

        assert result == 1
        call_kwargs = mock_create.call_args.kwargs
        assert call_kwargs['key'] == ''

    @pytest.mark.asyncio
    async def test_scan_folders_search_failure_still_creates(self, tmp_path):
        """Provider search raises → record is still created with key=''."""
        anime_dir = self._make_anime_dir(tmp_path, "Attack on Titan (2013)")

        result, mock_create = await self._run_scan(
            anime_dir,
            search_side_effect=RuntimeError("provider unavailable"),
        )

        assert result == 1
        call_kwargs = mock_create.call_args.kwargs
        assert call_kwargs['key'] == ''
        assert call_kwargs['name'] == "Attack on Titan"

    @pytest.mark.asyncio
    async def test_scan_folders_skips_existing(self, tmp_path):
        """Series with same folder already in DB → skip."""
        anime_dir = self._make_anime_dir(tmp_path, "Attack on Titan (2013)")

        result, mock_create = await self._run_scan(
            anime_dir,
            existing=MagicMock(),  # existing series
        )

        assert result == 0
        mock_create.assert_not_called()

    @pytest.mark.asyncio
    async def test_scan_folders_ignores_files(self, tmp_path):
        """Plain files inside the anime directory are not treated as series."""
        anime_dir = self._make_anime_dir(tmp_path)
        (anime_dir / "notes.txt").write_text("not a series folder")

        result, mock_create = await self._run_scan(anime_dir)

        assert result == 0
        mock_create.assert_not_called()

    @pytest.mark.asyncio
    async def test_scan_folders_missing_directory(self, tmp_path):
        """Configured anime directory does not exist → return 0."""
        with patch(
            'src.server.services.initialization_service.settings'
        ) as mock_settings:
            mock_settings.anime_directory = tmp_path / "does-not-exist"
            result = await _scan_folders_to_database()

        assert result == 0

    @pytest.mark.asyncio
    async def test_scan_folders_empty_anime_directory(self):
        """No anime directory configured → return 0."""
        with patch(
            'src.server.services.initialization_service.settings'
        ) as mock_settings:
            mock_settings.anime_directory = None
            result = await _scan_folders_to_database()

        assert result == 0