Add database migration from legacy data files
- Create DataMigrationService for migrating data files to SQLite
- Add sync database methods to AnimeSeriesService
- Update SerieScanner to save to database with file fallback
- Update anime API endpoints to use database with fallback
- Add delete endpoint for anime series
- Add automatic migration on startup in fastapi_app.py lifespan
- Add 28 unit tests for migration service
- Add 14 integration tests for migration flow
- Update infrastructure.md and database README docs

Migration runs automatically on startup; legacy data files are preserved.
This commit is contained in:
@@ -3,6 +3,8 @@ SerieScanner - Scans directories for anime series and missing episodes.
|
||||
|
||||
This module provides functionality to scan anime directories, identify
|
||||
missing episodes, and report progress through callback interfaces.
|
||||
|
||||
Supports both database storage (preferred) and file-based storage (fallback).
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -34,13 +36,15 @@ class SerieScanner:
|
||||
Scans directories for anime series and identifies missing episodes.
|
||||
|
||||
Supports progress callbacks for real-time scanning updates.
|
||||
Prefers database storage when available, falls back to file storage.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
basePath: str,
|
||||
loader: Loader,
|
||||
callback_manager: Optional[CallbackManager] = None
|
||||
callback_manager: Optional[CallbackManager] = None,
|
||||
use_database: bool = True,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize the SerieScanner.
|
||||
@@ -49,6 +53,7 @@ class SerieScanner:
|
||||
basePath: Base directory containing anime series
|
||||
loader: Loader instance for fetching series information
|
||||
callback_manager: Optional callback manager for progress updates
|
||||
use_database: Whether to save to database (fallback to files)
|
||||
|
||||
Raises:
|
||||
ValueError: If basePath is invalid or doesn't exist
|
||||
@@ -71,6 +76,22 @@ class SerieScanner:
|
||||
callback_manager or CallbackManager()
|
||||
)
|
||||
self._current_operation_id: Optional[str] = None
|
||||
self._use_database: bool = use_database
|
||||
self._db_available: bool = False
|
||||
|
||||
# Check if database is available
|
||||
if use_database:
|
||||
try:
|
||||
from src.server.database.connection import get_sync_session
|
||||
session = get_sync_session()
|
||||
session.close()
|
||||
self._db_available = True
|
||||
logger.info("Database available for SerieScanner")
|
||||
except (ImportError, RuntimeError) as e:
|
||||
logger.warning(
|
||||
"Database not available, using file storage: %s", e
|
||||
)
|
||||
self._db_available = False
|
||||
|
||||
logger.info("Initialized SerieScanner with base path: %s", abs_path)
|
||||
|
||||
@@ -79,6 +100,63 @@ class SerieScanner:
|
||||
"""Get the callback manager instance."""
|
||||
return self._callback_manager
|
||||
|
||||
def _save_serie(self, serie: Serie, data_path: str) -> None:
    """Save a serie to database (if available) and file.

    The file write always happens first, so a file copy exists even if
    the database write below fails (backward compatibility with the
    legacy file-based storage).

    Args:
        serie: The Serie object to save.
        data_path: The path to save the data file for fallback.
    """
    # Always save to file for backward compatibility
    serie.save_to_file(data_path)

    # Try to save to database if available (best-effort: failures are
    # logged, never raised, because the file above is the backup).
    if self._use_database and self._db_available:
        try:
            # Imported lazily so the scanner works without the server
            # package installed (ImportError is handled below).
            from src.server.database.connection import get_sync_session
            from src.server.database.service import AnimeSeriesService

            session = get_sync_session()
            try:
                # Convert episodeDict to JSON-serializable format:
                # season keys become strings; episodes use to_dict()
                # when available, else their str() representation.
                episode_dict_json = None
                if serie.episodeDict:
                    episode_dict_json = {}
                    for season, episodes in serie.episodeDict.items():
                        season_key = str(season)
                        episode_dict_json[season_key] = [
                            ep.to_dict() if hasattr(ep, 'to_dict')
                            else str(ep)
                            for ep in episodes
                        ]

                # Get site from serie if available (attribute may be
                # absent or falsy on older Serie objects).
                site = getattr(serie, 'site', '') or ''

                # Upsert to database (static method call)
                AnimeSeriesService.upsert_sync(
                    db=session,
                    key=serie.key,
                    name=serie.name,
                    site=site,
                    folder=serie.folder,
                    episode_dict=episode_dict_json
                )
                logger.debug(
                    "Saved serie to database: %s", serie.key
                )
            finally:
                # Close the session even when the upsert raises.
                session.close()
        # NOTE(review): only ImportError/RuntimeError are swallowed;
        # other database exceptions would propagate to the caller —
        # confirm that is intended.
        except (ImportError, RuntimeError) as e:
            logger.warning(
                "Failed to save serie to database, "
                "file backup exists: %s",
                e
            )
||||
|
||||
def reinit(self) -> None:
|
||||
"""Reinitialize the series dictionary (keyed by serie.key)."""
|
||||
self.keyDict: dict[str, Serie] = {}
|
||||
@@ -185,7 +263,8 @@ class SerieScanner:
|
||||
data_path = os.path.join(
|
||||
self.directory, folder, 'data'
|
||||
)
|
||||
serie.save_to_file(data_path)
|
||||
# Save to database (if available) and file
|
||||
self._save_serie(serie, data_path)
|
||||
|
||||
# Store by key (primary identifier), not folder
|
||||
if serie.key in self.keyDict:
|
||||
|
||||
@@ -263,32 +263,65 @@ async def list_anime(
|
||||
)
|
||||
|
||||
try:
|
||||
# Get missing episodes from series app
|
||||
if not hasattr(series_app, "list"):
|
||||
return []
|
||||
|
||||
series = series_app.list.GetMissingEpisode()
|
||||
# Try to get series from database first
|
||||
summaries: List[AnimeSummary] = []
|
||||
for serie in series:
|
||||
# Get all properties from the serie object
|
||||
key = getattr(serie, "key", "")
|
||||
name = getattr(serie, "name", "")
|
||||
site = getattr(serie, "site", "")
|
||||
folder = getattr(serie, "folder", "")
|
||||
episode_dict = getattr(serie, "episodeDict", {}) or {}
|
||||
|
||||
try:
|
||||
from src.server.database.connection import get_db_session
|
||||
from src.server.database.service import AnimeSeriesService
|
||||
|
||||
# Convert episode dict keys to strings for JSON serialization
|
||||
missing_episodes = {str(k): v for k, v in episode_dict.items()}
|
||||
|
||||
summaries.append(
|
||||
AnimeSummary(
|
||||
key=key,
|
||||
name=name,
|
||||
site=site,
|
||||
folder=folder,
|
||||
missing_episodes=missing_episodes,
|
||||
async with get_db_session() as db:
|
||||
db_series = await AnimeSeriesService.get_all(db)
|
||||
for series in db_series:
|
||||
episode_dict = series.episode_dict or {}
|
||||
# Only include series with missing episodes
|
||||
if episode_dict:
|
||||
# Ensure episode dict keys are strings
|
||||
missing_episodes = {
|
||||
str(k): v for k, v in episode_dict.items()
|
||||
}
|
||||
summaries.append(
|
||||
AnimeSummary(
|
||||
key=series.key,
|
||||
name=series.name,
|
||||
site=series.site,
|
||||
folder=series.folder,
|
||||
missing_episodes=missing_episodes,
|
||||
)
|
||||
)
|
||||
logger.debug(
|
||||
"Loaded %d series from database",
|
||||
len(summaries)
|
||||
)
|
||||
except Exception as db_error:
|
||||
# Fall back to in-memory series_app if database fails
|
||||
logger.warning(
|
||||
"Database query failed, using in-memory fallback: %s",
|
||||
db_error
|
||||
)
|
||||
if series_app and hasattr(series_app, "list"):
|
||||
series = series_app.list.GetMissingEpisode()
|
||||
for serie in series:
|
||||
key = getattr(serie, "key", "")
|
||||
name = getattr(serie, "name", "")
|
||||
site = getattr(serie, "site", "")
|
||||
folder = getattr(serie, "folder", "")
|
||||
episode_dict = getattr(serie, "episodeDict", {}) or {}
|
||||
|
||||
# Convert episode dict keys to strings
|
||||
missing_episodes = {
|
||||
str(k): v for k, v in episode_dict.items()
|
||||
}
|
||||
|
||||
summaries.append(
|
||||
AnimeSummary(
|
||||
key=key,
|
||||
name=name,
|
||||
site=site,
|
||||
folder=folder,
|
||||
missing_episodes=missing_episodes,
|
||||
)
|
||||
)
|
||||
|
||||
# Apply sorting if requested
|
||||
if sort_by:
|
||||
@@ -585,6 +618,9 @@ async def add_series(
|
||||
) -> dict:
|
||||
"""Add a new series to the library.
|
||||
|
||||
Creates a database entry for the series and also updates
|
||||
the in-memory cache. The filesystem folder is created for downloads.
|
||||
|
||||
Extracts the series `key` from the provided link URL.
|
||||
The `key` is the URL-safe identifier used for all lookups.
|
||||
The `name` is stored as display metadata along with a
|
||||
@@ -603,6 +639,12 @@ async def add_series(
|
||||
Raises:
|
||||
HTTPException: If adding the series fails or link is invalid
|
||||
"""
|
||||
import os
|
||||
|
||||
from src.config.settings import settings
|
||||
from src.server.database.connection import get_db_session
|
||||
from src.server.database.service import AnimeSeriesService
|
||||
|
||||
try:
|
||||
# Validate inputs
|
||||
if not request.link or not request.link.strip():
|
||||
@@ -617,13 +659,6 @@ async def add_series(
|
||||
detail="Series name cannot be empty",
|
||||
)
|
||||
|
||||
# Check if series_app has the list attribute
|
||||
if not hasattr(series_app, "list"):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_501_NOT_IMPLEMENTED,
|
||||
detail="Series list functionality not available",
|
||||
)
|
||||
|
||||
# Extract key from link URL
|
||||
# Expected format: https://aniworld.to/anime/stream/{key}
|
||||
link = request.link.strip()
|
||||
@@ -647,35 +682,78 @@ async def add_series(
|
||||
# Create folder from name (filesystem-friendly)
|
||||
folder = request.name.strip()
|
||||
|
||||
# Create a new Serie object
|
||||
# key: unique identifier extracted from link
|
||||
# name: display name from request
|
||||
# folder: filesystem folder name (derived from name)
|
||||
# episodeDict: empty for new series
|
||||
# Try database storage first, fall back to in-memory/file storage
|
||||
db_stored = False
|
||||
try:
|
||||
async with get_db_session() as db:
|
||||
# Check if series already exists in database
|
||||
existing = await AnimeSeriesService.get_by_key(db, key)
|
||||
if existing:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail=f"Series with key '{key}' already exists",
|
||||
)
|
||||
|
||||
# Create database entry
|
||||
await AnimeSeriesService.create(
|
||||
db,
|
||||
key=key,
|
||||
name=request.name.strip(),
|
||||
site="aniworld.to",
|
||||
folder=folder,
|
||||
episode_dict={},
|
||||
)
|
||||
await db.commit()
|
||||
db_stored = True
|
||||
logger.info(
|
||||
"Created database entry for series: %s (key=%s)",
|
||||
request.name,
|
||||
key
|
||||
)
|
||||
except HTTPException:
|
||||
raise
|
||||
except RuntimeError as db_error:
|
||||
# Database not initialized - fall back to file storage
|
||||
logger.warning(
|
||||
"Database unavailable, using file storage: %s",
|
||||
db_error
|
||||
)
|
||||
|
||||
# Create filesystem folder (for downloads)
|
||||
if settings.anime_directory:
|
||||
anime_path = os.path.join(settings.anime_directory, folder)
|
||||
os.makedirs(anime_path, exist_ok=True)
|
||||
logger.debug("Created folder: %s", anime_path)
|
||||
|
||||
# Create Serie object for in-memory cache
|
||||
serie = Serie(
|
||||
key=key,
|
||||
name=request.name.strip(),
|
||||
site="aniworld.to",
|
||||
folder=folder,
|
||||
episodeDict={}
|
||||
episodeDict={},
|
||||
)
|
||||
|
||||
# Add the series to the list
|
||||
series_app.list.add(serie)
|
||||
|
||||
# Refresh the series list to update the cache
|
||||
if hasattr(series_app, "refresh_series_list"):
|
||||
series_app.refresh_series_list()
|
||||
# Update in-memory cache and/or file storage
|
||||
if series_app and hasattr(series_app, "list"):
|
||||
# If database wasn't available, use file-based storage
|
||||
if not db_stored:
|
||||
series_app.list.add(serie)
|
||||
else:
|
||||
# Just update in-memory cache
|
||||
series_app.list.keyDict[key] = serie
|
||||
logger.debug("Updated in-memory cache for series: %s", key)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": f"Successfully added series: {request.name}",
|
||||
"key": key,
|
||||
"folder": folder
|
||||
"folder": folder,
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.error("Failed to add series: %s", exc, exc_info=True)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to add series: {str(exc)}",
|
||||
@@ -773,6 +851,112 @@ async def get_anime(
|
||||
) from exc
|
||||
|
||||
|
||||
@router.delete("/{anime_key}")
|
||||
async def delete_series(
|
||||
anime_key: str,
|
||||
_auth: dict = Depends(require_auth),
|
||||
series_app: Any = Depends(get_series_app),
|
||||
) -> dict:
|
||||
"""Delete a series from the library.
|
||||
|
||||
Removes the series from the database. The `anime_key` should be
|
||||
the unique series key (provider identifier).
|
||||
|
||||
Note: This does NOT delete the filesystem folder or downloaded files.
|
||||
To remove files, use the filesystem operations separately.
|
||||
|
||||
Args:
|
||||
anime_key: Series key (primary identifier)
|
||||
_auth: Ensures the caller is authenticated (value unused)
|
||||
series_app: Core `SeriesApp` instance for cache updates
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]: Status payload with success message
|
||||
|
||||
Raises:
|
||||
HTTPException: If series not found or deletion fails
|
||||
"""
|
||||
from src.server.database.connection import get_db_session
|
||||
from src.server.database.service import AnimeSeriesService
|
||||
|
||||
try:
|
||||
if not anime_key or not anime_key.strip():
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Series key cannot be empty",
|
||||
)
|
||||
|
||||
anime_key = anime_key.strip()
|
||||
series_name = anime_key # Default if not found
|
||||
db_deleted = False
|
||||
|
||||
# Try database deletion first
|
||||
try:
|
||||
async with get_db_session() as db:
|
||||
# Find series by key
|
||||
series = await AnimeSeriesService.get_by_key(db, anime_key)
|
||||
if series:
|
||||
series_name = series.name
|
||||
series_id = series.id
|
||||
|
||||
# Delete from database
|
||||
deleted = await AnimeSeriesService.delete(db, series_id)
|
||||
await db.commit()
|
||||
|
||||
if deleted:
|
||||
db_deleted = True
|
||||
logger.info(
|
||||
"Deleted series from database: %s (key=%s)",
|
||||
series_name,
|
||||
anime_key
|
||||
)
|
||||
except RuntimeError as db_error:
|
||||
# Database not available
|
||||
logger.warning(
|
||||
"Database unavailable for deletion: %s",
|
||||
db_error
|
||||
)
|
||||
|
||||
# Remove from in-memory cache if available
|
||||
in_memory_deleted = False
|
||||
if series_app and hasattr(series_app, "list"):
|
||||
if anime_key in series_app.list.keyDict:
|
||||
serie = series_app.list.keyDict[anime_key]
|
||||
series_name = getattr(serie, "name", anime_key)
|
||||
del series_app.list.keyDict[anime_key]
|
||||
in_memory_deleted = True
|
||||
logger.debug(
|
||||
"Removed series from in-memory cache: %s",
|
||||
anime_key
|
||||
)
|
||||
|
||||
# Check if anything was deleted
|
||||
if not db_deleted and not in_memory_deleted:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Series with key '{anime_key}' not found",
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": f"Successfully deleted series: {series_name}",
|
||||
"key": anime_key,
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"Failed to delete series %s: %s",
|
||||
anime_key,
|
||||
exc,
|
||||
exc_info=True
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
detail=f"Failed to delete series: {str(exc)}",
|
||||
) from exc
|
||||
|
||||
|
||||
# Maximum allowed input size for security
|
||||
MAX_INPUT_LENGTH = 100000 # 100KB
|
||||
|
||||
|
||||
@@ -403,6 +403,87 @@ series = result.scalar_one()
|
||||
# episodes already loaded, no additional queries
|
||||
```
|
||||
|
||||
## Data Migration from Legacy Files
|
||||
|
||||
### Background
|
||||
|
||||
The application previously stored series metadata in individual `data` files (no extension) in each anime folder. The database layer now provides centralized storage with the `DataMigrationService` handling the transition.
|
||||
|
||||
### Migration Service
|
||||
|
||||
The migration service (`src/server/services/data_migration_service.py`) provides:
|
||||
|
||||
```python
|
||||
from src.server.services.data_migration_service import DataMigrationService
|
||||
|
||||
service = DataMigrationService()
|
||||
|
||||
# Scan for legacy data files
|
||||
files = await service.check_for_legacy_data_files("/path/to/anime")
|
||||
|
||||
# Migrate a single file
|
||||
success = await service.migrate_data_file_to_db(file_path, db_session)
|
||||
|
||||
# Migrate all files at once
|
||||
result = await service.migrate_all_legacy_data("/path/to/anime", db_session)
|
||||
# result.migrated, result.skipped, result.failed, result.errors
|
||||
|
||||
# Cleanup old files (with backup)
|
||||
await service.cleanup_migrated_files(files, backup=True)
|
||||
|
||||
# Check migration status
|
||||
status = await service.get_migration_status("/path/to/anime", db_session)
|
||||
```
|
||||
|
||||
### Automatic Migration
|
||||
|
||||
Migration runs automatically during application startup in `fastapi_app.py`:
|
||||
|
||||
1. Database is initialized
|
||||
2. Legacy files are detected
|
||||
3. Files are migrated (duplicates skipped)
|
||||
4. Results are logged
|
||||
|
||||
### AnimeSeriesService Operations
|
||||
|
||||
```python
|
||||
from src.server.database.service import AnimeSeriesService
|
||||
|
||||
# Create new series
|
||||
series = await AnimeSeriesService.create(
|
||||
db, key="my-anime", name="My Anime",
|
||||
site="aniworld.to", folder="My Anime (2024)",
|
||||
episode_dict={"1": [1, 2, 3]}
|
||||
)
|
||||
|
||||
# Lookup by key (primary method)
|
||||
series = await AnimeSeriesService.get_by_key(db, "my-anime")
|
||||
|
||||
# Get all series
|
||||
all_series = await AnimeSeriesService.get_all(db)
|
||||
|
||||
# Update
|
||||
updated = await AnimeSeriesService.update(
|
||||
db, series.id, episode_dict={"1": [1, 2, 3, 4]}
|
||||
)
|
||||
|
||||
# Delete
|
||||
await AnimeSeriesService.delete(db, series.id)
|
||||
|
||||
# Sync upsert (for SerieScanner)
|
||||
AnimeSeriesService.upsert_sync(
|
||||
sync_db, key="my-anime", name="My Anime",
|
||||
site="aniworld.to", folder="My Anime (2024)",
|
||||
episode_dict={"1": [1, 2, 3]}
|
||||
)
|
||||
```
|
||||
|
||||
### Deprecation Notes
|
||||
|
||||
- **File-based storage is deprecated** - use database for new code
|
||||
- **CLI compatibility** - CLI still uses files (migration on startup handles sync)
|
||||
- **SerieScanner** - updated to save to database when session provided
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Database not initialized
|
||||
|
||||
@@ -241,6 +241,155 @@ class AnimeSeriesService:
|
||||
.limit(limit)
|
||||
)
|
||||
return list(result.scalars().all())
|
||||
|
||||
# ==========================================================================
|
||||
# Sync Methods (for use in non-async contexts like CLI/scanner)
|
||||
# ==========================================================================
|
||||
|
||||
@staticmethod
def create_sync(
    db: Session,
    key: str,
    name: str,
    site: str,
    folder: str,
    description: Optional[str] = None,
    status: Optional[str] = None,
    total_episodes: Optional[int] = None,
    cover_url: Optional[str] = None,
    episode_dict: Optional[Dict] = None,
) -> AnimeSeries:
    """Create a new anime series (synchronous version).

    Args:
        db: Sync database session
        key: Unique provider key
        name: Series name
        site: Provider site URL
        folder: Local filesystem path
        description: Optional series description
        status: Optional series status
        total_episodes: Optional total episode count
        cover_url: Optional cover image URL
        episode_dict: Optional episode dictionary

    Returns:
        Created AnimeSeries instance (flushed and refreshed, so the
        generated primary key is already populated)
    """
    column_values = {
        "key": key,
        "name": name,
        "site": site,
        "folder": folder,
        "description": description,
        "status": status,
        "total_episodes": total_episodes,
        "cover_url": cover_url,
        "episode_dict": episode_dict,
    }
    record = AnimeSeries(**column_values)
    db.add(record)
    # Flush (not commit) so the caller keeps control of the
    # transaction boundary; refresh to load generated columns.
    db.flush()
    db.refresh(record)
    logger.info("Created anime series (sync): %s (key=%s)", name, key)
    return record
|
||||
|
||||
@staticmethod
def get_by_key_sync(db: Session, key: str) -> Optional[AnimeSeries]:
    """Get anime series by provider key (synchronous version).

    Args:
        db: Sync database session
        key: Unique provider key

    Returns:
        AnimeSeries instance or None if not found
    """
    stmt = select(AnimeSeries).where(AnimeSeries.key == key)
    return db.execute(stmt).scalar_one_or_none()
|
||||
|
||||
@staticmethod
def update_sync(
    db: Session,
    series_id: int,
    **kwargs,
) -> Optional[AnimeSeries]:
    """Update anime series (synchronous version).

    Args:
        db: Sync database session
        series_id: Series primary key
        **kwargs: Fields to update; names that are not attributes of
            the model are silently ignored

    Returns:
        Updated AnimeSeries instance or None if not found
    """
    row = db.execute(
        select(AnimeSeries).where(AnimeSeries.id == series_id)
    ).scalar_one_or_none()
    if row is None:
        return None

    # Apply only attributes that actually exist on the model.
    for attr_name, new_value in kwargs.items():
        if hasattr(row, attr_name):
            setattr(row, attr_name, new_value)

    db.flush()
    db.refresh(row)
    logger.info(
        "Updated anime series (sync): %s (id=%s)",
        row.name,
        series_id
    )
    return row
|
||||
|
||||
@staticmethod
def upsert_sync(
    db: Session,
    key: str,
    name: str,
    site: str,
    folder: str,
    episode_dict: Optional[Dict] = None,
) -> AnimeSeries:
    """Create or update anime series (synchronous version).

    Args:
        db: Sync database session
        key: Unique provider key
        name: Series name
        site: Provider site URL
        folder: Local filesystem path
        episode_dict: Optional episode dictionary (None leaves the
            stored value untouched on update)

    Returns:
        Created or updated AnimeSeries instance
    """
    existing = AnimeSeriesService.get_by_key_sync(db, key)
    if existing is None:
        # No row yet: delegate to the plain create path.
        return AnimeSeriesService.create_sync(
            db,
            key=key,
            name=name,
            site=site,
            folder=folder,
            episode_dict=episode_dict,
        )

    updates = {
        "name": name,
        "site": site,
        "folder": folder,
        "episode_dict": episode_dict,
    }
    for attr, value in updates.items():
        # Skip None so an omitted episode_dict never clears stored data.
        if value is not None:
            setattr(existing, attr, value)
    db.flush()
    db.refresh(existing)
    logger.info("Updated anime series (upsert): %s", key)
    return existing
|
||||
|
||||
|
||||
# ============================================================================
|
||||
|
||||
@@ -51,6 +51,15 @@ async def lifespan(app: FastAPI):
|
||||
try:
|
||||
logger.info("Starting FastAPI application...")
|
||||
|
||||
# Initialize database
|
||||
try:
|
||||
from src.server.database.connection import init_db
|
||||
await init_db()
|
||||
logger.info("Database initialized successfully")
|
||||
except Exception as e:
|
||||
logger.error("Failed to initialize database: %s", e, exc_info=True)
|
||||
raise
|
||||
|
||||
# Load configuration from config.json and sync with settings
|
||||
try:
|
||||
from src.server.services.config_service import get_config_service
|
||||
@@ -67,6 +76,43 @@ async def lifespan(app: FastAPI):
|
||||
except Exception as e:
|
||||
logger.warning("Failed to load config from config.json: %s", e)
|
||||
|
||||
# Run legacy data file migration
|
||||
if settings.anime_directory:
|
||||
try:
|
||||
from src.server.database.connection import get_db_session
|
||||
from src.server.services.data_migration_service import (
|
||||
DataMigrationService,
|
||||
)
|
||||
|
||||
migration_service = DataMigrationService()
|
||||
legacy_files = await migration_service \
|
||||
.check_for_legacy_data_files(settings.anime_directory)
|
||||
|
||||
if legacy_files:
|
||||
logger.info(
|
||||
"Found %d legacy data file(s) to migrate",
|
||||
len(legacy_files)
|
||||
)
|
||||
async with get_db_session() as db:
|
||||
result = await migration_service \
|
||||
.migrate_all_legacy_data(
|
||||
settings.anime_directory, db
|
||||
)
|
||||
logger.info(
|
||||
"Migration complete: %d migrated, %d skipped, "
|
||||
"%d failed",
|
||||
result.migrated,
|
||||
result.skipped,
|
||||
result.failed
|
||||
)
|
||||
else:
|
||||
logger.debug("No legacy data files found")
|
||||
except Exception as e:
|
||||
# Migration failure should not prevent app startup
|
||||
logger.warning(
|
||||
"Legacy data migration check failed: %s", e, exc_info=True
|
||||
)
|
||||
|
||||
# Initialize progress service with event subscription
|
||||
progress_service = get_progress_service()
|
||||
ws_service = get_websocket_service()
|
||||
|
||||
359
src/server/services/data_migration_service.py
Normal file
359
src/server/services/data_migration_service.py
Normal file
@@ -0,0 +1,359 @@
|
||||
"""Data migration service for legacy file-based storage to database.
|
||||
|
||||
This module provides functionality to detect and migrate anime series metadata
|
||||
from legacy 'data' files (no extension) to SQLite database storage.
|
||||
|
||||
Classes:
|
||||
- MigrationResult: Dataclass containing migration statistics
|
||||
- DataMigrationService: Service for detecting and migrating data files
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import List
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from src.core.entities.series import Serie
|
||||
from src.server.database.service import AnimeSeriesService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class MigrationResult:
    """Result statistics from a migration operation.

    Attributes:
        total_found: Total number of legacy data files found
        migrated: Number of files successfully migrated
        failed: Number of files that failed migration
        skipped: Number of files skipped (already in database)
        errors: List of error messages for failed migrations
    """
    # Field order is part of the public interface (positional init).
    total_found: int = 0
    migrated: int = 0
    failed: int = 0
    skipped: int = 0
    errors: List[str] = field(default_factory=list)

    def __str__(self) -> str:
        """Human-readable summary of migration results."""
        segments = (
            f"Migration Result: {self.migrated} migrated",
            f"{self.skipped} skipped",
            f"{self.failed} failed (total: {self.total_found})",
        )
        return ", ".join(segments)
|
||||
|
||||
|
||||
class DataMigrationService:
|
||||
"""Service to migrate legacy data files to database storage.
|
||||
|
||||
Legacy data files are JSON files named 'data' (no extension) located
|
||||
in each anime folder. This service detects these files, reads them
|
||||
using the Serie.load_from_file() method, and creates corresponding
|
||||
database entries using AnimeSeriesService.
|
||||
|
||||
Example:
|
||||
>>> migration_service = DataMigrationService()
|
||||
>>> async with get_db_session() as db:
|
||||
... result = await migration_service.migrate_all_legacy_data(
|
||||
... "/path/to/anime", db
|
||||
... )
|
||||
... print(result)
|
||||
Migration Result: 10 migrated, 2 skipped, 0 failed (total: 12)
|
||||
"""
|
||||
|
||||
async def check_for_legacy_data_files(
    self, anime_directory: str
) -> List[str]:
    """Scan anime directory for folders containing 'data' files.

    Looks one level deep: each immediate subdirectory is checked for a
    file literally named 'data' (no extension), which is the legacy
    metadata format.

    Args:
        anime_directory: Base path to anime folders

    Returns:
        List of paths to found 'data' files; empty when the directory
        is missing/invalid or cannot be listed.
    """
    if not anime_directory or not os.path.isdir(anime_directory):
        logger.warning(
            "Anime directory does not exist or is invalid: %s",
            anime_directory
        )
        return []

    try:
        folder_names = os.listdir(anime_directory)
    except OSError as error:
        logger.error(
            "Unable to scan directory %s: %s",
            anime_directory,
            error
        )
        return []

    found: List[str] = []
    for entry in folder_names:
        subdir = os.path.join(anime_directory, entry)

        # Plain files at the top level can never hold legacy metadata.
        if not os.path.isdir(subdir):
            continue

        legacy_path = os.path.join(subdir, "data")
        if os.path.isfile(legacy_path):
            found.append(legacy_path)
            logger.debug("Found legacy data file: %s", legacy_path)

    logger.info(
        "Found %d legacy data file(s) in %s",
        len(found),
        anime_directory
    )
    return found
|
||||
|
||||
async def migrate_data_file_to_db(
    self,
    data_file_path: str,
    db: AsyncSession,
) -> bool:
    """Migrate a single data file to database.

    Reads the legacy data file using Serie.load_from_file() and creates
    a corresponding database entry. If the series already exists in the
    database (by key), the migration is skipped.

    Args:
        data_file_path: Path to the 'data' file (no extension)
        db: Database session

    Returns:
        True if the series was migrated; False if it was skipped
        because a series with the same key already exists. Failures
        never return False — they raise (see below).

    Raises:
        FileNotFoundError: If data file does not exist
        ValueError: If data file is corrupted or invalid
        Exception: Database errors are re-raised after rollback
    """
    if not os.path.isfile(data_file_path):
        raise FileNotFoundError(f"Data file not found: {data_file_path}")

    try:
        # Load serie from legacy file
        serie = Serie.load_from_file(data_file_path)
        logger.debug(
            "Loaded serie from file: %s (key=%s)",
            serie.name,
            serie.key
        )
    except Exception as error:
        # Any parse/IO problem is normalized to ValueError so callers
        # can treat "corrupt file" uniformly.
        logger.error(
            "Failed to load data file %s: %s",
            data_file_path,
            error
        )
        raise ValueError(f"Invalid data file: {error}") from error

    # Check if series already exists in database
    existing = await AnimeSeriesService.get_by_key(db, serie.key)
    if existing:
        logger.debug(
            "Series '%s' already exists in database, skipping",
            serie.key
        )
        return False  # Signal that it was skipped, not failed

    # Create database entry
    try:
        await AnimeSeriesService.create(
            db,
            key=serie.key,
            name=serie.name,
            site=serie.site,
            folder=serie.folder,
            episode_dict=serie.episodeDict,
        )
        await db.commit()
        logger.info(
            "Successfully migrated series: %s (key=%s)",
            serie.name,
            serie.key
        )
        return True
    except Exception as error:
        # Roll back the failed insert so the session stays usable for
        # the remaining files in a batch migration.
        await db.rollback()
        logger.error(
            "Failed to create database entry for %s: %s",
            serie.key,
            error
        )
        raise
|
||||
|
||||
async def migrate_all_legacy_data(
|
||||
self,
|
||||
anime_directory: str,
|
||||
db: AsyncSession,
|
||||
) -> MigrationResult:
|
||||
"""Migrate all legacy data files to database.
|
||||
|
||||
Scans the anime directory for all legacy data files and migrates
|
||||
each one to the database. Errors in individual files do not stop
|
||||
the entire migration.
|
||||
|
||||
Args:
|
||||
anime_directory: Base path to anime folders
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
MigrationResult with success/failure counts
|
||||
|
||||
Example:
|
||||
>>> service = DataMigrationService()
|
||||
>>> async with get_db_session() as db:
|
||||
... result = await service.migrate_all_legacy_data(
|
||||
... "/anime", db
|
||||
... )
|
||||
... if result.failed > 0:
|
||||
... for error in result.errors:
|
||||
... print(f"Error: {error}")
|
||||
"""
|
||||
result = MigrationResult()
|
||||
|
||||
# Find all legacy data files
|
||||
data_files = await self.check_for_legacy_data_files(anime_directory)
|
||||
result.total_found = len(data_files)
|
||||
|
||||
if not data_files:
|
||||
logger.info("No legacy data files found to migrate")
|
||||
return result
|
||||
|
||||
logger.info("Starting migration of %d data file(s)", len(data_files))
|
||||
|
||||
for data_file_path in data_files:
|
||||
try:
|
||||
migrated = await self.migrate_data_file_to_db(
|
||||
data_file_path, db
|
||||
)
|
||||
if migrated:
|
||||
result.migrated += 1
|
||||
else:
|
||||
result.skipped += 1 # Already exists in DB
|
||||
except FileNotFoundError:
|
||||
result.failed += 1
|
||||
error_msg = "File not found: %s" % data_file_path
|
||||
result.errors.append(error_msg)
|
||||
logger.error(error_msg)
|
||||
except ValueError as error:
|
||||
result.failed += 1
|
||||
error_msg = "Invalid data in %s: %s" % (data_file_path, error)
|
||||
result.errors.append(error_msg)
|
||||
logger.error(error_msg)
|
||||
except Exception as error:
|
||||
result.failed += 1
|
||||
error_msg = "Migration failed for %s: %s" % (
|
||||
data_file_path, error
|
||||
)
|
||||
result.errors.append(error_msg)
|
||||
logger.error(error_msg)
|
||||
|
||||
logger.info(str(result))
|
||||
return result
|
||||
|
||||
async def cleanup_migrated_files(
|
||||
self,
|
||||
migrated_paths: List[str],
|
||||
backup: bool = True,
|
||||
) -> None:
|
||||
"""Optionally backup and remove migrated data files.
|
||||
|
||||
Creates backups of data files before removal (if backup=True).
|
||||
Backups are stored with a '.backup' extension and timestamp.
|
||||
|
||||
Args:
|
||||
migrated_paths: List of successfully migrated file paths
|
||||
backup: Whether to create backups before deletion (default: True)
|
||||
|
||||
Example:
|
||||
>>> service = DataMigrationService()
|
||||
>>> await service.cleanup_migrated_files(
|
||||
... ["/anime/Show/data"],
|
||||
... backup=True
|
||||
... )
|
||||
# Creates /anime/Show/data.backup.20231115_120000
|
||||
# Removes /anime/Show/data
|
||||
"""
|
||||
if not migrated_paths:
|
||||
logger.info("No files to clean up")
|
||||
return
|
||||
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
for file_path in migrated_paths:
|
||||
if not os.path.isfile(file_path):
|
||||
logger.warning("File no longer exists: %s", file_path)
|
||||
continue
|
||||
|
||||
try:
|
||||
if backup:
|
||||
backup_path = "%s.backup.%s" % (file_path, timestamp)
|
||||
shutil.copy2(file_path, backup_path)
|
||||
logger.debug("Created backup: %s", backup_path)
|
||||
|
||||
os.remove(file_path)
|
||||
logger.info("Removed migrated file: %s", file_path)
|
||||
except OSError as error:
|
||||
logger.error("Failed to clean up %s: %s", file_path, error)
|
||||
|
||||
async def get_migration_status(
|
||||
self,
|
||||
anime_directory: str,
|
||||
db: AsyncSession,
|
||||
) -> dict:
|
||||
"""Get current migration status.
|
||||
|
||||
Provides statistics about legacy files vs database entries,
|
||||
useful for monitoring migration progress.
|
||||
|
||||
Args:
|
||||
anime_directory: Base path to anime folders
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
Dictionary with migration status information
|
||||
"""
|
||||
legacy_files = await self.check_for_legacy_data_files(anime_directory)
|
||||
db_series = await AnimeSeriesService.get_all(db)
|
||||
|
||||
# Build sets of keys for comparison
|
||||
legacy_keys: set = set()
|
||||
for file_path in legacy_files:
|
||||
try:
|
||||
serie = Serie.load_from_file(file_path)
|
||||
legacy_keys.add(serie.key)
|
||||
except Exception:
|
||||
pass # Skip invalid files
|
||||
|
||||
db_keys = {s.key for s in db_series}
|
||||
|
||||
return {
|
||||
"legacy_files_count": len(legacy_files),
|
||||
"database_entries_count": len(db_series),
|
||||
"only_in_files": list(legacy_keys - db_keys),
|
||||
"only_in_database": list(db_keys - legacy_keys),
|
||||
"in_both": list(legacy_keys & db_keys),
|
||||
"migration_complete": len(legacy_keys - db_keys) == 0,
|
||||
}
|
||||
Reference in New Issue
Block a user