Add database migration from legacy data files

- Create DataMigrationService for migrating data files to SQLite
- Add sync database methods to AnimeSeriesService
- Update SerieScanner to save to database with file fallback
- Update anime API endpoints to use database with fallback
- Add delete endpoint for anime series
- Add automatic migration on startup in fastapi_app.py lifespan
- Add 28 unit tests for migration service
- Add 14 integration tests for migration flow
- Update infrastructure.md and database README docs

Migration runs automatically on startup, legacy data files preserved.
This commit is contained in:
2025-12-01 17:42:09 +01:00
parent 338e3feb4a
commit 17754a86f0
9 changed files with 2209 additions and 45 deletions

View File

@@ -3,6 +3,8 @@ SerieScanner - Scans directories for anime series and missing episodes.
This module provides functionality to scan anime directories, identify
missing episodes, and report progress through callback interfaces.
Supports both database storage (preferred) and file-based storage (fallback).
"""
import logging
@@ -34,13 +36,15 @@ class SerieScanner:
Scans directories for anime series and identifies missing episodes.
Supports progress callbacks for real-time scanning updates.
Prefers database storage when available, falls back to file storage.
"""
def __init__(
self,
basePath: str,
loader: Loader,
callback_manager: Optional[CallbackManager] = None
callback_manager: Optional[CallbackManager] = None,
use_database: bool = True,
) -> None:
"""
Initialize the SerieScanner.
@@ -49,6 +53,7 @@ class SerieScanner:
basePath: Base directory containing anime series
loader: Loader instance for fetching series information
callback_manager: Optional callback manager for progress updates
use_database: Whether to save to database (fallback to files)
Raises:
ValueError: If basePath is invalid or doesn't exist
@@ -71,6 +76,22 @@ class SerieScanner:
callback_manager or CallbackManager()
)
self._current_operation_id: Optional[str] = None
self._use_database: bool = use_database
self._db_available: bool = False
# Check if database is available
if use_database:
try:
from src.server.database.connection import get_sync_session
session = get_sync_session()
session.close()
self._db_available = True
logger.info("Database available for SerieScanner")
except (ImportError, RuntimeError) as e:
logger.warning(
"Database not available, using file storage: %s", e
)
self._db_available = False
logger.info("Initialized SerieScanner with base path: %s", abs_path)
@@ -79,6 +100,63 @@ class SerieScanner:
"""Get the callback manager instance."""
return self._callback_manager
def _save_serie(self, serie: Serie, data_path: str) -> None:
    """Save a serie to database (if available) and file.

    This method handles saving serie data with database-first approach
    and file fallback for backward compatibility. The file write always
    happens; the database write is attempted only when the scanner was
    constructed with ``use_database=True`` and the availability probe in
    ``__init__`` succeeded. A database failure is logged but never
    raised, since the file copy already exists.

    Args:
        serie: The Serie object to save.
        data_path: The path to save the data file for fallback.
    """
    # Always save to file for backward compatibility
    serie.save_to_file(data_path)
    # Try to save to database if available
    if self._use_database and self._db_available:
        try:
            # Imported lazily so file-only deployments never need the
            # database packages at import time.
            from src.server.database.connection import get_sync_session
            from src.server.database.service import AnimeSeriesService
            session = get_sync_session()
            try:
                # Convert episodeDict to JSON-serializable format:
                # season keys become strings; each episode entry is its
                # to_dict() form when available, otherwise str(entry).
                episode_dict_json = None
                if serie.episodeDict:
                    episode_dict_json = {}
                    for season, episodes in serie.episodeDict.items():
                        season_key = str(season)
                        episode_dict_json[season_key] = [
                            ep.to_dict() if hasattr(ep, 'to_dict')
                            else str(ep)
                            for ep in episodes
                        ]
                # Get site from serie if available (defensive: older
                # Serie objects may lack the attribute entirely).
                site = getattr(serie, 'site', '') or ''
                # Upsert to database (static method call) — creates the
                # row on first scan, updates it on subsequent scans.
                AnimeSeriesService.upsert_sync(
                    db=session,
                    key=serie.key,
                    name=serie.name,
                    site=site,
                    folder=serie.folder,
                    episode_dict=episode_dict_json
                )
                logger.debug(
                    "Saved serie to database: %s", serie.key
                )
            finally:
                # Close the session even if the upsert raised.
                session.close()
        except (ImportError, RuntimeError) as e:
            # Non-fatal: the file written above remains authoritative.
            logger.warning(
                "Failed to save serie to database, "
                "file backup exists: %s",
                e
            )
def reinit(self) -> None:
    """Reinitialize the series dictionary (keyed by serie.key).

    Discards all previously scanned entries; the next scan repopulates
    the mapping from scratch.
    """
    self.keyDict: dict[str, Serie] = {}
@@ -185,7 +263,8 @@ class SerieScanner:
data_path = os.path.join(
self.directory, folder, 'data'
)
serie.save_to_file(data_path)
# Save to database (if available) and file
self._save_serie(serie, data_path)
# Store by key (primary identifier), not folder
if serie.key in self.keyDict:

View File

@@ -263,32 +263,65 @@ async def list_anime(
)
try:
# Get missing episodes from series app
if not hasattr(series_app, "list"):
return []
series = series_app.list.GetMissingEpisode()
# Try to get series from database first
summaries: List[AnimeSummary] = []
for serie in series:
# Get all properties from the serie object
key = getattr(serie, "key", "")
name = getattr(serie, "name", "")
site = getattr(serie, "site", "")
folder = getattr(serie, "folder", "")
episode_dict = getattr(serie, "episodeDict", {}) or {}
try:
from src.server.database.connection import get_db_session
from src.server.database.service import AnimeSeriesService
# Convert episode dict keys to strings for JSON serialization
missing_episodes = {str(k): v for k, v in episode_dict.items()}
summaries.append(
AnimeSummary(
key=key,
name=name,
site=site,
folder=folder,
missing_episodes=missing_episodes,
async with get_db_session() as db:
db_series = await AnimeSeriesService.get_all(db)
for series in db_series:
episode_dict = series.episode_dict or {}
# Only include series with missing episodes
if episode_dict:
# Ensure episode dict keys are strings
missing_episodes = {
str(k): v for k, v in episode_dict.items()
}
summaries.append(
AnimeSummary(
key=series.key,
name=series.name,
site=series.site,
folder=series.folder,
missing_episodes=missing_episodes,
)
)
logger.debug(
"Loaded %d series from database",
len(summaries)
)
except Exception as db_error:
# Fall back to in-memory series_app if database fails
logger.warning(
"Database query failed, using in-memory fallback: %s",
db_error
)
if series_app and hasattr(series_app, "list"):
series = series_app.list.GetMissingEpisode()
for serie in series:
key = getattr(serie, "key", "")
name = getattr(serie, "name", "")
site = getattr(serie, "site", "")
folder = getattr(serie, "folder", "")
episode_dict = getattr(serie, "episodeDict", {}) or {}
# Convert episode dict keys to strings
missing_episodes = {
str(k): v for k, v in episode_dict.items()
}
summaries.append(
AnimeSummary(
key=key,
name=name,
site=site,
folder=folder,
missing_episodes=missing_episodes,
)
)
# Apply sorting if requested
if sort_by:
@@ -585,6 +618,9 @@ async def add_series(
) -> dict:
"""Add a new series to the library.
Creates a database entry for the series and also updates
the in-memory cache. The filesystem folder is created for downloads.
Extracts the series `key` from the provided link URL.
The `key` is the URL-safe identifier used for all lookups.
The `name` is stored as display metadata along with a
@@ -603,6 +639,12 @@ async def add_series(
Raises:
HTTPException: If adding the series fails or link is invalid
"""
import os
from src.config.settings import settings
from src.server.database.connection import get_db_session
from src.server.database.service import AnimeSeriesService
try:
# Validate inputs
if not request.link or not request.link.strip():
@@ -617,13 +659,6 @@ async def add_series(
detail="Series name cannot be empty",
)
# Check if series_app has the list attribute
if not hasattr(series_app, "list"):
raise HTTPException(
status_code=status.HTTP_501_NOT_IMPLEMENTED,
detail="Series list functionality not available",
)
# Extract key from link URL
# Expected format: https://aniworld.to/anime/stream/{key}
link = request.link.strip()
@@ -647,35 +682,78 @@ async def add_series(
# Create folder from name (filesystem-friendly)
folder = request.name.strip()
# Create a new Serie object
# key: unique identifier extracted from link
# name: display name from request
# folder: filesystem folder name (derived from name)
# episodeDict: empty for new series
# Try database storage first, fall back to in-memory/file storage
db_stored = False
try:
async with get_db_session() as db:
# Check if series already exists in database
existing = await AnimeSeriesService.get_by_key(db, key)
if existing:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=f"Series with key '{key}' already exists",
)
# Create database entry
await AnimeSeriesService.create(
db,
key=key,
name=request.name.strip(),
site="aniworld.to",
folder=folder,
episode_dict={},
)
await db.commit()
db_stored = True
logger.info(
"Created database entry for series: %s (key=%s)",
request.name,
key
)
except HTTPException:
raise
except RuntimeError as db_error:
# Database not initialized - fall back to file storage
logger.warning(
"Database unavailable, using file storage: %s",
db_error
)
# Create filesystem folder (for downloads)
if settings.anime_directory:
anime_path = os.path.join(settings.anime_directory, folder)
os.makedirs(anime_path, exist_ok=True)
logger.debug("Created folder: %s", anime_path)
# Create Serie object for in-memory cache
serie = Serie(
key=key,
name=request.name.strip(),
site="aniworld.to",
folder=folder,
episodeDict={}
episodeDict={},
)
# Add the series to the list
series_app.list.add(serie)
# Refresh the series list to update the cache
if hasattr(series_app, "refresh_series_list"):
series_app.refresh_series_list()
# Update in-memory cache and/or file storage
if series_app and hasattr(series_app, "list"):
# If database wasn't available, use file-based storage
if not db_stored:
series_app.list.add(serie)
else:
# Just update in-memory cache
series_app.list.keyDict[key] = serie
logger.debug("Updated in-memory cache for series: %s", key)
return {
"status": "success",
"message": f"Successfully added series: {request.name}",
"key": key,
"folder": folder
"folder": folder,
}
except HTTPException:
raise
except Exception as exc:
logger.error("Failed to add series: %s", exc, exc_info=True)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to add series: {str(exc)}",
@@ -773,6 +851,112 @@ async def get_anime(
) from exc
@router.delete("/{anime_key}")
async def delete_series(
    anime_key: str,
    _auth: dict = Depends(require_auth),
    series_app: Any = Depends(get_series_app),
) -> dict:
    """Delete a series from the library.

    Removes the series from the database. The `anime_key` should be
    the unique series key (provider identifier). Both the database row
    and the in-memory cache entry are removed when present; a 404 is
    raised only when neither existed.

    Note: This does NOT delete the filesystem folder or downloaded files.
    To remove files, use the filesystem operations separately.

    Args:
        anime_key: Series key (primary identifier)
        _auth: Ensures the caller is authenticated (value unused)
        series_app: Core `SeriesApp` instance for cache updates

    Returns:
        Dict[str, Any]: Status payload with success message

    Raises:
        HTTPException: If series not found or deletion fails
    """
    # Imported lazily so this module stays importable without the
    # database layer installed/initialized.
    from src.server.database.connection import get_db_session
    from src.server.database.service import AnimeSeriesService
    try:
        if not anime_key or not anime_key.strip():
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Series key cannot be empty",
            )
        anime_key = anime_key.strip()
        series_name = anime_key  # Default if not found
        db_deleted = False
        # Try database deletion first
        try:
            async with get_db_session() as db:
                # Find series by key
                series = await AnimeSeriesService.get_by_key(db, anime_key)
                if series:
                    series_name = series.name
                    series_id = series.id
                    # Delete from database
                    deleted = await AnimeSeriesService.delete(db, series_id)
                    await db.commit()
                    if deleted:
                        db_deleted = True
                        logger.info(
                            "Deleted series from database: %s (key=%s)",
                            series_name,
                            anime_key
                        )
        except RuntimeError as db_error:
            # Database not available — proceed to the in-memory cache
            # so file-only deployments can still delete.
            logger.warning(
                "Database unavailable for deletion: %s",
                db_error
            )
        # Remove from in-memory cache if available
        in_memory_deleted = False
        if series_app and hasattr(series_app, "list"):
            if anime_key in series_app.list.keyDict:
                serie = series_app.list.keyDict[anime_key]
                # Prefer the cached display name if DB lookup missed.
                series_name = getattr(serie, "name", anime_key)
                del series_app.list.keyDict[anime_key]
                in_memory_deleted = True
                logger.debug(
                    "Removed series from in-memory cache: %s",
                    anime_key
                )
        # Check if anything was deleted
        if not db_deleted and not in_memory_deleted:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Series with key '{anime_key}' not found",
            )
        return {
            "status": "success",
            "message": f"Successfully deleted series: {series_name}",
            "key": anime_key,
        }
    except HTTPException:
        # Re-raise intentional HTTP errors unchanged (400/404 above).
        raise
    except Exception as exc:
        logger.error(
            "Failed to delete series %s: %s",
            anime_key,
            exc,
            exc_info=True
        )
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to delete series: {str(exc)}",
        ) from exc
# Maximum allowed input size for security
MAX_INPUT_LENGTH = 100000 # 100KB

View File

@@ -403,6 +403,87 @@ series = result.scalar_one()
# episodes already loaded, no additional queries
```
## Data Migration from Legacy Files
### Background
The application previously stored series metadata in individual `data` files (no extension) in each anime folder. The database layer now provides centralized storage with the `DataMigrationService` handling the transition.
### Migration Service
The migration service (`src/server/services/data_migration_service.py`) provides:
```python
from src.server.services.data_migration_service import DataMigrationService
service = DataMigrationService()
# Scan for legacy data files
files = await service.check_for_legacy_data_files("/path/to/anime")
# Migrate a single file
success = await service.migrate_data_file_to_db(file_path, db_session)
# Migrate all files at once
result = await service.migrate_all_legacy_data("/path/to/anime", db_session)
# result.migrated, result.skipped, result.failed, result.errors
# Cleanup old files (with backup)
await service.cleanup_migrated_files(files, backup=True)
# Check migration status
status = await service.get_migration_status("/path/to/anime", db_session)
```
### Automatic Migration
Migration runs automatically during application startup in `fastapi_app.py`:
1. Database is initialized
2. Legacy files are detected
3. Files are migrated (duplicates skipped)
4. Results are logged
### AnimeSeriesService Operations
```python
from src.server.database.service import AnimeSeriesService
# Create new series
series = await AnimeSeriesService.create(
db, key="my-anime", name="My Anime",
site="aniworld.to", folder="My Anime (2024)",
episode_dict={"1": [1, 2, 3]}
)
# Lookup by key (primary method)
series = await AnimeSeriesService.get_by_key(db, "my-anime")
# Get all series
all_series = await AnimeSeriesService.get_all(db)
# Update
updated = await AnimeSeriesService.update(
db, series.id, episode_dict={"1": [1, 2, 3, 4]}
)
# Delete
await AnimeSeriesService.delete(db, series.id)
# Sync upsert (for SerieScanner)
AnimeSeriesService.upsert_sync(
sync_db, key="my-anime", name="My Anime",
site="aniworld.to", folder="My Anime (2024)",
episode_dict={"1": [1, 2, 3]}
)
```
### Deprecation Notes
- **File-based storage is deprecated** - use database for new code
- **CLI compatibility** - CLI still uses files (migration on startup handles sync)
- **SerieScanner** - updated to save to the database automatically when it is available (the scanner opens its own sync session), with file storage as fallback
## Troubleshooting
### Database not initialized

View File

@@ -241,6 +241,155 @@ class AnimeSeriesService:
.limit(limit)
)
return list(result.scalars().all())
# ==========================================================================
# Sync Methods (for use in non-async contexts like CLI/scanner)
# ==========================================================================
@staticmethod
def create_sync(
    db: Session,
    key: str,
    name: str,
    site: str,
    folder: str,
    description: Optional[str] = None,
    status: Optional[str] = None,
    total_episodes: Optional[int] = None,
    cover_url: Optional[str] = None,
    episode_dict: Optional[Dict] = None,
) -> AnimeSeries:
    """Insert a new anime series row using a synchronous session.

    Args:
        db: Sync database session
        key: Unique provider key
        name: Series name
        site: Provider site URL
        folder: Local filesystem path
        description: Optional series description
        status: Optional series status
        total_episodes: Optional total episode count
        cover_url: Optional cover image URL
        episode_dict: Optional episode dictionary

    Returns:
        Created AnimeSeries instance
    """
    record = AnimeSeries(
        key=key,
        name=name,
        site=site,
        folder=folder,
        description=description,
        status=status,
        total_episodes=total_episodes,
        cover_url=cover_url,
        episode_dict=episode_dict,
    )
    db.add(record)
    # Flush + refresh so the generated primary key is populated
    # without committing the caller's transaction.
    db.flush()
    db.refresh(record)
    logger.info("Created anime series (sync): %s (key=%s)", name, key)
    return record
@staticmethod
def get_by_key_sync(db: Session, key: str) -> Optional[AnimeSeries]:
    """Look up a single anime series by its unique provider key (sync).

    Args:
        db: Sync database session
        key: Unique provider key

    Returns:
        AnimeSeries instance or None if not found
    """
    stmt = select(AnimeSeries).where(AnimeSeries.key == key)
    return db.execute(stmt).scalar_one_or_none()
@staticmethod
def update_sync(
    db: Session,
    series_id: int,
    **kwargs,
) -> Optional[AnimeSeries]:
    """Apply field updates to an anime series row (sync).

    Args:
        db: Sync database session
        series_id: Series primary key
        **kwargs: Fields to update

    Returns:
        Updated AnimeSeries instance or None if not found
    """
    row = db.execute(
        select(AnimeSeries).where(AnimeSeries.id == series_id)
    ).scalar_one_or_none()
    if row is None:
        return None
    # Only touch attributes that actually exist on the model;
    # unknown keyword arguments are silently ignored.
    for field_name, new_value in kwargs.items():
        if hasattr(row, field_name):
            setattr(row, field_name, new_value)
    db.flush()
    db.refresh(row)
    logger.info(
        "Updated anime series (sync): %s (id=%s)",
        row.name,
        series_id
    )
    return row
@staticmethod
def upsert_sync(
    db: Session,
    key: str,
    name: str,
    site: str,
    folder: str,
    episode_dict: Optional[Dict] = None,
) -> AnimeSeries:
    """Create the series if missing, otherwise update it in place (sync).

    Args:
        db: Sync database session
        key: Unique provider key
        name: Series name
        site: Provider site URL
        folder: Local filesystem path
        episode_dict: Optional episode dictionary

    Returns:
        Created or updated AnimeSeries instance
    """
    row = AnimeSeriesService.get_by_key_sync(db, key)
    if row is None:
        # No existing row — delegate creation to create_sync.
        return AnimeSeriesService.create_sync(
            db,
            key=key,
            name=name,
            site=site,
            folder=folder,
            episode_dict=episode_dict,
        )
    # Update path: skip None values so an absent episode_dict
    # never clobbers previously stored data.
    updates = {
        "name": name,
        "site": site,
        "folder": folder,
        "episode_dict": episode_dict,
    }
    for attr, value in updates.items():
        if value is not None:
            setattr(row, attr, value)
    db.flush()
    db.refresh(row)
    logger.info("Updated anime series (upsert): %s", key)
    return row
# ============================================================================

View File

@@ -51,6 +51,15 @@ async def lifespan(app: FastAPI):
try:
logger.info("Starting FastAPI application...")
# Initialize database
try:
from src.server.database.connection import init_db
await init_db()
logger.info("Database initialized successfully")
except Exception as e:
logger.error("Failed to initialize database: %s", e, exc_info=True)
raise
# Load configuration from config.json and sync with settings
try:
from src.server.services.config_service import get_config_service
@@ -67,6 +76,43 @@ async def lifespan(app: FastAPI):
except Exception as e:
logger.warning("Failed to load config from config.json: %s", e)
# Run legacy data file migration
if settings.anime_directory:
try:
from src.server.database.connection import get_db_session
from src.server.services.data_migration_service import (
DataMigrationService,
)
migration_service = DataMigrationService()
legacy_files = await migration_service \
.check_for_legacy_data_files(settings.anime_directory)
if legacy_files:
logger.info(
"Found %d legacy data file(s) to migrate",
len(legacy_files)
)
async with get_db_session() as db:
result = await migration_service \
.migrate_all_legacy_data(
settings.anime_directory, db
)
logger.info(
"Migration complete: %d migrated, %d skipped, "
"%d failed",
result.migrated,
result.skipped,
result.failed
)
else:
logger.debug("No legacy data files found")
except Exception as e:
# Migration failure should not prevent app startup
logger.warning(
"Legacy data migration check failed: %s", e, exc_info=True
)
# Initialize progress service with event subscription
progress_service = get_progress_service()
ws_service = get_websocket_service()

View File

@@ -0,0 +1,359 @@
"""Data migration service for legacy file-based storage to database.
This module provides functionality to detect and migrate anime series metadata
from legacy 'data' files (no extension) to SQLite database storage.
Classes:
- MigrationResult: Dataclass containing migration statistics
- DataMigrationService: Service for detecting and migrating data files
"""
from __future__ import annotations
import logging
import os
import shutil
from dataclasses import dataclass, field
from datetime import datetime
from typing import List
from sqlalchemy.ext.asyncio import AsyncSession
from src.core.entities.series import Serie
from src.server.database.service import AnimeSeriesService
logger = logging.getLogger(__name__)
@dataclass
class MigrationResult:
    """Counters accumulated while migrating legacy data files.

    Attributes:
        total_found: Total number of legacy data files found
        migrated: Number of files successfully migrated
        failed: Number of files that failed migration
        skipped: Number of files skipped (already in database)
        errors: List of error messages for failed migrations
    """

    total_found: int = 0
    migrated: int = 0
    failed: int = 0
    skipped: int = 0
    errors: List[str] = field(default_factory=list)

    def __str__(self) -> str:
        """Render the counters as a one-line, human-readable summary."""
        template = (
            "Migration Result: {0} migrated, {1} skipped, "
            "{2} failed (total: {3})"
        )
        return template.format(
            self.migrated, self.skipped, self.failed, self.total_found
        )
class DataMigrationService:
    """Service to migrate legacy data files to database storage.

    Legacy data files are JSON files named 'data' (no extension) located
    in each anime folder. This service detects these files, reads them
    using the Serie.load_from_file() method, and creates corresponding
    database entries using AnimeSeriesService.

    Example:
        >>> migration_service = DataMigrationService()
        >>> async with get_db_session() as db:
        ...     result = await migration_service.migrate_all_legacy_data(
        ...         "/path/to/anime", db
        ...     )
        ...     print(result)
        Migration Result: 10 migrated, 2 skipped, 0 failed (total: 12)
    """

    @staticmethod
    def _serialize_episode_dict(episode_dict: dict | None) -> dict | None:
        """Convert a Serie episodeDict into a JSON-serializable mapping.

        Mirrors the conversion performed by SerieScanner._save_serie so
        that rows written by the scanner and rows created by migration
        share one storage format: season keys become strings, and each
        episode entry is its ``to_dict()`` form when available,
        otherwise ``str(entry)``.

        Args:
            episode_dict: Raw episode dictionary from a Serie, or None.

        Returns:
            A JSON-serializable dict, or the input unchanged when falsy.
        """
        if not episode_dict:
            # Preserve None / {} as-is so empty series round-trip cleanly.
            return episode_dict
        serializable: dict = {}
        for season, episodes in episode_dict.items():
            serializable[str(season)] = [
                ep.to_dict() if hasattr(ep, 'to_dict') else str(ep)
                for ep in episodes
            ]
        return serializable

    async def check_for_legacy_data_files(
        self, anime_directory: str
    ) -> List[str]:
        """Scan anime directory for folders containing 'data' files.

        Searches all immediate subdirectories for files named 'data'
        (no extension), which are the legacy metadata files.

        Args:
            anime_directory: Base path to anime folders

        Returns:
            List of absolute paths to found 'data' files

        Example:
            >>> service = DataMigrationService()
            >>> files = await service.check_for_legacy_data_files("/anime")
            >>> print(files)
            ['/anime/Attack on Titan/data', '/anime/Naruto/data']
        """
        if not anime_directory or not os.path.isdir(anime_directory):
            logger.warning(
                "Anime directory does not exist or is invalid: %s",
                anime_directory
            )
            return []
        data_files: List[str] = []
        try:
            entries = os.listdir(anime_directory)
        except OSError as error:
            # Unreadable base directory: report and treat as "nothing found".
            logger.error(
                "Unable to scan directory %s: %s",
                anime_directory,
                error
            )
            return []
        for folder_name in entries:
            folder_path = os.path.join(anime_directory, folder_name)
            # Skip if not a directory
            if not os.path.isdir(folder_path):
                continue
            # Check for 'data' file (no extension)
            data_path = os.path.join(folder_path, "data")
            if os.path.isfile(data_path):
                data_files.append(data_path)
                logger.debug("Found legacy data file: %s", data_path)
        logger.info(
            "Found %d legacy data file(s) in %s",
            len(data_files),
            anime_directory
        )
        return data_files

    async def migrate_data_file_to_db(
        self,
        data_file_path: str,
        db: AsyncSession,
    ) -> bool:
        """Migrate a single data file to database.

        Reads the legacy data file using Serie.load_from_file() and creates
        a corresponding database entry. If the series already exists in the
        database (by key), the migration is skipped.

        Args:
            data_file_path: Path to the 'data' file (no extension)
            db: Database session

        Returns:
            True if migration successful, False if skipped (already exists)

        Raises:
            FileNotFoundError: If data file does not exist
            ValueError: If data file is corrupted or invalid
        """
        if not os.path.isfile(data_file_path):
            raise FileNotFoundError(f"Data file not found: {data_file_path}")
        try:
            # Load serie from legacy file
            serie = Serie.load_from_file(data_file_path)
            logger.debug(
                "Loaded serie from file: %s (key=%s)",
                serie.name,
                serie.key
            )
        except Exception as error:
            logger.error(
                "Failed to load data file %s: %s",
                data_file_path,
                error
            )
            raise ValueError(f"Invalid data file: {error}") from error
        # Check if series already exists in database
        existing = await AnimeSeriesService.get_by_key(db, serie.key)
        if existing:
            logger.debug(
                "Series '%s' already exists in database, skipping",
                serie.key
            )
            return False  # Signal that it was skipped, not failed
        # Create database entry
        try:
            await AnimeSeriesService.create(
                db,
                key=serie.key,
                name=serie.name,
                site=serie.site,
                folder=serie.folder,
                # Normalize to the same JSON shape SerieScanner writes,
                # so scanner and migration rows are interchangeable.
                episode_dict=self._serialize_episode_dict(serie.episodeDict),
            )
            await db.commit()
            logger.info(
                "Successfully migrated series: %s (key=%s)",
                serie.name,
                serie.key
            )
            return True
        except Exception as error:
            # Undo the partial insert so the session stays usable for
            # the remaining files in a batch migration.
            await db.rollback()
            logger.error(
                "Failed to create database entry for %s: %s",
                serie.key,
                error
            )
            raise

    async def migrate_all_legacy_data(
        self,
        anime_directory: str,
        db: AsyncSession,
    ) -> MigrationResult:
        """Migrate all legacy data files to database.

        Scans the anime directory for all legacy data files and migrates
        each one to the database. Errors in individual files do not stop
        the entire migration.

        Args:
            anime_directory: Base path to anime folders
            db: Database session

        Returns:
            MigrationResult with success/failure counts

        Example:
            >>> service = DataMigrationService()
            >>> async with get_db_session() as db:
            ...     result = await service.migrate_all_legacy_data(
            ...         "/anime", db
            ...     )
            ...     if result.failed > 0:
            ...         for error in result.errors:
            ...             print(f"Error: {error}")
        """
        result = MigrationResult()
        # Find all legacy data files
        data_files = await self.check_for_legacy_data_files(anime_directory)
        result.total_found = len(data_files)
        if not data_files:
            logger.info("No legacy data files found to migrate")
            return result
        logger.info("Starting migration of %d data file(s)", len(data_files))
        for data_file_path in data_files:
            try:
                migrated = await self.migrate_data_file_to_db(
                    data_file_path, db
                )
                if migrated:
                    result.migrated += 1
                else:
                    result.skipped += 1  # Already exists in DB
            except FileNotFoundError:
                result.failed += 1
                error_msg = "File not found: %s" % data_file_path
                result.errors.append(error_msg)
                logger.error(error_msg)
            except ValueError as error:
                result.failed += 1
                error_msg = "Invalid data in %s: %s" % (data_file_path, error)
                result.errors.append(error_msg)
                logger.error(error_msg)
            except Exception as error:
                # Catch-all so one bad file cannot abort the batch.
                result.failed += 1
                error_msg = "Migration failed for %s: %s" % (
                    data_file_path, error
                )
                result.errors.append(error_msg)
                logger.error(error_msg)
        logger.info(str(result))
        return result

    async def cleanup_migrated_files(
        self,
        migrated_paths: List[str],
        backup: bool = True,
    ) -> None:
        """Optionally backup and remove migrated data files.

        Creates backups of data files before removal (if backup=True).
        Backups are stored with a '.backup' extension and timestamp.

        Args:
            migrated_paths: List of successfully migrated file paths
            backup: Whether to create backups before deletion (default: True)

        Example:
            >>> service = DataMigrationService()
            >>> await service.cleanup_migrated_files(
            ...     ["/anime/Show/data"],
            ...     backup=True
            ... )
            # Creates /anime/Show/data.backup.20231115_120000
            # Removes /anime/Show/data
        """
        if not migrated_paths:
            logger.info("No files to clean up")
            return
        # One timestamp for the whole batch so all backups of a run
        # share the same suffix.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        for file_path in migrated_paths:
            if not os.path.isfile(file_path):
                logger.warning("File no longer exists: %s", file_path)
                continue
            try:
                if backup:
                    backup_path = "%s.backup.%s" % (file_path, timestamp)
                    shutil.copy2(file_path, backup_path)
                    logger.debug("Created backup: %s", backup_path)
                os.remove(file_path)
                logger.info("Removed migrated file: %s", file_path)
            except OSError as error:
                # Best-effort cleanup: log and continue with next file.
                logger.error("Failed to clean up %s: %s", file_path, error)

    async def get_migration_status(
        self,
        anime_directory: str,
        db: AsyncSession,
    ) -> dict:
        """Get current migration status.

        Provides statistics about legacy files vs database entries,
        useful for monitoring migration progress.

        Args:
            anime_directory: Base path to anime folders
            db: Database session

        Returns:
            Dictionary with migration status information
        """
        legacy_files = await self.check_for_legacy_data_files(anime_directory)
        db_series = await AnimeSeriesService.get_all(db)
        # Build sets of keys for comparison
        legacy_keys: set = set()
        for file_path in legacy_files:
            try:
                serie = Serie.load_from_file(file_path)
                legacy_keys.add(serie.key)
            except Exception:
                pass  # Skip invalid files
        db_keys = {s.key for s in db_series}
        return {
            "legacy_files_count": len(legacy_files),
            "database_entries_count": len(db_series),
            "only_in_files": list(legacy_keys - db_keys),
            "only_in_database": list(db_keys - legacy_keys),
            "in_both": list(legacy_keys & db_keys),
            "migration_complete": len(legacy_keys - db_keys) == 0,
        }