"""Data migration service for migrating file-based storage to database. This module provides functionality to migrate anime series data from legacy file-based storage (data files without .json extension) to the SQLite database using the AnimeSeries model. The migration service: - Scans anime directories for existing data files - Reads Serie objects from data files - Migrates them to the database using AnimeSeriesService - Handles errors gracefully without stopping the migration - Provides detailed migration results """ from __future__ import annotations import logging from dataclasses import dataclass, field from pathlib import Path from typing import List, Optional from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.exc import IntegrityError from src.core.entities.series import Serie from src.server.database.service import AnimeSeriesService logger = logging.getLogger(__name__) @dataclass class MigrationResult: """Result of a data file migration operation. Attributes: total_found: Total number of data files found migrated: Number of files successfully migrated skipped: Number of files skipped (already in database) failed: Number of files that failed to migrate errors: List of error messages encountered """ total_found: int = 0 migrated: int = 0 skipped: int = 0 failed: int = 0 errors: List[str] = field(default_factory=list) def __post_init__(self): """Ensure errors is always a list.""" if self.errors is None: self.errors = [] class DataMigrationError(Exception): """Base exception for data migration errors.""" class DataFileReadError(DataMigrationError): """Raised when a data file cannot be read.""" class DataMigrationService: """Service for migrating data files to database. This service handles the migration of anime series data from file-based storage to the database. It scans directories for data files, reads Serie objects, and creates AnimeSeries records. Example: ```python service = DataMigrationService() # Check if migration is needed if await service.is_migration_needed("/path/to/anime"): async with get_db_session() as db: result = await service.migrate_all("/path/to/anime", db) print(f"Migrated {result.migrated} series") ``` """ def __init__(self) -> None: """Initialize the data migration service.""" pass def scan_for_data_files(self, anime_directory: str) -> List[Path]: """Scan for data files in the anime directory. Finds all 'data' files (JSON format without extension) in the anime directory structure. Each series folder may contain a 'data' file with series metadata. Args: anime_directory: Path to the anime directory containing series folders Returns: List of Path objects pointing to data files Raises: ValueError: If anime_directory is invalid """ if not anime_directory or not anime_directory.strip(): logger.warning("Empty anime directory provided") return [] base_path = Path(anime_directory) if not base_path.exists(): logger.warning( "Anime directory does not exist: %s", anime_directory ) return [] if not base_path.is_dir(): logger.warning( "Anime directory is not a directory: %s", anime_directory ) return [] data_files: List[Path] = [] try: # Iterate through all subdirectories (series folders) for folder in base_path.iterdir(): if not folder.is_dir(): continue # Check for 'data' file in each series folder data_file = folder / "data" if data_file.exists() and data_file.is_file(): data_files.append(data_file) logger.debug("Found data file: %s", data_file) except PermissionError as e: logger.error( "Permission denied scanning directory %s: %s", anime_directory, e ) except OSError as e: logger.error( "OS error scanning directory %s: %s", anime_directory, e ) logger.info( "Found %d data files in %s", len(data_files), anime_directory ) return data_files def _read_data_file(self, data_path: Path) -> Optional[Serie]: """Read a Serie object from a data file. Args: data_path: Path to the data file Returns: Serie object if successfully read, None otherwise Raises: DataFileReadError: If the file cannot be read or parsed """ try: serie = Serie.load_from_file(str(data_path)) # Validate the serie has required fields if not serie.key or not serie.key.strip(): raise DataFileReadError( f"Data file {data_path} has empty or missing key" ) logger.debug( "Successfully read serie '%s' from %s", serie.key, data_path ) return serie except FileNotFoundError as e: raise DataFileReadError( f"Data file not found: {data_path}" ) from e except PermissionError as e: raise DataFileReadError( f"Permission denied reading data file: {data_path}" ) from e except (ValueError, KeyError, TypeError) as e: raise DataFileReadError( f"Invalid data in file {data_path}: {e}" ) from e except Exception as e: raise DataFileReadError( f"Error reading data file {data_path}: {e}" ) from e async def migrate_data_file( self, data_path: Path, db: AsyncSession ) -> bool: """Migrate a single data file to the database. Reads the data file, checks if the series already exists in the database, and creates a new record if it doesn't exist. If the series exists, optionally updates the episode_dict if changed. Args: data_path: Path to the data file db: Async database session Returns: True if the series was migrated (created or updated), False if skipped (already exists with same data) Raises: DataFileReadError: If the file cannot be read DataMigrationError: If database operation fails """ # Read the data file serie = self._read_data_file(data_path) if serie is None: raise DataFileReadError(f"Could not read data file: {data_path}") # Check if series already exists in database existing = await AnimeSeriesService.get_by_key(db, serie.key) if existing is not None: # Check if episode_dict has changed existing_dict = existing.episode_dict or {} new_dict = serie.episodeDict or {} # Convert keys to strings for comparison (JSON stores keys as strings) new_dict_str_keys = { str(k): v for k, v in new_dict.items() } if existing_dict == new_dict_str_keys: logger.debug( "Series '%s' already exists with same data, skipping", serie.key ) return False # Update episode_dict if different await AnimeSeriesService.update( db, existing.id, episode_dict=new_dict_str_keys ) logger.info( "Updated episode_dict for existing series '%s'", serie.key ) return True # Create new series in database try: # Convert episode_dict keys to strings for JSON storage episode_dict_for_db = { str(k): v for k, v in (serie.episodeDict or {}).items() } await AnimeSeriesService.create( db, key=serie.key, name=serie.name, site=serie.site, folder=serie.folder, episode_dict=episode_dict_for_db, ) logger.info( "Migrated series '%s' to database", serie.key ) return True except IntegrityError as e: # Race condition - series was created by another process logger.warning( "Series '%s' was already created (race condition): %s", serie.key, e ) return False except Exception as e: raise DataMigrationError( f"Failed to create series '{serie.key}' in database: {e}" ) from e async def migrate_all( self, anime_directory: str, db: AsyncSession ) -> MigrationResult: """Migrate all data files from anime directory to database. Scans the anime directory for data files and migrates each one to the database. Errors are logged but do not stop the migration. Args: anime_directory: Path to the anime directory db: Async database session Returns: MigrationResult with counts and error messages """ result = MigrationResult() # Scan for data files data_files = self.scan_for_data_files(anime_directory) result.total_found = len(data_files) if result.total_found == 0: logger.info("No data files found to migrate") return result logger.info( "Starting migration of %d data files", result.total_found ) # Migrate each file for data_path in data_files: try: migrated = await self.migrate_data_file(data_path, db) if migrated: result.migrated += 1 else: result.skipped += 1 except DataFileReadError as e: result.failed += 1 error_msg = f"Failed to read {data_path}: {e}" result.errors.append(error_msg) logger.error(error_msg) except DataMigrationError as e: result.failed += 1 error_msg = f"Failed to migrate {data_path}: {e}" result.errors.append(error_msg) logger.error(error_msg) except Exception as e: result.failed += 1 error_msg = f"Unexpected error migrating {data_path}: {e}" result.errors.append(error_msg) logger.exception(error_msg) # Commit all changes try: await db.commit() except Exception as e: logger.error("Failed to commit migration: %s", e) result.errors.append(f"Failed to commit migration: {e}") logger.info( "Migration complete: %d migrated, %d skipped, %d failed", result.migrated, result.skipped, result.failed ) return result def is_migration_needed(self, anime_directory: str) -> bool: """Check if there are data files to migrate. Args: anime_directory: Path to the anime directory Returns: True if data files exist, False otherwise """ data_files = self.scan_for_data_files(anime_directory) needs_migration = len(data_files) > 0 if needs_migration: logger.info( "Migration needed: found %d data files", len(data_files) ) else: logger.debug("No migration needed: no data files found") return needs_migration # Singleton instance for the service _data_migration_service: Optional[DataMigrationService] = None def get_data_migration_service() -> DataMigrationService: """Get the singleton data migration service instance. Returns: DataMigrationService instance """ global _data_migration_service if _data_migration_service is None: _data_migration_service = DataMigrationService() return _data_migration_service def reset_data_migration_service() -> None: """Reset the singleton service instance (for testing).""" global _data_migration_service _data_migration_service = None