feat: add legacy key/data file migration to database

- Add migration_legacy_files_completed flag to SystemSettings model
- Create legacy_file_migration service to migrate series from key/data files
- Integrate legacy migration into initialization_service startup flow
- Add integration tests for legacy file migration
- Update DATABASE.md documentation with migration details
- Fix various test and service issues (nfo_repair, tmdb_client, download_service)
- Add test_database_schema unit tests
This commit is contained in:
2026-05-26 17:44:42 +02:00
parent 50a77976d5
commit cbd53ef2a0
18 changed files with 1274 additions and 89 deletions

View File

@@ -1122,6 +1122,7 @@ class DownloadService:
item.status = DownloadStatus.PENDING
item.error = None
item.progress = None
item.retry_count += 1
self._add_to_pending_queue(item)
retried_ids.append(item.id)

View File

@@ -7,6 +7,7 @@ import structlog
from src.config.settings import settings
from src.server.services.anime_service import sync_series_from_data_files
from src.server.services.legacy_file_migration import migrate_series_from_files_to_db
logger = structlog.get_logger(__name__)
@@ -99,6 +100,57 @@ async def _mark_initial_scan_completed() -> None:
)
async def _check_legacy_migration_status() -> bool:
    """Report whether the one-time legacy key/data file migration already ran.

    Delegates to ``_check_scan_status`` with the legacy-migration check
    callback, scan type and log messages, and returns its result unchanged.

    Returns:
        bool: True if migration was completed, False otherwise
    """

    def _is_done(svc, db):
        # Ask the service object supplied by _check_scan_status for the flag.
        return svc.is_migration_legacy_files_completed(db)

    completed = await _check_scan_status(
        check_method=_is_done,
        scan_type="legacy_migration",
        log_completed_msg="Legacy file migration already completed, skipping",
        log_not_completed_msg="Legacy file migration not yet run, will check for files",
    )
    return completed
async def _mark_legacy_migration_completed() -> None:
    """Record that the legacy file migration has run.

    Delegates to ``_mark_scan_completed`` using the legacy-migration mark
    callback and the ``legacy_migration`` scan type.
    """

    def _set_flag(svc, db):
        # Set the flag through the service object supplied by the helper.
        return svc.mark_migration_legacy_files_completed(db)

    await _mark_scan_completed(
        mark_method=_set_flag,
        scan_type="legacy_migration",
    )
async def _migrate_legacy_files() -> int:
    """Import series from legacy key/data files into the database.

    Opens a database session and passes it, together with the configured
    anime directory, to ``migrate_series_from_files_to_db``. Any exception
    raised along the way is logged as a warning and reported as zero.

    Returns:
        int: Number of series migrated (0 when the migration failed)
    """
    from src.server.database.connection import get_db_session

    logger.info("Checking for legacy key/data files to migrate...")
    try:
        async with get_db_session() as session:
            imported = await migrate_series_from_files_to_db(
                settings.anime_directory,
                session,
            )
            # Log the outcome while the session is still open, then return
            # from inside the context manager so it exits afterwards.
            if imported <= 0:
                logger.info("No series found in legacy files to migrate")
            else:
                logger.info("Migrated %d series from legacy files", imported)
            return imported
    except Exception as exc:
        # Best-effort: the caller only ever sees a count, never the error.
        logger.warning("Failed to migrate legacy files: %s", exc)
        return 0
async def _sync_anime_folders(progress_service=None) -> int:
"""Scan anime folders and sync series to database.
@@ -181,18 +233,19 @@ async def _validate_anime_directory(progress_service=None) -> bool:
async def perform_initial_setup(progress_service=None):
"""Perform initial setup including series sync and scan completion marking.
This function is called both during application lifespan startup
and when the setup endpoint is completed. It ensures that:
1. Series are synced from data files to database
2. Initial scan is marked as completed
3. Series are loaded into memory
4. NFO scan is performed if configured
5. Media scan is performed
1. Legacy key/data files are migrated to database (one-time)
2. Series are synced from data files to database
3. Initial scan is marked as completed
4. Series are loaded into memory
5. NFO scan is performed if configured
6. Media scan is performed
Args:
progress_service: Optional ProgressService for emitting updates
Returns:
bool: True if initialization was performed, False if skipped
"""
@@ -225,17 +278,23 @@ async def perform_initial_setup(progress_service=None):
# Perform the actual initialization
try:
# First, run legacy file migration if needed (independent of initial scan)
is_legacy_migration_done = await _check_legacy_migration_status()
if not is_legacy_migration_done:
await _migrate_legacy_files()
await _mark_legacy_migration_completed()
# Sync series from anime folders to database
await _sync_anime_folders(progress_service)
# Mark the initial scan as completed
await _mark_initial_scan_completed()
# Load series into memory from database
await _load_series_into_memory(progress_service)
return True
except (OSError, RuntimeError, ValueError) as e:
logger.warning("Failed to perform initial setup: %s", e)
return False

View File

@@ -0,0 +1,233 @@
"""One-time migration service for legacy key and data files.
This module provides functionality to migrate series data from legacy
file-based storage (key/data files) to the database. The migration is
designed to be idempotent and run only once per environment.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from typing import Optional
import structlog
from sqlalchemy.ext.asyncio import AsyncSession
logger = structlog.get_logger(__name__)
async def migrate_series_from_files_to_db(
    anime_dir: str,
    db: AsyncSession,
) -> int:
    """Migrate series from legacy key/data files to database.

    Walks the immediate sub-folders of ``anime_dir``. For each folder the
    legacy 'data' file (JSON metadata) is preferred; the 'key' file is used
    when there is no 'data' file or the 'data' file cannot be loaded. Series
    whose key is already present in the database are skipped, so the DB
    version wins if a series exists in both places.

    Failures for an individual series are logged and do not stop the run;
    a failure outside the per-series creation step (for example listing the
    directory or looking up a key) is logged and ends the scan early.

    Args:
        anime_dir: Path to the anime directory
        db: Database session

    Returns:
        Number of series imported
    """
    from src.server.database.service import AnimeSeriesService, EpisodeService

    if not anime_dir or not os.path.isdir(anime_dir):
        logger.warning(
            "Anime directory does not exist, skipping legacy migration",
            anime_dir=anime_dir
        )
        return 0

    migrated_count = 0
    scanned_count = 0

    try:
        for folder_name in os.listdir(anime_dir):
            folder_path = os.path.join(anime_dir, folder_name)
            if not os.path.isdir(folder_path):
                continue

            scanned_count += 1

            # 'key' file: single line with series key
            key_file = os.path.join(folder_path, "key")
            # 'data' file: JSON with series metadata
            data_file = os.path.join(folder_path, "data")

            series_data: Optional[dict] = None

            # Try the 'data' file first (more complete)
            if os.path.isfile(data_file):
                series_data = _load_data_file(data_file)

            # Fall back to the 'key' file when there is no 'data' file or it
            # could not be loaded, so a corrupt 'data' file does not hide a
            # usable 'key' file.
            if series_data is None and os.path.isfile(key_file):
                series_data = _load_key_file(key_file, folder_name)

            if series_data is None:
                continue

            key = series_data.get("key")
            if not key:
                logger.warning(
                    "Skipping folder with no valid key",
                    folder=folder_name
                )
                continue

            # Check if already in DB
            existing = await AnimeSeriesService.get_by_key(db, key)
            if existing:
                logger.debug(
                    "Series already in database, skipping",
                    key=key,
                    folder=folder_name
                )
                continue

            # Create the series in DB
            try:
                # `or` (rather than a .get default) also covers an explicit
                # JSON null for these fields.
                name = series_data.get("name") or folder_name
                site = series_data.get("site") or "https://aniworld.to"
                folder = series_data.get("folder") or folder_name
                year = series_data.get("year")

                # Flatten the episode mapping before touching the database so
                # malformed episode data cannot fail half-way through a
                # series; non-dict mappings and non-list seasons are ignored.
                episode_dict = series_data.get("episodeDict")
                if not isinstance(episode_dict, dict):
                    episode_dict = {}
                episodes = [
                    (season, episode_number)
                    for season, episode_numbers in episode_dict.items()
                    if isinstance(episode_numbers, (list, tuple))
                    for episode_number in episode_numbers
                ]

                anime_series = await AnimeSeriesService.create(
                    db=db,
                    key=key,
                    name=name,
                    site=site,
                    folder=folder,
                    year=year,
                )

                # NOTE(review): if an episode insert fails here the error is
                # only logged; whether the already-created series row is kept
                # depends on the session's transaction handling, which is not
                # visible from this module.
                for season, episode_number in episodes:
                    await EpisodeService.create(
                        db=db,
                        series_id=anime_series.id,
                        season=season,
                        episode_number=episode_number,
                    )

                migrated_count += 1
                logger.info(
                    "Migrated series from legacy file",
                    key=key,
                    name=name,
                    folder=folder_name
                )

            except Exception as e:
                logger.warning(
                    "Failed to migrate series from legacy file",
                    key=key,
                    folder=folder_name,
                    error=str(e)
                )

    except Exception as e:
        logger.error(
            "Legacy migration failed",
            anime_dir=anime_dir,
            error=str(e),
            exc_info=True
        )

    logger.info(
        "Legacy file migration complete",
        scanned_folders=scanned_count,
        migrated=migrated_count
    )

    return migrated_count
def _load_data_file(data_file_path: str) -> Optional[dict]:
    """Load and parse a legacy 'data' file (JSON).

    The file is decoded as UTF-8 with an optional byte-order mark, so files
    saved by editors that prepend a BOM parse the same as plain UTF-8 files.
    When the top-level ``episodeDict`` value is a dict, its keys are
    converted to ``int``; a key that cannot be converted makes the whole
    file count as unreadable.

    Args:
        data_file_path: Path to the data file

    Returns:
        Parsed data dict or None if reading or parsing fails, or if the JSON
        document is not an object
    """
    try:
        # "utf-8-sig" strips a leading BOM if present; json.load rejects a
        # BOM when the file is opened as plain "utf-8".
        with open(data_file_path, "r", encoding="utf-8-sig") as f:
            data = json.load(f)

        if not isinstance(data, dict):
            logger.warning(
                "Data file is not a dictionary",
                file=data_file_path
            )
            return None

        # JSON object keys are always strings; ensure episodeDict has int keys
        if "episodeDict" in data and isinstance(data["episodeDict"], dict):
            data["episodeDict"] = {
                int(k): v for k, v in data["episodeDict"].items()
            }

        return data

    except json.JSONDecodeError as e:
        logger.warning(
            "Failed to parse legacy data file (JSON error)",
            file=data_file_path,
            error=str(e)
        )
        return None
    except Exception as e:
        # Best-effort loader: I/O errors, decode errors and non-integer
        # season keys all end up here and are reported as "no data".
        logger.warning(
            "Failed to read legacy data file",
            file=data_file_path,
            error=str(e)
        )
        return None
def _load_key_file(key_file_path: str, folder_name: str) -> Optional[dict]:
    """Load a legacy 'key' file (single line with series key).

    The file is decoded as UTF-8 with an optional byte-order mark and the
    first non-blank line, stripped of surrounding whitespace, is used as the
    key. Any further lines are ignored so stray content cannot end up inside
    the key.

    Args:
        key_file_path: Path to the key file
        folder_name: Folder name to use as fallback name

    Returns:
        Data dict with key and inferred fields, or None if loading fails or
        the file contains no key
    """
    try:
        # "utf-8-sig" drops a leading BOM, which str.strip() would not
        # remove and which would otherwise become part of the key.
        with open(key_file_path, "r", encoding="utf-8-sig") as f:
            content = f.read()

        stripped_lines = (line.strip() for line in content.splitlines())
        key = next((line for line in stripped_lines if line), "")

        if not key:
            logger.warning(
                "Key file is empty",
                file=key_file_path
            )
            return None

        # A key file carries nothing but the key; infer the rest
        return {
            "key": key,
            "name": folder_name,
            "site": "https://aniworld.to",
            "folder": folder_name,
            "episodeDict": {},
        }

    except Exception as e:
        # Best-effort loader: unreadable files are reported as "no data".
        logger.warning(
            "Failed to read legacy key file",
            file=key_file_path,
            error=str(e)
        )
        return None