feat: add legacy key/data file migration to database
- Add migration_legacy_files_completed flag to SystemSettings model
- Create legacy_file_migration service to migrate series from key/data files
- Integrate legacy migration into initialization_service startup flow
- Add integration tests for legacy file migration
- Update DATABASE.md documentation with migration details
- Fix various test and service issues (nfo_repair, tmdb_client, download_service)
- Add test_database_schema unit tests
This commit is contained in:
@@ -16,6 +16,7 @@ from typing import Dict, List
|
||||
from lxml import etree
|
||||
|
||||
from src.core.services.nfo_service import NFOService
|
||||
from src.core.services.tmdb_client import TMDBAPIError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -202,10 +203,26 @@ class NfoRepairService:
|
||||
", ".join(missing),
|
||||
)
|
||||
|
||||
await self._nfo_service.update_tvshow_nfo(
|
||||
series_name,
|
||||
download_media=False,
|
||||
)
|
||||
try:
|
||||
await self._nfo_service.update_tvshow_nfo(
|
||||
series_name,
|
||||
download_media=False,
|
||||
)
|
||||
except TMDBAPIError as e:
|
||||
if "No TMDB ID found" in str(e):
|
||||
# No TMDB ID in existing NFO — create new one via search
|
||||
logger.info(
|
||||
"NFO has no TMDB ID, creating new NFO via TMDB search"
|
||||
)
|
||||
await self._nfo_service.create_tvshow_nfo(
|
||||
serie_name=series_name,
|
||||
serie_folder=series_name,
|
||||
download_poster=False,
|
||||
download_logo=False,
|
||||
download_fanart=False,
|
||||
)
|
||||
else:
|
||||
raise
|
||||
|
||||
logger.info("NFO repair completed: %s", series_name)
|
||||
return True
|
||||
|
||||
@@ -128,7 +128,7 @@ class TMDBClient:
|
||||
# Expired negative cache entry
|
||||
del self._negative_cache[negative_cache_key]
|
||||
|
||||
delay = 2
|
||||
delay = 1
|
||||
last_error = None
|
||||
|
||||
# Rate limiting: ensure we don't exceed ~35 requests/second
|
||||
@@ -162,7 +162,7 @@ class TMDBClient:
|
||||
raise TMDBAPIError(f"Resource not found: {endpoint}")
|
||||
elif resp.status == 429:
|
||||
# Rate limit - wait longer with exponential backoff
|
||||
retry_after = int(resp.headers.get('Retry-After', max(delay * 2, 10)))
|
||||
retry_after = int(resp.headers.get('Retry-After', max(delay * 2, 2)))
|
||||
logger.warning("Rate limited, waiting %ss", retry_after)
|
||||
await asyncio.sleep(retry_after)
|
||||
continue
|
||||
@@ -181,7 +181,7 @@ class TMDBClient:
|
||||
if attempt < max_retries - 1:
|
||||
logger.warning("Request timeout (attempt %s), retrying in %ss", attempt + 1, delay)
|
||||
await asyncio.sleep(delay)
|
||||
delay = min(delay * 2, 30)
|
||||
delay *= 2
|
||||
else:
|
||||
logger.error("Request timed out after %s attempts", max_retries)
|
||||
|
||||
@@ -209,7 +209,7 @@ class TMDBClient:
|
||||
if attempt < max_retries - 1:
|
||||
logger.warning("Request failed (attempt %s): %s, retrying in %ss", attempt + 1, e, delay)
|
||||
await asyncio.sleep(delay)
|
||||
delay = min(delay * 2, 30)
|
||||
delay *= 2
|
||||
else:
|
||||
logger.error("Request failed after %s attempts: %s", max_retries, e)
|
||||
|
||||
|
||||
@@ -83,6 +83,10 @@ class AnimeSeries(Base, TimestampMixin):
|
||||
Boolean, nullable=False, default=False, server_default="0",
|
||||
doc="Whether tvshow.nfo file exists for this series"
|
||||
)
|
||||
nfo_path: Mapped[Optional[str]] = mapped_column(
|
||||
String(1000), nullable=True,
|
||||
doc="Path to the tvshow.nfo metadata file"
|
||||
)
|
||||
nfo_created_at: Mapped[Optional[datetime]] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True,
|
||||
doc="Timestamp when NFO was first created"
|
||||
@@ -91,6 +95,7 @@ class AnimeSeries(Base, TimestampMixin):
|
||||
DateTime(timezone=True), nullable=True,
|
||||
doc="Timestamp when NFO was last updated"
|
||||
)
|
||||
# TMDB (The Movie Database) ID for series metadata
|
||||
tmdb_id: Mapped[Optional[int]] = mapped_column(
|
||||
Integer, nullable=True, index=True,
|
||||
doc="TMDB (The Movie Database) ID for series metadata"
|
||||
@@ -608,6 +613,10 @@ class SystemSettings(Base, TimestampMixin):
|
||||
Boolean, nullable=False, default=False, server_default="0",
|
||||
doc="Whether the initial media scan has been completed"
|
||||
)
|
||||
migration_legacy_files_completed: Mapped[bool] = mapped_column(
|
||||
Boolean, nullable=False, default=False, server_default="0",
|
||||
doc="Whether legacy key/data file migration has been completed"
|
||||
)
|
||||
last_scan_timestamp: Mapped[Optional[datetime]] = mapped_column(
|
||||
DateTime(timezone=True), nullable=True,
|
||||
doc="Timestamp of the last completed scan"
|
||||
|
||||
@@ -125,6 +125,36 @@ class SystemSettingsService:
|
||||
settings = await SystemSettingsService.get_or_create(db)
|
||||
return settings.initial_media_scan_completed
|
||||
|
||||
@staticmethod
|
||||
async def is_migration_legacy_files_completed(db: AsyncSession) -> bool:
    """Report whether the legacy key/data file migration has already run.

    Args:
        db: Database session passed to ``SystemSettingsService.get_or_create``

    Returns:
        The ``migration_legacy_files_completed`` flag of the settings row
    """
    system_settings = await SystemSettingsService.get_or_create(db)
    completed = system_settings.migration_legacy_files_completed
    return completed
|
||||
|
||||
@staticmethod
|
||||
async def mark_migration_legacy_files_completed(
    db: AsyncSession,
    timestamp: Optional[datetime] = None
) -> None:
    """Mark the legacy key/data file migration as completed.

    Sets ``migration_legacy_files_completed`` on the settings row and commits.

    Args:
        db: Database session
        timestamp: Optional value to store in ``last_scan_timestamp``. When
            omitted, ``last_scan_timestamp`` is left unchanged.
    """
    settings = await SystemSettingsService.get_or_create(db)
    settings.migration_legacy_files_completed = True
    # last_scan_timestamp records the last completed *scan*. Finishing the
    # file migration is not a scan, so the column is only written when the
    # caller explicitly supplies a value instead of defaulting to "now".
    if timestamp is not None:
        settings.last_scan_timestamp = timestamp
    await db.commit()
    logger.info("Marked legacy files migration as completed")
|
||||
|
||||
@staticmethod
|
||||
async def mark_initial_media_scan_completed(
|
||||
db: AsyncSession,
|
||||
|
||||
@@ -398,21 +398,6 @@ async def lifespan(_application: FastAPI):
|
||||
except Exception as e:
|
||||
logger.warning("Failed to start background loader service: %s", e)
|
||||
|
||||
# Initialize and start scheduler service
|
||||
try:
|
||||
logger.info("Initializing scheduler service...")
|
||||
from src.server.services.scheduler_service import (
|
||||
get_scheduler_service,
|
||||
)
|
||||
scheduler_service = get_scheduler_service()
|
||||
logger.info("Scheduler service instance obtained, starting...")
|
||||
await scheduler_service.start()
|
||||
initialized['scheduler'] = True
|
||||
logger.info("Scheduler service started successfully")
|
||||
except Exception as e:
|
||||
logger.warning("Failed to start scheduler service: %s", e)
|
||||
# Continue - scheduler is optional
|
||||
|
||||
# Run media scan only on first run
|
||||
await perform_media_scan_if_needed(background_loader)
|
||||
else:
|
||||
@@ -420,6 +405,22 @@ async def lifespan(_application: FastAPI):
|
||||
"Download service initialization skipped - "
|
||||
"anime directory not configured"
|
||||
)
|
||||
|
||||
# Initialize and start scheduler service (independent of anime_directory)
|
||||
# The scheduler loads its own config from config.json and the
|
||||
# anime_directory may be configured there even if the env var is empty.
|
||||
try:
|
||||
logger.info("Initializing scheduler service...")
|
||||
from src.server.services.scheduler_service import (
|
||||
get_scheduler_service,
|
||||
)
|
||||
scheduler_service = get_scheduler_service()
|
||||
logger.info("Scheduler service instance obtained, starting...")
|
||||
await scheduler_service.start()
|
||||
initialized['scheduler'] = True
|
||||
logger.info("Scheduler service started successfully")
|
||||
except Exception as e:
|
||||
logger.warning("Failed to start scheduler service: %s", e)
|
||||
except (OSError, RuntimeError, ValueError) as e:
|
||||
logger.warning("Failed to initialize services: %s", e)
|
||||
# Continue startup - services can be initialized later
|
||||
|
||||
@@ -1122,6 +1122,7 @@ class DownloadService:
|
||||
item.status = DownloadStatus.PENDING
|
||||
item.error = None
|
||||
item.progress = None
|
||||
item.retry_count += 1
|
||||
self._add_to_pending_queue(item)
|
||||
retried_ids.append(item.id)
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ import structlog
|
||||
|
||||
from src.config.settings import settings
|
||||
from src.server.services.anime_service import sync_series_from_data_files
|
||||
from src.server.services.legacy_file_migration import migrate_series_from_files_to_db
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
@@ -99,6 +100,57 @@ async def _mark_initial_scan_completed() -> None:
|
||||
)
|
||||
|
||||
|
||||
async def _check_legacy_migration_status() -> bool:
    """Ask the settings service whether the legacy file migration is done.

    Delegates to ``_check_scan_status`` with the legacy-migration check and
    its log messages.

    Returns:
        bool: True if migration was completed, False otherwise
    """
    def _is_done(svc, db):
        # Returns the awaitable produced by the service, like the lambda did.
        return svc.is_migration_legacy_files_completed(db)

    migration_done = await _check_scan_status(
        check_method=_is_done,
        scan_type="legacy_migration",
        log_completed_msg="Legacy file migration already completed, skipping",
        log_not_completed_msg="Legacy file migration not yet run, will check for files",
    )
    return migration_done
|
||||
|
||||
|
||||
async def _mark_legacy_migration_completed() -> None:
    """Record in system settings that the legacy file migration has run.

    Delegates to ``_mark_scan_completed`` with the legacy-migration marker.
    """
    def _mark_done(svc, db):
        # Returns the awaitable produced by the service, like the lambda did.
        return svc.mark_migration_legacy_files_completed(db)

    await _mark_scan_completed(
        mark_method=_mark_done,
        scan_type="legacy_migration",
    )
|
||||
|
||||
|
||||
async def _migrate_legacy_files() -> int:
    """Run the legacy key/data file import for ``settings.anime_directory``.

    Opens a database session, delegates to
    ``migrate_series_from_files_to_db`` and logs the outcome. Any exception
    raised while doing so is logged as a warning and swallowed, so a return
    value of 0 means either "nothing was migrated" or "the migration failed".

    Returns:
        int: Number of series migrated
    """
    from src.server.database.connection import get_db_session

    logger.info("Checking for legacy key/data files to migrate...")

    try:
        async with get_db_session() as session:
            imported = await migrate_series_from_files_to_db(
                settings.anime_directory,
                session,
            )

        if imported <= 0:
            logger.info("No series found in legacy files to migrate")
        else:
            logger.info("Migrated %d series from legacy files", imported)

        return imported

    except Exception as exc:
        logger.warning("Failed to migrate legacy files: %s", exc)
        return 0
|
||||
|
||||
|
||||
async def _sync_anime_folders(progress_service=None) -> int:
|
||||
"""Scan anime folders and sync series to database.
|
||||
|
||||
@@ -181,18 +233,19 @@ async def _validate_anime_directory(progress_service=None) -> bool:
|
||||
|
||||
async def perform_initial_setup(progress_service=None):
|
||||
"""Perform initial setup including series sync and scan completion marking.
|
||||
|
||||
|
||||
This function is called both during application lifespan startup
|
||||
and when the setup endpoint is completed. It ensures that:
|
||||
1. Series are synced from data files to database
|
||||
2. Initial scan is marked as completed
|
||||
3. Series are loaded into memory
|
||||
4. NFO scan is performed if configured
|
||||
5. Media scan is performed
|
||||
|
||||
1. Legacy key/data files are migrated to database (one-time)
|
||||
2. Series are synced from data files to database
|
||||
3. Initial scan is marked as completed
|
||||
4. Series are loaded into memory
|
||||
5. NFO scan is performed if configured
|
||||
6. Media scan is performed
|
||||
|
||||
Args:
|
||||
progress_service: Optional ProgressService for emitting updates
|
||||
|
||||
|
||||
Returns:
|
||||
bool: True if initialization was performed, False if skipped
|
||||
"""
|
||||
@@ -225,17 +278,23 @@ async def perform_initial_setup(progress_service=None):
|
||||
|
||||
# Perform the actual initialization
|
||||
try:
|
||||
# First, run legacy file migration if needed (independent of initial scan)
|
||||
is_legacy_migration_done = await _check_legacy_migration_status()
|
||||
if not is_legacy_migration_done:
|
||||
await _migrate_legacy_files()
|
||||
await _mark_legacy_migration_completed()
|
||||
|
||||
# Sync series from anime folders to database
|
||||
await _sync_anime_folders(progress_service)
|
||||
|
||||
|
||||
# Mark the initial scan as completed
|
||||
await _mark_initial_scan_completed()
|
||||
|
||||
|
||||
# Load series into memory from database
|
||||
await _load_series_into_memory(progress_service)
|
||||
|
||||
|
||||
return True
|
||||
|
||||
|
||||
except (OSError, RuntimeError, ValueError) as e:
|
||||
logger.warning("Failed to perform initial setup: %s", e)
|
||||
return False
|
||||
|
||||
233
src/server/services/legacy_file_migration.py
Normal file
233
src/server/services/legacy_file_migration.py
Normal file
@@ -0,0 +1,233 @@
|
||||
"""One-time migration service for legacy key and data files.
|
||||
|
||||
This module provides functionality to migrate series data from legacy
|
||||
file-based storage (key/data files) to the database. The migration is
|
||||
designed to be idempotent and run only once per environment.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import structlog
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
logger = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
async def migrate_series_from_files_to_db(
    anime_dir: str,
    db: AsyncSession,
) -> int:
    """Import series described by legacy 'key'/'data' files into the database.

    Every immediate sub-folder of ``anime_dir`` is inspected. A folder's
    'data' file (JSON metadata) is preferred; the 'key' file (series key
    only) is used when there is no 'data' file or when the 'data' file cannot
    be loaded. Series whose key is already in the database are left
    untouched, so the DB version wins if a series exists in both places.

    Failures are logged and never raised: a series that cannot be created is
    skipped, and an unexpected error while scanning ends the scan early.

    Args:
        anime_dir: Path to the anime directory
        db: Database session handed to the series and episode services

    Returns:
        Number of series imported
    """
    from src.server.database.service import AnimeSeriesService, EpisodeService

    if not anime_dir or not os.path.isdir(anime_dir):
        logger.warning(
            "Anime directory does not exist, skipping legacy migration",
            anime_dir=anime_dir
        )
        return 0

    migrated_count = 0
    scanned_count = 0

    try:
        # Sorted so the processing order, and therefore which folder wins
        # when two folders carry the same key, is reproducible.
        for folder_name in sorted(os.listdir(anime_dir)):
            folder_path = os.path.join(anime_dir, folder_name)

            if not os.path.isdir(folder_path):
                continue

            scanned_count += 1

            # 'data' file: JSON with series metadata (more complete)
            data_file = os.path.join(folder_path, "data")
            # 'key' file: single line with the series key
            key_file = os.path.join(folder_path, "key")

            series_data: Optional[dict] = None

            if os.path.isfile(data_file):
                series_data = _load_data_file(data_file)

            # Fall back to the 'key' file both when there is no 'data' file
            # and when the 'data' file exists but could not be loaded.
            if series_data is None and os.path.isfile(key_file):
                series_data = _load_key_file(key_file, folder_name)

            if series_data is None:
                continue

            key = series_data.get("key")
            if not key:
                logger.warning(
                    "Skipping folder with no valid key",
                    folder=folder_name
                )
                continue

            # Check if already in DB
            existing = await AnimeSeriesService.get_by_key(db, key)
            if existing:
                logger.debug(
                    "Series already in database, skipping",
                    key=key,
                    folder=folder_name
                )
                continue

            # Create the series in DB
            # NOTE(review): if a service call fails part-way, the session may
            # need a rollback before it can be used again, and rows already
            # created for this series are not undone here - confirm against
            # the service and session configuration.
            try:
                # `or` rather than a .get() default so that null or empty
                # values stored in the file also fall back.
                name = series_data.get("name") or folder_name
                site = series_data.get("site") or "https://aniworld.to"
                folder = series_data.get("folder") or folder_name
                year = series_data.get("year")

                anime_series = await AnimeSeriesService.create(
                    db=db,
                    key=key,
                    name=name,
                    site=site,
                    folder=folder,
                    year=year,
                )

                # Create episodes if present
                episode_dict = series_data.get("episodeDict") or {}
                for season, episode_numbers in episode_dict.items():
                    for episode_number in episode_numbers:
                        await EpisodeService.create(
                            db=db,
                            series_id=anime_series.id,
                            season=season,
                            episode_number=episode_number,
                        )

                migrated_count += 1
                logger.info(
                    "Migrated series from legacy file",
                    key=key,
                    name=name,
                    folder=folder_name
                )

            except Exception as e:
                logger.warning(
                    "Failed to migrate series from legacy file",
                    key=key,
                    folder=folder_name,
                    error=str(e)
                )

    except Exception as e:
        logger.error(
            "Legacy migration failed",
            anime_dir=anime_dir,
            error=str(e),
            exc_info=True
        )

    logger.info(
        "Legacy file migration complete",
        scanned_folders=scanned_count,
        migrated=migrated_count
    )
    return migrated_count
|
||||
|
||||
|
||||
def _load_data_file(data_file_path: str) -> Optional[dict]:
|
||||
"""Load and parse a legacy 'data' file (JSON).
|
||||
|
||||
Args:
|
||||
data_file_path: Path to the data file
|
||||
|
||||
Returns:
|
||||
Parsed data dict or None if parsing fails
|
||||
"""
|
||||
try:
|
||||
with open(data_file_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
if not isinstance(data, dict):
|
||||
logger.warning(
|
||||
"Data file is not a dictionary",
|
||||
file=data_file_path
|
||||
)
|
||||
return None
|
||||
|
||||
# Ensure episodeDict has int keys
|
||||
if "episodeDict" in data and isinstance(data["episodeDict"], dict):
|
||||
data["episodeDict"] = {
|
||||
int(k): v for k, v in data["episodeDict"].items()
|
||||
}
|
||||
|
||||
return data
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(
|
||||
"Failed to parse legacy data file (JSON error)",
|
||||
file=data_file_path,
|
||||
error=str(e)
|
||||
)
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to read legacy data file",
|
||||
file=data_file_path,
|
||||
error=str(e)
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
def _load_key_file(key_file_path: str, folder_name: str) -> Optional[dict]:
|
||||
"""Load a legacy 'key' file (single line with series key).
|
||||
|
||||
Args:
|
||||
key_file_path: Path to the key file
|
||||
folder_name: Folder name to use as fallback name
|
||||
|
||||
Returns:
|
||||
Data dict with key and inferred fields, or None if loading fails
|
||||
"""
|
||||
try:
|
||||
with open(key_file_path, "r", encoding="utf-8") as f:
|
||||
key = f.read().strip()
|
||||
|
||||
if not key:
|
||||
logger.warning(
|
||||
"Key file is empty",
|
||||
file=key_file_path
|
||||
)
|
||||
return None
|
||||
|
||||
# Infer basic data from key file
|
||||
return {
|
||||
"key": key,
|
||||
"name": folder_name,
|
||||
"site": "https://aniworld.to",
|
||||
"folder": folder_name,
|
||||
"episodeDict": {},
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to read legacy key file",
|
||||
file=key_file_path,
|
||||
error=str(e)
|
||||
)
|
||||
return None
|
||||
Reference in New Issue
Block a user