"""Setup service for first-time database initialization.

This service runs during initial application setup to:

1. Scan anime folders in the data directory
2. Extract title and year from folder names
3. Create AnimeSeries records in the database
4. Resolve provider keys via search (if single match found)

The run_once logic is handled by the caller (perform_initial_setup) via
_check_initial_scan_status, not by this service itself.
"""

import json
import os
import re
from dataclasses import dataclass
from datetime import datetime, timezone
from difflib import SequenceMatcher
from pathlib import Path
from typing import Optional

import structlog

from src.config.settings import settings
from src.server.database.connection import get_db_session
from src.server.database.service import AnimeSeriesService, UnresolvedFolderService
from src.server.utils.dependencies import get_series_app

logger = structlog.get_logger(__name__)

# Site value stored on every AnimeSeries record created by this service.
_DEFAULT_SITE = "https://aniworld.to"

# Inclusive range of years accepted when parsing "(YYYY)" from a folder name.
_MIN_YEAR = 1900
_MAX_YEAR = 2100

# "(YYYY)" anywhere in a folder name / only at its very end.
_ANY_YEAR_RE = re.compile(r'\((\d{4})\)')
_TRAILING_YEAR_RE = re.compile(r'\((\d{4})\)\s*$')

# Trailing "(YYYY)" plus surrounding whitespace, removed to obtain the title.
_YEAR_SUFFIX_RE = re.compile(r'\s*\(\d{4}\)\s*$')

# Common anime suffixes stripped (in this order, one pass each) before
# comparing titles.
_TITLE_SUFFIX_PATTERNS = tuple(
    re.compile(pattern, flags=re.IGNORECASE)
    for pattern in (
        r'\s*\(TV\)\s*$',
        r'\s*\(Anime\)\s*$',
        r'\s*\(OAD\)\s*$',
        r'\s*\(OVA\)\s*$',
        r'\s*\(Special\)\s*$',
        r'\s*\(Movie\)\s*$',
        r'\s*\(Spin-Off\)\s*$',
    )
)


@dataclass
class SeriesProperties:
    """Filesystem-derived properties for an AnimeSeries.

    Attributes:
        has_nfo: True if a ``tvshow.nfo`` file exists in the series folder.
        nfo_path: String path of the NFO file, or None when absent.
        nfo_created_at: Creation timestamp of the NFO file (UTC), or None.
        nfo_updated_at: Modification timestamp of the NFO file (UTC), or None.
        logo_loaded: True if ``logo.png`` exists in the series folder.
        images_loaded: True if a poster/fanart image exists in the folder.
    """

    has_nfo: bool = False
    nfo_path: Optional[str] = None
    nfo_created_at: Optional[datetime] = None
    nfo_updated_at: Optional[datetime] = None
    logo_loaded: bool = False
    images_loaded: bool = False


class SetupService:
    """Service for setup operations during application initialization."""

    @staticmethod
    def _extract_year_from_folder_name(folder_name: str) -> Optional[int]:
        """Extract year from folder name if present.

        A "(YYYY)" group at the end of the folder name is preferred, because
        that is the group ``_extract_title_from_folder_name`` strips. If the
        trailing group is missing or out of range, the first in-range
        "(YYYY)" group anywhere in the name is used instead (so names such
        as "Title (2020) [1080p]" still yield a year).

        Args:
            folder_name: The folder name to parse

        Returns:
            Year as integer if a group within 1900-2100 is found,
            None otherwise
        """
        if not folder_name:
            return None

        candidates = []
        trailing = _TRAILING_YEAR_RE.search(folder_name)
        if trailing:
            candidates.append(trailing.group(1))
        candidates.extend(_ANY_YEAR_RE.findall(folder_name))

        for raw_year in candidates:
            year = int(raw_year)
            if _MIN_YEAR <= year <= _MAX_YEAR:
                return year
        return None

    @staticmethod
    def _extract_title_from_folder_name(folder_name: str) -> str:
        """Extract title from folder name by removing year suffix.

        Args:
            folder_name: The folder name to parse

        Returns:
            Title with year suffix and surrounding whitespace removed
        """
        return _YEAR_SUFFIX_RE.sub('', folder_name).strip()

    @staticmethod
    def _normalize_title(title: str) -> str:
        """Normalize title for fuzzy matching.

        Lowercases the title and strips common trailing suffixes such as
        "(TV)" or "(OVA)".

        Args:
            title: The title to normalize

        Returns:
            Normalized title string
        """
        normalized = title.lower().strip()
        for suffix_pattern in _TITLE_SUFFIX_PATTERNS:
            normalized = suffix_pattern.sub('', normalized).strip()
        return normalized

    @staticmethod
    def _titles_match(title1: str, title2: str, threshold: float = 0.85) -> bool:
        """Check if two titles match using fuzzy comparison.

        Titles match when, after normalization, they are equal, one contains
        the other, or their SequenceMatcher ratio reaches ``threshold``.

        Args:
            title1: First title
            title2: Second title
            threshold: Similarity threshold (0.0 to 1.0)

        Returns:
            True if titles match within threshold
        """
        norm1 = SetupService._normalize_title(title1)
        norm2 = SetupService._normalize_title(title2)

        # Direct match after normalization
        if norm1 == norm2:
            return True

        # An empty string is a substring of every string, so without this
        # guard a result with a missing title would match any folder title.
        if not norm1 or not norm2:
            return False

        # Containment check (e.g., "Attack on Titan" in "Attack on Titan (TV)")
        if norm1 in norm2 or norm2 in norm1:
            return True

        # Similarity ratio check using SequenceMatcher
        return SequenceMatcher(None, norm1, norm2).ratio() >= threshold

    @staticmethod
    async def _resolve_key_via_search(title: str) -> str:
        """Resolve provider key by searching for the title.

        Any exception raised while searching is logged as a warning and
        results in an empty string.

        Args:
            title: The title to search for

        Returns:
            Provider key if exactly one search result is returned and its
            title matches ``title``, empty string otherwise
        """
        if not title:
            return ""

        try:
            series_app = get_series_app()
            results = await series_app.search(title)

            if len(results) == 1:
                candidate = results[0]
                # "or ''" also covers keys that are present but set to None.
                result_name = candidate.get('title') or ''
                result_link = candidate.get('link') or ''

                if SetupService._titles_match(result_name, title):
                    if result_link and '/anime/stream/' in result_link:
                        # Key is the path segment following "/anime/stream/".
                        tail = result_link.split('/anime/stream/')[-1]
                        return tail.split('/')[0]
                    if result_link:
                        # Link is already the key (e.g., "shinobi-no-ittoki")
                        return result_link
                    logger.debug(
                        "Series key resolved but link format unexpected",
                        folder_title=title,
                        result_title=result_name,
                        link=result_link
                    )
                else:
                    logger.debug(
                        "Series search result title mismatch",
                        folder_title=title,
                        result_title=result_name,
                        link=result_link
                    )
            elif len(results) > 1:
                logger.debug(
                    "Multiple search results for title, skipping fuzzy match",
                    title=title,
                    result_count=len(results)
                )
        except Exception as e:
            # Best effort: a failing provider must not abort the setup scan.
            logger.warning(
                "Provider search failed for folder",
                title=title,
                error=str(e)
            )
        return ""

    @staticmethod
    def _check_nfo_file(
        folder_path: Path,
    ) -> tuple[bool, Optional[str], Optional[datetime], Optional[datetime]]:
        """Check if tvshow.nfo exists and return its metadata.

        Args:
            folder_path: Path to the series folder

        Returns:
            Tuple of (has_nfo, nfo_path, nfo_created_at, nfo_updated_at);
            timestamps are timezone-aware (UTC)
        """
        nfo_path = folder_path / "tvshow.nfo"
        if not nfo_path.is_file():
            return False, None, None, None

        stat = nfo_path.stat()
        # st_ctime is the metadata-change time on Unix, not the creation
        # time; prefer st_birthtime on platforms that expose it.
        created_ts = getattr(stat, 'st_birthtime', stat.st_ctime)
        created = datetime.fromtimestamp(created_ts, tz=timezone.utc)
        updated = datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc)
        return True, str(nfo_path), created, updated

    @staticmethod
    def _check_logo_file(folder_path: Path) -> bool:
        """Check if logo.png exists.

        Args:
            folder_path: Path to the series folder

        Returns:
            True if logo.png exists, False otherwise
        """
        return (folder_path / "logo.png").is_file()

    @staticmethod
    def _check_image_files(folder_path: Path) -> bool:
        """Check if any image files (poster, fanart) exist.

        Args:
            folder_path: Path to the series folder

        Returns:
            True if the folder directly contains a file whose name starts
            with "poster" or "fanart" (case-insensitive) and whose extension
            is .jpg, .jpeg or .png
        """
        image_extensions = {'.jpg', '.jpeg', '.png'}
        return any(
            child.is_file()
            and child.name.lower().startswith(('poster', 'fanart'))
            and child.suffix.lower() in image_extensions
            for child in folder_path.iterdir()
        )

    @classmethod
    def _get_series_properties(cls, folder_path: Path) -> SeriesProperties:
        """Get all filesystem-derived properties for a series folder.

        Args:
            folder_path: Path to the series folder

        Returns:
            SeriesProperties with all detected values
        """
        has_nfo, nfo_path, nfo_created_at, nfo_updated_at = cls._check_nfo_file(
            folder_path
        )
        return SeriesProperties(
            has_nfo=has_nfo,
            nfo_path=nfo_path,
            nfo_created_at=nfo_created_at,
            nfo_updated_at=nfo_updated_at,
            logo_loaded=cls._check_logo_file(folder_path),
            images_loaded=cls._check_image_files(folder_path),
        )

    @classmethod
    async def _create_series_from_folder(
        cls,
        db,
        folder: Path,
        key: str,
        title: str,
        year: Optional[int],
    ) -> SeriesProperties:
        """Create the AnimeSeries record for one folder.

        Args:
            db: Database session passed through to AnimeSeriesService
            folder: Path to the series folder
            key: Provider key for the series
            title: Series title derived from the folder name
            year: Release year derived from the folder name, if any

        Returns:
            The filesystem properties that were stored on the record
        """
        props = cls._get_series_properties(folder)
        await AnimeSeriesService.create(
            db=db,
            key=key,
            name=title,
            site=_DEFAULT_SITE,
            folder=folder.name,
            year=year,
            loading_status="completed",
            episodes_loaded=True,
            logo_loaded=props.logo_loaded,
            images_loaded=props.images_loaded,
            has_nfo=props.has_nfo,
            nfo_path=props.nfo_path,
            nfo_created_at=props.nfo_created_at,
            nfo_updated_at=props.nfo_updated_at,
        )
        return props

    @classmethod
    async def run(cls) -> int:
        """Run the setup service.

        Scans anime folders, creates AnimeSeries records, and resolves
        provider keys via search. Should only be called after checking that
        initial scan hasn't been completed yet (via
        _check_initial_scan_status).

        Any exception raised during the scan is logged and ends the scan;
        it is not propagated to the caller.

        Returns:
            Number of new series created
        """
        if not settings.anime_directory:
            logger.info("Anime directory not configured, skipping setup")
            return 0

        anime_dir = Path(settings.anime_directory)
        if not anime_dir.is_dir():
            logger.info(
                "Anime directory does not exist, skipping setup: %s",
                anime_dir
            )
            return 0

        logger.info("Running setup service...")
        created_count = 0
        skipped_existing = 0
        unresolved_count = 0

        try:
            series_app = get_series_app()
            async with get_db_session() as db:
                for folder in anime_dir.iterdir():
                    if not folder.is_dir():
                        continue
                    folder_name = folder.name

                    # Check if series already exists in DB
                    existing = await AnimeSeriesService.get_by_folder(
                        db, folder_name
                    )
                    if existing:
                        skipped_existing += 1
                        continue

                    # Extract title and year from folder name
                    year = cls._extract_year_from_folder_name(folder_name)
                    title = cls._extract_title_from_folder_name(folder_name)

                    # Check if already tracked as unresolved
                    tracked = await UnresolvedFolderService.get_by_folder_name(
                        db, folder_name
                    )
                    if tracked and tracked.is_resolved and tracked.provider_key:
                        # Was previously unresolved but now resolved
                        manual_key = tracked.provider_key

                        # Same duplicate-key rule as for searched keys below.
                        duplicate = await AnimeSeriesService.get_by_key(
                            db, manual_key
                        )
                        if duplicate:
                            logger.debug(
                                "Series with key already exists, skipping",
                                folder=folder_name,
                                key=manual_key,
                                existing_folder=duplicate.folder
                            )
                            skipped_existing += 1
                            continue

                        await cls._create_series_from_folder(
                            db, folder, manual_key, title, year
                        )
                        created_count += 1
                        # Delete the unresolved tracking now that series is created
                        await UnresolvedFolderService.delete(db, folder_name)
                        continue
                    if tracked:
                        # Already tracked as unresolved (or flagged resolved
                        # without a usable provider key), skip
                        unresolved_count += 1
                        continue

                    if not title:
                        logger.warning(
                            "Could not extract title from folder: %s",
                            folder_name
                        )
                        continue

                    # Resolve key via provider search
                    resolved_key = await cls._resolve_key_via_search(title)
                    if not resolved_key:
                        # Track unresolved folder for later manual resolution
                        search_result_json = None
                        try:
                            series_results = await series_app.search(title)
                            if series_results:
                                search_result_json = json.dumps(series_results)
                        except Exception as e:
                            # Best effort: the folder is still tracked, just
                            # without the raw search results attached.
                            logger.debug(
                                "Could not capture search results for unresolved folder",
                                folder=folder_name,
                                error=str(e)
                            )

                        await UnresolvedFolderService.create(
                            db=db,
                            folder_name=folder_name,
                            title=title,
                            year=year,
                            search_attempts=1,
                            last_search_result=search_result_json,
                        )
                        # Count newly tracked folders too, so the summary
                        # log reflects every unresolved folder of this run.
                        unresolved_count += 1
                        logger.warning(
                            "Could not resolve series key for folder, tracking as unresolved: %s",
                            folder_name
                        )
                        continue

                    # Also check if a series with this key already exists
                    # (different folder, same anime)
                    existing_by_key = await AnimeSeriesService.get_by_key(
                        db, resolved_key
                    )
                    if existing_by_key:
                        logger.debug(
                            "Series with key already exists, skipping",
                            folder=folder_name,
                            key=resolved_key,
                            existing_folder=existing_by_key.folder
                        )
                        skipped_existing += 1
                        continue

                    # Check filesystem properties and create AnimeSeries record
                    props = await cls._create_series_from_folder(
                        db, folder, resolved_key, title, year
                    )
                    created_count += 1
                    logger.debug(
                        "Created series from folder",
                        folder=folder_name,
                        title=title,
                        year=year,
                        key=resolved_key,
                        has_nfo=props.has_nfo,
                        logo_loaded=props.logo_loaded,
                        images_loaded=props.images_loaded,
                    )

            logger.info(
                "Setup complete",
                created=created_count,
                skipped_existing=skipped_existing,
                unresolved=unresolved_count
            )
        except Exception as e:
            # Top-level boundary: report the failure and return the number
            # of series counted as created before it occurred.
            logger.error(
                "Setup failed",
                error=str(e),
                exc_info=True
            )

        return created_count