- Add check for existing series by key in SetupService.run to skip duplicates - Fix Path construction in initialization_service.py cleanup function - Update unit tests to mock get_by_key and get_series_app
441 lines
16 KiB
Python
441 lines
16 KiB
Python
"""Setup service for first-time database initialization.
|
|
|
|
This service runs during initial application setup to:
|
|
1. Scan anime folders in the data directory
|
|
2. Extract title and year from folder names
|
|
3. Create AnimeSeries records in the database
|
|
4. Resolve provider keys via search (if single match found)
|
|
|
|
The run_once logic is handled by the caller (perform_initial_setup)
|
|
via _check_initial_scan_status, not by this service itself.
|
|
"""
|
|
import os
|
|
import re
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
import structlog
|
|
|
|
from src.config.settings import settings
|
|
from src.server.database.connection import get_db_session
|
|
from src.server.database.service import AnimeSeriesService, UnresolvedFolderService
|
|
from src.server.utils.dependencies import get_series_app
|
|
|
|
# Module-level structlog logger, named after this module; used by every
# SetupService method below for debug/info/warning/error events.
logger = structlog.get_logger(__name__)
|
|
|
|
|
|
@dataclass
class SeriesProperties:
    """Filesystem-derived properties for an AnimeSeries.

    Built by ``SetupService._get_series_properties`` from the contents of a
    single series folder and passed on to ``AnimeSeriesService.create``.
    Every field defaults to the "nothing found" value.
    """

    # True when a "tvshow.nfo" file exists in the series folder.
    has_nfo: bool = False

    # String path of that "tvshow.nfo" file; None when there is no NFO.
    nfo_path: Optional[str] = None

    # Timezone-aware (UTC) timestamp that SetupService._check_nfo_file
    # reports as the NFO's creation time; None when there is no NFO.
    nfo_created_at: Optional[datetime] = None

    # Timezone-aware (UTC) last-modification time (st_mtime) of the NFO;
    # None when there is no NFO.
    nfo_updated_at: Optional[datetime] = None

    # True when a "logo.png" file exists in the series folder.
    logo_loaded: bool = False

    # True when the folder contains at least one file whose name starts with
    # "poster" or "fanart" and has a .jpg/.jpeg/.png extension.
    images_loaded: bool = False
|
|
|
|
|
|
class SetupService:
    """Service for setup operations during application initialization.

    The class is a pure namespace: every method is a static or class method
    and no instance state is kept. ``run`` is the entry point; the remaining
    methods are private helpers for parsing folder names, matching provider
    search results and inspecting series folders on disk.
    """

    # Trailing "(YYYY)" marker (with optional surrounding whitespace). Shared
    # by year and title extraction so both agree on what counts as the year.
    _YEAR_SUFFIX_RE = re.compile(r'\s*\((\d{4})\)\s*$')

    # Trailing media-type markers that are ignored when comparing titles.
    _TYPE_SUFFIX_RE = re.compile(
        r'\s*\((?:TV|Anime|OAD|OVA|Special|Movie|Spin-Off)\)\s*$',
        flags=re.IGNORECASE,
    )

    @staticmethod
    def _extract_year_from_folder_name(folder_name: str) -> Optional[int]:
        """Extract year from folder name if present.

        Only a "(YYYY)" marker at the very end of the folder name counts,
        which is exactly the suffix ``_extract_title_from_folder_name``
        removes. A "(YYYY)" in the middle of the name is treated as part of
        the title.

        Args:
            folder_name: The folder name to parse

        Returns:
            Year as integer if a trailing "(YYYY)" in the range 1900-2100 is
            found, None otherwise
        """
        if not folder_name:
            return None

        match = SetupService._YEAR_SUFFIX_RE.search(folder_name)
        if match is None:
            return None

        year = int(match.group(1))
        # Reject four-digit numbers that are clearly not release years.
        return year if 1900 <= year <= 2100 else None

    @staticmethod
    def _extract_title_from_folder_name(folder_name: str) -> str:
        """Extract title from folder name by removing year suffix.

        Args:
            folder_name: The folder name to parse

        Returns:
            Title with a trailing "(YYYY)" and surrounding whitespace removed
        """
        return SetupService._YEAR_SUFFIX_RE.sub('', folder_name).strip()

    @staticmethod
    def _normalize_title(title: str) -> str:
        """Normalize title for fuzzy matching.

        Lowercases the title and strips trailing media-type markers such as
        "(TV)" or "(OVA)". Stacked markers ("Foo (TV) (Anime)") are all
        removed, regardless of their order.

        Args:
            title: The title to normalize

        Returns:
            Normalized title string
        """
        normalized = title.lower().strip()
        while True:
            stripped = SetupService._TYPE_SUFFIX_RE.sub('', normalized).strip()
            if stripped == normalized:
                return normalized
            normalized = stripped

    @staticmethod
    def _titles_match(title1: str, title2: str, threshold: float = 0.85) -> bool:
        """Check if two titles match using fuzzy comparison.

        Titles match when, after normalization, they are equal, one contains
        the other, or their ``difflib.SequenceMatcher`` ratio reaches
        ``threshold``. A title that normalizes to the empty string never
        matches: the empty string is contained in every other string, so it
        would otherwise match anything.

        Args:
            title1: First title
            title2: Second title
            threshold: Similarity threshold (0.0 to 1.0)

        Returns:
            True if titles match within threshold
        """
        norm1 = SetupService._normalize_title(title1)
        norm2 = SetupService._normalize_title(title2)

        # An empty title carries no information to match on.
        if not norm1 or not norm2:
            return False

        # Direct match after normalization
        if norm1 == norm2:
            return True

        # Containment check (e.g., "Attack on Titan" in "Attack on Titan (TV)")
        if norm1 in norm2 or norm2 in norm1:
            return True

        # Similarity ratio check using SequenceMatcher
        from difflib import SequenceMatcher

        return SequenceMatcher(None, norm1, norm2).ratio() >= threshold

    @staticmethod
    async def _resolve_key_via_search(title: str) -> str:
        """Resolve provider key by searching for the title.

        The key is only accepted when the provider returns exactly one
        result and that result's title matches ``title`` according to
        ``_titles_match``. Any exception raised while searching is logged as
        a warning and treated as "not resolved".

        Args:
            title: The title to search for

        Returns:
            Provider key if exactly one matching result was found,
            empty string otherwise
        """
        if not title:
            return ""

        try:
            series_app = get_series_app()
            results = await series_app.search(title)
            result_count = len(results)

            if result_count > 1:
                logger.debug(
                    "Multiple search results for title, skipping fuzzy match",
                    title=title,
                    result_count=result_count
                )
                return ""

            if result_count != 1:
                return ""

            # "or ''" also covers results whose value is explicitly None.
            result_name = results[0].get('title') or ''
            result_link = results[0].get('link') or ''

            if not SetupService._titles_match(result_name, title):
                logger.debug(
                    "Series search result title mismatch",
                    folder_title=title,
                    result_title=result_name,
                    link=result_link
                )
                return ""

            if '/anime/stream/' in result_link:
                # Key is the path segment right after "/anime/stream/".
                return result_link.split('/anime/stream/')[-1].split('/')[0]

            if result_link:
                # Link is already the key (e.g., "shinobi-no-ittoki")
                return result_link

            logger.debug(
                "Series key resolved but link format unexpected",
                folder_title=title,
                result_title=result_name,
                link=result_link
            )
        except Exception as e:
            logger.warning(
                "Provider search failed for folder",
                title=title,
                error=str(e)
            )

        return ""

    @staticmethod
    def _check_nfo_file(folder_path: Path) -> tuple[bool, Optional[str], Optional[datetime], Optional[datetime]]:
        """Check if tvshow.nfo exists and return its metadata.

        Args:
            folder_path: Path to the series folder

        Returns:
            Tuple of (has_nfo, nfo_path, nfo_created_at, nfo_updated_at).
            Both timestamps are timezone-aware UTC datetimes; all values
            after ``has_nfo`` are None when the file does not exist.
        """
        nfo_path = folder_path / "tvshow.nfo"
        if not nfo_path.is_file():
            return False, None, None, None

        stat = nfo_path.stat()

        # st_ctime is the metadata-change time on Unix, not the creation
        # time. Prefer the real birth time where the platform exposes it and
        # fall back to st_ctime otherwise.
        created_ts = getattr(stat, 'st_birthtime', None)
        if not isinstance(created_ts, (int, float)):
            created_ts = stat.st_ctime

        created = datetime.fromtimestamp(created_ts, tz=timezone.utc)
        updated = datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc)
        return True, str(nfo_path), created, updated

    @staticmethod
    def _check_logo_file(folder_path: Path) -> bool:
        """Check if logo.png exists.

        Args:
            folder_path: Path to the series folder

        Returns:
            True if logo.png exists, False otherwise
        """
        return (folder_path / "logo.png").is_file()

    @staticmethod
    def _check_image_files(folder_path: Path) -> bool:
        """Check if any poster or fanart image file exists.

        Matching is case-insensitive and prefix based: any regular file whose
        name starts with "poster" or "fanart" and whose extension is .jpg,
        .jpeg or .png counts (e.g. "poster.jpg", "Fanart-1.PNG").

        Args:
            folder_path: Path to the series folder

        Returns:
            True if at least one matching image file exists
        """
        image_extensions = {'.jpg', '.jpeg', '.png'}
        # Name and extension are checked before is_file() so the filesystem
        # is only queried for entries that could actually match.
        return any(
            child.name.lower().startswith(('poster', 'fanart'))
            and child.suffix.lower() in image_extensions
            and child.is_file()
            for child in folder_path.iterdir()
        )

    @classmethod
    def _get_series_properties(cls, folder_path: Path) -> "SeriesProperties":
        """Get all filesystem-derived properties for a series folder.

        Args:
            folder_path: Path to the series folder

        Returns:
            SeriesProperties with all detected values
        """
        has_nfo, nfo_path, nfo_created_at, nfo_updated_at = cls._check_nfo_file(folder_path)

        return SeriesProperties(
            has_nfo=has_nfo,
            nfo_path=nfo_path,
            nfo_created_at=nfo_created_at,
            nfo_updated_at=nfo_updated_at,
            logo_loaded=cls._check_logo_file(folder_path),
            images_loaded=cls._check_image_files(folder_path),
        )

    @staticmethod
    async def _series_key_exists(db, folder_name: str, key: str) -> bool:
        """Return True (and log) when a series with ``key`` is already stored."""
        existing_by_key = await AnimeSeriesService.get_by_key(db, key)
        if not existing_by_key:
            return False

        logger.debug(
            "Series with key already exists, skipping",
            folder=folder_name,
            key=key,
            existing_folder=existing_by_key.folder
        )
        return True

    @classmethod
    async def _create_series(cls, db, folder: Path, folder_name: str, key: str,
                             title: str, year: Optional[int]) -> None:
        """Create the AnimeSeries record for ``folder`` and log the result."""
        props = cls._get_series_properties(folder)

        await AnimeSeriesService.create(
            db=db,
            key=key,
            name=title,
            site="https://aniworld.to",
            folder=folder_name,
            year=year,
            loading_status="completed",
            episodes_loaded=True,
            logo_loaded=props.logo_loaded,
            images_loaded=props.images_loaded,
            has_nfo=props.has_nfo,
            nfo_path=props.nfo_path,
            nfo_created_at=props.nfo_created_at,
            nfo_updated_at=props.nfo_updated_at,
        )

        logger.debug(
            "Created series from folder",
            folder=folder_name,
            title=title,
            year=year,
            key=key or "(unresolved)",
            has_nfo=props.has_nfo,
            logo_loaded=props.logo_loaded,
            images_loaded=props.images_loaded,
        )

    @classmethod
    async def run(cls) -> int:
        """Run the setup service.

        Scans anime folders, creates AnimeSeries records, and resolves
        provider keys via search. Should only be called after checking
        that initial scan hasn't been completed yet (via _check_initial_scan_status).

        For every sub-folder of the configured anime directory:

        * folders that already have a series record are skipped;
        * folders tracked as unresolved and still unresolved are skipped;
        * folders tracked as unresolved but since resolved get a series
          record with the stored provider key, and their tracking entry is
          deleted;
        * all other folders are searched at the provider; on success a series
          record is created, otherwise the folder is tracked as unresolved.

        A folder whose key already belongs to another series is skipped on
        both creation paths. Exceptions are logged and not re-raised.

        Returns:
            Number of new series created (the count reached so far if the
            run was aborted by an exception)
        """
        # Only needed to snapshot search results for unresolved folders;
        # imported once here instead of on every loop iteration.
        import json

        if not settings.anime_directory:
            logger.info("Anime directory not configured, skipping setup")
            return 0

        anime_dir = Path(settings.anime_directory)
        if not anime_dir.is_dir():
            logger.info(
                "Anime directory does not exist, skipping setup: %s",
                anime_dir
            )
            return 0

        logger.info("Running setup service...")

        created_count = 0
        skipped_existing = 0
        unresolved_count = 0

        try:
            series_app = get_series_app()

            async with get_db_session() as db:
                for folder in anime_dir.iterdir():
                    if not folder.is_dir():
                        continue

                    folder_name = folder.name

                    # Check if series already exists in DB
                    existing = await AnimeSeriesService.get_by_folder(
                        db, folder_name
                    )
                    if existing:
                        skipped_existing += 1
                        continue

                    # Check if already tracked as unresolved
                    existing_unresolved = await UnresolvedFolderService.get_by_folder_name(
                        db, folder_name
                    )
                    if existing_unresolved and not existing_unresolved.is_resolved:
                        # Already tracked as unresolved, skip
                        unresolved_count += 1
                        continue

                    # Extract title and year from folder name
                    year = cls._extract_year_from_folder_name(folder_name)
                    title = cls._extract_title_from_folder_name(folder_name)

                    if existing_unresolved:
                        # Was previously unresolved but now resolved - create the series
                        resolved_key = existing_unresolved.provider_key

                        # Same duplicate guard as the automatic path below: a
                        # manually chosen key may already belong to a series.
                        if await cls._series_key_exists(db, folder_name, resolved_key):
                            skipped_existing += 1
                            continue

                        await cls._create_series(
                            db, folder, folder_name, resolved_key, title, year
                        )
                        created_count += 1

                        # Delete the unresolved tracking now that series is created
                        await UnresolvedFolderService.delete(db, folder_name)
                        continue

                    if not title:
                        logger.warning(
                            "Could not extract title from folder: %s",
                            folder_name
                        )
                        continue

                    # Resolve key via provider search
                    resolved_key = await cls._resolve_key_via_search(title)

                    if not resolved_key:
                        # Track unresolved folder for later manual resolution.
                        # The raw search results are stored alongside it; a
                        # failed or non-serializable search is stored as None.
                        try:
                            series_results = await series_app.search(title)
                            search_result_json = json.dumps(series_results) if series_results else None
                        except Exception:
                            search_result_json = None

                        await UnresolvedFolderService.create(
                            db=db,
                            folder_name=folder_name,
                            title=title,
                            year=year,
                            search_attempts=1,
                            last_search_result=search_result_json,
                        )
                        # Count newly tracked folders too, so the summary log
                        # reflects every unresolved folder seen in this run.
                        unresolved_count += 1
                        logger.warning(
                            "Could not resolve series key for folder, tracking as unresolved: %s",
                            folder_name
                        )
                        continue

                    # Also check if a series with this key already exists (different folder, same anime)
                    if await cls._series_key_exists(db, folder_name, resolved_key):
                        skipped_existing += 1
                        continue

                    # Check filesystem properties and create AnimeSeries record
                    await cls._create_series(
                        db, folder, folder_name, resolved_key, title, year
                    )
                    created_count += 1

            logger.info(
                "Setup complete",
                created=created_count,
                skipped_existing=skipped_existing,
                unresolved=unresolved_count
            )

        except Exception as e:
            # Best effort: report the failure and fall through to return the
            # number of series created before the error.
            logger.error(
                "Setup failed",
                error=str(e),
                exc_info=True
            )

        return created_count