Files
Aniworld/src/server/services/setup_service.py
Lukas 53fe09351f fix: prevent duplicate series when same anime key exists in different folder
- Add check for existing series by key in SetupService.run to skip duplicates
- Fix Path construction in initialization_service.py cleanup function
- Update unit tests to mock get_by_key and get_series_app
2026-06-06 19:39:32 +02:00

441 lines
16 KiB
Python

"""Setup service for first-time database initialization.
This service runs during initial application setup to:
1. Scan anime folders in the data directory
2. Extract title and year from folder names
3. Create AnimeSeries records in the database
4. Resolve provider keys via search (if single match found)
The run_once logic is handled by the caller (perform_initial_setup)
via _check_initial_scan_status, not by this service itself.
"""
import os
import re
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
import structlog
from src.config.settings import settings
from src.server.database.connection import get_db_session
from src.server.database.service import AnimeSeriesService, UnresolvedFolderService
from src.server.utils.dependencies import get_series_app
logger = structlog.get_logger(__name__)
@dataclass
class SeriesProperties:
    """Snapshot of what a filesystem scan found inside one series folder.

    Every field defaults to its "nothing found" value, so a bare
    ``SeriesProperties()`` describes a folder without NFO or artwork.
    """

    # --- tvshow.nfo metadata ---
    # True when a tvshow.nfo file is present in the folder.
    has_nfo: bool = False
    # String form of the NFO file path, or None when there is no NFO.
    nfo_path: Optional[str] = None
    # Timestamps derived from the NFO file's stat data, or None.
    nfo_created_at: Optional[datetime] = None
    nfo_updated_at: Optional[datetime] = None

    # --- artwork flags ---
    # True when logo.png is present in the folder.
    logo_loaded: bool = False
    # True when at least one poster/fanart image is present in the folder.
    images_loaded: bool = False
class SetupService:
    """Service for setup operations during application initialization.

    All members are static or class methods; the class acts as a namespace
    for the folder-scanning helpers and the ``run`` entry point.
    """

    # Site URL stored on every AnimeSeries record created by this service.
    _SITE_URL = "https://aniworld.to"

    # "(YYYY)" anywhere in a folder name; group 1 is the four digits.
    _YEAR_PATTERN = re.compile(r'\((\d{4})\)')

    # Trailing "(YYYY)" including the whitespace around it.
    _YEAR_SUFFIX_PATTERN = re.compile(r'\s*\(\d{4}\)\s*$')

    # Trailing media-type markers stripped before titles are compared.
    _TITLE_SUFFIX_PATTERNS = tuple(
        re.compile(pattern, flags=re.IGNORECASE)
        for pattern in (
            r'\s*\(TV\)\s*$',
            r'\s*\(Anime\)\s*$',
            r'\s*\(OAD\)\s*$',
            r'\s*\(OVA\)\s*$',
            r'\s*\(Special\)\s*$',
            r'\s*\(Movie\)\s*$',
            r'\s*\(Spin-Off\)\s*$',
        )
    )

    @staticmethod
    def _extract_year_from_folder_name(folder_name: str) -> Optional[int]:
        """Extract a year from a folder name if present.

        Only the first "(YYYY)" group found anywhere in the name is
        considered, and it is accepted only if it lies in 1900..2100.

        Args:
            folder_name: The folder name to parse

        Returns:
            Year as integer if found and plausible, None otherwise
        """
        if not folder_name:
            return None
        match = SetupService._YEAR_PATTERN.search(folder_name)
        if match is None:
            return None
        year = int(match.group(1))
        return year if 1900 <= year <= 2100 else None

    @staticmethod
    def _extract_title_from_folder_name(folder_name: str) -> str:
        """Extract the title from a folder name by removing a year suffix.

        Only a "(YYYY)" group at the very end of the name is removed; the
        digits are not range-checked here.

        Args:
            folder_name: The folder name to parse

        Returns:
            Title with year suffix and surrounding whitespace removed
        """
        return SetupService._YEAR_SUFFIX_PATTERN.sub('', folder_name).strip()

    @staticmethod
    def _normalize_title(title: str) -> str:
        """Normalize a title for fuzzy matching.

        Lowercases the title and strips trailing media-type markers such as
        "(TV)" or "(OVA)". Each marker pattern is applied once, in order.

        Args:
            title: The title to normalize

        Returns:
            Normalized title string
        """
        normalized = title.lower().strip()
        for suffix_pattern in SetupService._TITLE_SUFFIX_PATTERNS:
            normalized = suffix_pattern.sub('', normalized).strip()
        return normalized

    @staticmethod
    def _titles_match(title1: str, title2: str, threshold: float = 0.85) -> bool:
        """Check if two titles match using fuzzy comparison.

        Titles match when, after normalization, they are equal, one contains
        the other, or their SequenceMatcher ratio reaches ``threshold``.
        A title that normalizes to an empty string only matches another
        empty title.

        Args:
            title1: First title
            title2: Second title
            threshold: Similarity threshold (0.0 to 1.0)

        Returns:
            True if titles match within threshold
        """
        norm1 = SetupService._normalize_title(title1)
        norm2 = SetupService._normalize_title(title2)

        # Direct match after normalization
        if norm1 == norm2:
            return True

        # The empty string is a substring of every string, so without this
        # guard a missing title would "contain-match" any other title.
        if not norm1 or not norm2:
            return False

        # Containment check (e.g., "Attack on Titan" in "Attack on Titan (TV)")
        if norm1 in norm2 or norm2 in norm1:
            return True

        # Similarity ratio check using SequenceMatcher
        from difflib import SequenceMatcher
        return SequenceMatcher(None, norm1, norm2).ratio() >= threshold

    @staticmethod
    async def _resolve_key_via_search(title: str) -> str:
        """Resolve the provider key by searching for the title.

        Any exception raised while searching is logged as a warning and
        treated as "not resolved".

        Args:
            title: The title to search for

        Returns:
            Provider key if the search returned exactly one result whose
            title matches, empty string otherwise
        """
        if not title:
            return ""
        try:
            series_app = get_series_app()
            results = await series_app.search(title)
            if len(results) == 1:
                result_name = results[0].get('title', '')
                result_link = results[0].get('link', '')
                if SetupService._titles_match(result_name, title):
                    if result_link and '/anime/stream/' in result_link:
                        # Key is the path segment right after the marker.
                        return result_link.split('/anime/stream/')[-1].split('/')[0]
                    elif result_link:
                        # Link is already the key (e.g., "shinobi-no-ittoki")
                        return result_link
                    else:
                        # Reached when the matching result carries no link.
                        logger.debug(
                            "Series key resolved but link format unexpected",
                            folder_title=title,
                            result_title=result_name,
                            link=result_link
                        )
                else:
                    logger.debug(
                        "Series search result title mismatch",
                        folder_title=title,
                        result_title=result_name,
                        link=result_link
                    )
            elif len(results) > 1:
                logger.debug(
                    "Multiple search results for title, skipping fuzzy match",
                    title=title,
                    result_count=len(results)
                )
        except Exception as e:
            logger.warning(
                "Provider search failed for folder",
                title=title,
                error=str(e)
            )
        return ""

    @staticmethod
    def _check_nfo_file(folder_path: Path) -> tuple[bool, Optional[str], Optional[datetime], Optional[datetime]]:
        """Check if tvshow.nfo exists and return its metadata.

        Args:
            folder_path: Path to the series folder

        Returns:
            Tuple of (has_nfo, nfo_path, nfo_created_at, nfo_updated_at);
            timestamps are timezone-aware (UTC). When the file is absent the
            tuple is (False, None, None, None).
        """
        nfo_path = folder_path / "tvshow.nfo"
        if not nfo_path.is_file():
            return False, None, None, None

        stat = nfo_path.stat()
        # st_ctime is the metadata-change time on Unix, not the creation
        # time. Prefer the real birth time where the platform exposes it and
        # fall back to st_ctime elsewhere.
        created_ts = getattr(stat, "st_birthtime", None)
        if created_ts is None:
            created_ts = stat.st_ctime
        created = datetime.fromtimestamp(created_ts, tz=timezone.utc)
        updated = datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc)
        return True, str(nfo_path), created, updated

    @staticmethod
    def _check_logo_file(folder_path: Path) -> bool:
        """Check if logo.png exists.

        Args:
            folder_path: Path to the series folder

        Returns:
            True if logo.png exists, False otherwise
        """
        return (folder_path / "logo.png").is_file()

    @staticmethod
    def _check_image_files(folder_path: Path) -> bool:
        """Check if any poster or fanart image exists in the folder.

        A file counts when its lowercased name starts with "poster" or
        "fanart" and its extension is .jpg, .jpeg or .png (case-insensitive).
        Only direct children of the folder are inspected.

        Args:
            folder_path: Path to the series folder

        Returns:
            True if at least one matching image file exists
        """
        image_extensions = {'.jpg', '.jpeg', '.png'}
        return any(
            child.is_file()
            and child.name.lower().startswith(('poster', 'fanart'))
            and child.suffix.lower() in image_extensions
            for child in folder_path.iterdir()
        )

    @classmethod
    def _get_series_properties(cls, folder_path: Path) -> "SeriesProperties":
        """Get all filesystem-derived properties for a series folder.

        Args:
            folder_path: Path to the series folder

        Returns:
            SeriesProperties with all detected values
        """
        has_nfo, nfo_path, nfo_created_at, nfo_updated_at = cls._check_nfo_file(folder_path)
        return SeriesProperties(
            has_nfo=has_nfo,
            nfo_path=nfo_path,
            nfo_created_at=nfo_created_at,
            nfo_updated_at=nfo_updated_at,
            logo_loaded=cls._check_logo_file(folder_path),
            images_loaded=cls._check_image_files(folder_path),
        )

    @classmethod
    async def _create_series_if_new(
        cls,
        db,
        folder: Path,
        key: str,
        title: str,
        year: Optional[int],
    ) -> bool:
        """Create an AnimeSeries for a folder unless its key is already stored.

        Shared by both creation paths of ``run`` so that the duplicate-key
        check is applied consistently.

        Args:
            db: Database session passed through to the service layer
            folder: Path to the series folder
            key: Provider key for the series
            title: Series title derived from the folder name
            year: Release year derived from the folder name, if any

        Returns:
            True if a record was created, False if a series with the same
            key already existed and the folder was skipped
        """
        folder_name = folder.name

        # Same anime may live in a second folder; never create it twice.
        existing_by_key = await AnimeSeriesService.get_by_key(db, key)
        if existing_by_key:
            logger.debug(
                "Series with key already exists, skipping",
                folder=folder_name,
                key=key,
                existing_folder=existing_by_key.folder
            )
            return False

        props = cls._get_series_properties(folder)
        await AnimeSeriesService.create(
            db=db,
            key=key,
            name=title,
            site=cls._SITE_URL,
            folder=folder_name,
            year=year,
            loading_status="completed",
            episodes_loaded=True,
            logo_loaded=props.logo_loaded,
            images_loaded=props.images_loaded,
            has_nfo=props.has_nfo,
            nfo_path=props.nfo_path,
            nfo_created_at=props.nfo_created_at,
            nfo_updated_at=props.nfo_updated_at,
        )
        logger.debug(
            "Created series from folder",
            folder=folder_name,
            title=title,
            year=year,
            key=key or "(unresolved)",
            has_nfo=props.has_nfo,
            logo_loaded=props.logo_loaded,
            images_loaded=props.images_loaded,
        )
        return True

    @classmethod
    async def run(cls) -> int:
        """Run the setup service.

        Scans anime folders, creates AnimeSeries records, and resolves
        provider keys via search. Should only be called after checking
        that initial scan hasn't been completed yet (via
        _check_initial_scan_status).

        Folders whose key cannot be resolved are recorded through
        UnresolvedFolderService. Any exception during the scan is logged and
        ends the scan early; it is not re-raised.

        Returns:
            Number of new series created (the count reached so far if the
            scan ended early because of an error)
        """
        # Function-scope import: the module's import block does not provide
        # json, and importing once here avoids re-importing per folder.
        import json

        if not settings.anime_directory:
            logger.info("Anime directory not configured, skipping setup")
            return 0

        anime_dir = Path(settings.anime_directory)
        if not anime_dir.is_dir():
            logger.info(
                "Anime directory does not exist, skipping setup: %s",
                anime_dir
            )
            return 0

        logger.info("Running setup service...")
        created_count = 0
        skipped_existing = 0
        unresolved_count = 0

        try:
            series_app = get_series_app()
            async with get_db_session() as db:
                for folder in anime_dir.iterdir():
                    if not folder.is_dir():
                        continue
                    folder_name = folder.name

                    # Check if series already exists in DB
                    if await AnimeSeriesService.get_by_folder(db, folder_name):
                        skipped_existing += 1
                        continue

                    # Extract title and year from folder name
                    year = cls._extract_year_from_folder_name(folder_name)
                    title = cls._extract_title_from_folder_name(folder_name)

                    # Check if already tracked as unresolved
                    existing_unresolved = await UnresolvedFolderService.get_by_folder_name(
                        db, folder_name
                    )
                    if existing_unresolved and existing_unresolved.is_resolved:
                        # Was previously unresolved but now resolved - create
                        # the series from the stored provider key.
                        if await cls._create_series_if_new(
                            db, folder, existing_unresolved.provider_key, title, year
                        ):
                            created_count += 1
                            # Delete the unresolved tracking now that series is created
                            await UnresolvedFolderService.delete(db, folder_name)
                        else:
                            skipped_existing += 1
                        continue
                    if existing_unresolved:
                        # Already tracked as unresolved, skip
                        unresolved_count += 1
                        continue

                    if not title:
                        logger.warning(
                            "Could not extract title from folder: %s",
                            folder_name
                        )
                        continue

                    # Resolve key via provider search
                    resolved_key = await cls._resolve_key_via_search(title)
                    if not resolved_key:
                        # Track unresolved folder for later manual resolution.
                        # The raw search results are stored as JSON on a
                        # best-effort basis.
                        try:
                            series_results = await series_app.search(title)
                            search_result_json = json.dumps(series_results) if series_results else None
                        except Exception as search_error:
                            logger.debug(
                                "Could not capture search results for unresolved folder",
                                folder=folder_name,
                                error=str(search_error)
                            )
                            search_result_json = None
                        await UnresolvedFolderService.create(
                            db=db,
                            folder_name=folder_name,
                            title=title,
                            year=year,
                            search_attempts=1,
                            last_search_result=search_result_json,
                        )
                        # Count it so the summary reflects folders tracked
                        # as unresolved during this run as well.
                        unresolved_count += 1
                        logger.warning(
                            "Could not resolve series key for folder, tracking as unresolved: %s",
                            folder_name
                        )
                        continue

                    if await cls._create_series_if_new(db, folder, resolved_key, title, year):
                        created_count += 1
                    else:
                        skipped_existing += 1

            logger.info(
                "Setup complete",
                created=created_count,
                skipped_existing=skipped_existing,
                unresolved=unresolved_count
            )
        except Exception as e:
            logger.error(
                "Setup failed",
                error=str(e),
                exc_info=True
            )
        return created_count