class DuplicateFolderGroup(BaseModel):
    """One set of folders that the duplicate scanner grouped together.

    Attributes:
        key: Grouping key reported by the scanner for this set of folders
            (the endpoint forwards ``DuplicateGroup.key`` unchanged).
        folders: Names of the folders that share ``key``.
        folder_count: Number of folders in the group.
    """

    key: str = Field(..., description="Series key (unique identifier)")
    folders: List[str] = Field(..., description="List of duplicate folder names")
    folder_count: int = Field(..., description="Number of duplicate folders")


class DuplicateFoldersResponse(BaseModel):
    """Response model for the duplicate-folders listing.

    Attributes:
        total_groups: Number of duplicate groups found.
        duplicate_groups: The duplicate groups themselves.
        message: Human-readable summary of the scan result.
    """

    total_groups: int = Field(..., description="Total number of duplicate groups")
    duplicate_groups: List[DuplicateFolderGroup] = Field(
        ..., description="List of duplicate folder groups"
    )
    message: str = Field(..., description="Human-readable summary")


@router.get("/duplicate-folders", response_model=DuplicateFoldersResponse)
async def get_duplicate_folders(
    _auth: dict = Depends(require_auth),
) -> DuplicateFoldersResponse:
    """List all pre-existing duplicate folder groups.

    Scans the configured anime directory and reports every group of folders
    that the scanner maps to the same key. Nothing is modified; the result
    is meant for manual review and cleanup.

    Args:
        _auth: Result of the ``require_auth`` dependency; unused beyond
            enforcing authentication.

    Returns:
        DuplicateFoldersResponse with the groups found. When the anime
        directory is not configured or does not exist, an empty result with
        an explanatory message is returned instead of an error.

    Raises:
        ServerError: If anything fails while scanning or building the
            response.

    Note:
        Not all duplicate folders are safe to merge - some may belong
        to different releases (e.g., dubbed vs. subbed). Review carefully
        before taking action.
    """
    # Function-scope import so this block does not depend on a change to the
    # module's import section.
    import asyncio

    try:
        if not settings.anime_directory:
            return DuplicateFoldersResponse(
                total_groups=0,
                duplicate_groups=[],
                message="Anime directory not configured",
            )

        anime_dir = Path(settings.anime_directory)
        if not anime_dir.is_dir():
            return DuplicateFoldersResponse(
                total_groups=0,
                duplicate_groups=[],
                message=f"Anime directory not found: {anime_dir}",
            )

        # The scan walks the directory and parses NFO files synchronously.
        # Running it in the default executor keeps the event loop free to
        # serve other requests while a large library is being scanned.
        loop = asyncio.get_running_loop()
        duplicates = await loop.run_in_executor(
            None, _scan_for_pre_existing_duplicates, anime_dir
        )

        groups = [
            DuplicateFolderGroup(
                key=dup.key,
                folders=dup.folders,
                folder_count=dup.count,
            )
            for dup in duplicates
        ]

        if groups:
            message = (
                f"Found {len(groups)} duplicate group(s). "
                "Review carefully - some duplicates may be different releases "
                "(e.g., dubbed vs. subbed)."
            )
        else:
            message = "No duplicate folders found."

        return DuplicateFoldersResponse(
            total_groups=len(groups),
            duplicate_groups=groups,
            message=message,
        )
    except Exception as exc:
        # logger.exception keeps the traceback, which logger.error(str(exc))
        # discarded; the log message text itself is unchanged.
        logger.exception("Failed to scan for duplicate folders: %s", exc)
        raise ServerError(
            message=f"Failed to scan for duplicates: {exc}"
        ) from exc
class DuplicateGroup:
    """A set of sibling folders that map to the same expected folder name.

    Attributes:
        key: The expected folder name shared by every folder in the group
            (this is what the scanner groups by).
        folders: Names of the folders in the group, in scan order.
        nfo_paths: ``tvshow.nfo`` path of each folder, parallel to
            ``folders``.
    """

    def __init__(self, key: str, folders: List[str], nfo_paths: List[Path]):
        self.key = key
        # Copy the lists so later mutation of the caller's lists cannot
        # silently change this group (and its ``count``).
        self.folders = list(folders)
        self.nfo_paths = list(nfo_paths)

    @property
    def count(self) -> int:
        """Number of folders in the group."""
        return len(self.folders)

    def __repr__(self) -> str:
        return f"DuplicateGroup(key={self.key!r}, folders={self.folders})"


def _scan_for_pre_existing_duplicates(anime_dir: Path) -> List[DuplicateGroup]:
    """Scan the anime directory for folders that resolve to the same name.

    Every direct sub-directory containing a ``tvshow.nfo`` file is parsed for
    its title and year. Folders whose title and year yield the same expected
    folder name are reported together as one duplicate group. Folders without
    an NFO file, or whose NFO lacks a title or a year, are ignored.

    Args:
        anime_dir: Path to the anime directory to scan.

    Returns:
        One DuplicateGroup per expected folder name that is shared by more
        than one folder. Groups and the folders inside them follow sorted
        directory order, so the first folder of a group is stable across
        runs and platforms.
    """
    members_by_name: Dict[str, List[Tuple[str, Path]]] = defaultdict(list)

    # iterdir() yields entries in filesystem-dependent order. Sorting makes
    # the result deterministic, which matters because the merge step treats
    # the first folder of each group as the canonical one.
    for series_dir in sorted(anime_dir.iterdir()):
        if not series_dir.is_dir():
            continue
        nfo_path = series_dir / "tvshow.nfo"
        # is_file() rather than exists(): a directory that happens to be
        # named tvshow.nfo must not be handed to the NFO parser.
        if not nfo_path.is_file():
            continue
        title, year = _parse_nfo_title_and_year(nfo_path)
        if not title or not year:
            continue
        expected_name = _compute_expected_folder_name(title, year)
        members_by_name[expected_name].append((series_dir.name, nfo_path))

    # Only names claimed by more than one folder are duplicates.
    return [
        DuplicateGroup(
            key=expected_name,
            folders=[folder_name for folder_name, _ in members],
            nfo_paths=[nfo for _, nfo in members],
        )
        for expected_name, members in members_by_name.items()
        if len(members) > 1
    ]
def _is_symlink_into(entry: Path, root: Path) -> bool:
    """Return True if *entry* is a symlink whose target is *root* or lies inside it."""
    if not entry.is_symlink():
        return False
    try:
        target = entry.resolve()
    except (OSError, RuntimeError):
        # Unreadable link or symlink loop: cannot prove it is redundant.
        return False
    return target == root or root in target.parents


def _try_merge_duplicate_group(group: DuplicateGroup, dry_run: bool = False) -> bool:
    """Attempt to merge a duplicate group automatically.

    The first folder of the group is kept as the canonical one. Every other
    folder is removed only when that cannot lose data: the folder is empty,
    or every entry in it is a symlink that points at the canonical folder or
    at something inside it. Folders that no longer exist are skipped.

    Folders are processed in order and the function stops at the first one
    it cannot remove, so earlier folders may already have been deleted when
    False is returned.

    Args:
        group: The DuplicateGroup to merge.
        dry_run: If True, only log actions without executing them.

    Returns:
        True if every non-canonical folder was removed (or would be, in
        dry-run mode) or was already gone; False if any folder needs manual
        intervention or a filesystem operation failed.
    """
    if len(group.folders) < 2:
        return True

    if not group.nfo_paths:
        # Without an NFO path the shared parent directory is unknown.
        logger.warning(
            "Cannot auto-merge duplicate folders for '%s': no NFO paths recorded",
            group.key,
        )
        return False

    # Keep first folder as canonical, mark others for removal
    canonical = group.folders[0]
    to_remove = group.folders[1:]

    # All folders of a group are siblings, so the parent of the canonical
    # folder (two levels above its tvshow.nfo) is the parent of all of them.
    parent_dir = group.nfo_paths[0].parent.parent
    canonical_root = (parent_dir / canonical).resolve()

    for folder in to_remove:
        folder_path = parent_dir / folder
        if not folder_path.exists():
            continue

        try:
            contents = list(folder_path.iterdir())
        except PermissionError:
            logger.warning("Permission denied accessing %s, skip merge", folder_path)
            return False
        except OSError as exc:
            logger.warning("Cannot list %s, skip merge: %s", folder_path, exc)
            return False

        if not contents:
            # Empty folder - safe to remove
            if dry_run:
                logger.info("[DRY-RUN] Would delete empty duplicate folder: %s", folder_path)
                continue
            try:
                folder_path.rmdir()
            except OSError as exc:
                logger.warning("Failed to delete %s: %s", folder_path, exc)
                return False
            logger.info("Deleted empty duplicate folder: %s", folder_path)
            continue

        # A folder holding nothing but links into the canonical folder is a
        # pure alias; dropping the links cannot lose any data. Links to
        # files inside the canonical folder count too, otherwise a folder
        # whose tvshow.nfo is such a link could never be merged.
        if all(_is_symlink_into(item, canonical_root) for item in contents):
            if dry_run:
                logger.info("[DRY-RUN] Would remove symlinks in duplicate folder: %s", folder_path)
                continue
            try:
                for item in contents:
                    item.unlink()
                folder_path.rmdir()
            except OSError as exc:
                # unlink() can fail just like rmdir(); report failure via
                # the return value instead of letting the error escape.
                logger.warning("Failed to remove %s: %s", folder_path, exc)
                return False
            logger.info("Removed symlink-only duplicate folder: %s", folder_path)
            continue

        # Cannot auto-merge - requires manual intervention
        logger.warning(
            "Cannot auto-merge duplicate folders for '%s': %s (manual merge required)",
            group.key,
            [canonical] + to_remove,
        )
        return False

    return True
@@ -383,6 +519,28 @@ async def validate_and_rename_series_folders(dry_run: bool = False) -> Dict[str, stats = {"scanned": 0, "renamed": 0, "skipped": 0, "errors": 0} + # Detect pre-existing duplicates before rename loop + pre_existing_duplicates: Set[str] = set() + duplicates = _scan_for_pre_existing_duplicates(anime_dir) + for dup_group in duplicates: + # Try automatic merge first + if _try_merge_duplicate_group(dup_group, dry_run=dry_run): + logger.info( + "Auto-merged duplicate group for '%s' (%d folders)", + dup_group.key, + dup_group.count, + ) + else: + # Flag all folders in this group as pre-existing duplicates + for folder in dup_group.folders: + pre_existing_duplicates.add(folder) + logger.warning( + "Duplicate folders detected for series '%s': %s — " + "manual cleanup required (different releases or non-empty duplicates)", + dup_group.key, + dup_group.folders, + ) + for series_dir in sorted(anime_dir.iterdir()): if not series_dir.is_dir(): continue @@ -422,6 +580,15 @@ async def validate_and_rename_series_folders(dry_run: bool = False) -> Dict[str, expected_path = anime_dir / expected_name + # Check for pre-existing duplicate + if current_name in pre_existing_duplicates: + logger.warning( + "Skipping rename for '%s' — pre-existing duplicate folder detected", + current_name, + ) + stats["errors"] += 1 + continue + # Check for duplicate target if expected_path.exists(): logger.warning(