feat: add duplicate folder detection and /duplicate-folders API endpoint

- Add DuplicateFolderGroup and DuplicateFoldersResponse Pydantic models
- Add /duplicate-folders GET endpoint for listing pre-existing duplicates
- Add _scan_for_pre_existing_duplicates() function for NFO-based detection
- Add _try_merge_duplicate_group() for auto-merging empty/symlink-only duplicates
- Integrate duplicate detection into validate_and_rename_series_folders workflow
- Skip rename for flagged duplicates to prevent data loss during merge
This commit is contained in:
2026-05-28 21:46:08 +02:00
parent 239341629c
commit 1ef59c5283
2 changed files with 267 additions and 2 deletions

View File

@@ -1,12 +1,13 @@
import logging
import warnings
from pathlib import Path
from typing import Any, List, Optional
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, Field, field_validator
from sqlalchemy.ext.asyncio import AsyncSession
from src.core.entities.series import Serie
from src.config.settings import settings
from src.core.utils.key_utils import generate_key_from_folder, is_valid_key
from src.server.database.service import AnimeSeriesService
from src.server.exceptions import (
@@ -26,6 +27,9 @@ from src.server.utils.dependencies import (
)
from src.server.utils.filesystem import sanitize_folder_name
from src.server.utils.validators import validate_filter_value, validate_search_query
from src.server.services.folder_rename_service import (
_scan_for_pre_existing_duplicates,
)
logger = logging.getLogger(__name__)
@@ -71,6 +75,100 @@ async def get_anime_status(
) from exc
class DuplicateFolderGroup(BaseModel):
"""A group of duplicate folders for the same series.
Attributes:
key: Series key (provider-assigned unique identifier)
folders: List of folder names that are duplicates
folder_count: Number of duplicate folders
"""
key: str = Field(..., description="Series key (unique identifier)")
folders: List[str] = Field(..., description="List of duplicate folder names")
folder_count: int = Field(..., description="Number of duplicate folders")
class DuplicateFoldersResponse(BaseModel):
"""Response model for duplicate folders listing.
Attributes:
total_groups: Total number of duplicate groups found
duplicate_groups: List of duplicate folder groups
message: Human-readable summary
"""
total_groups: int = Field(..., description="Total number of duplicate groups")
duplicate_groups: List[DuplicateFolderGroup] = Field(
..., description="List of duplicate folder groups"
)
message: str = Field(..., description="Human-readable summary")
@router.get("/duplicate-folders", response_model=DuplicateFoldersResponse)
async def get_duplicate_folders(
_auth: dict = Depends(require_auth),
) -> DuplicateFoldersResponse:
"""List all pre-existing duplicate folder groups.
Scans the anime directory for folders with tvshow.nfo files that
map to the same series key. Returns groups of duplicates for
manual review and cleanup.
Returns:
DuplicateFoldersResponse with groups of duplicate folders
Note:
Not all duplicate folders are safe to merge - some may belong
to different releases (e.g., dubbed vs. subbed). Review carefully
before taking action.
"""
try:
if not settings.anime_directory:
return DuplicateFoldersResponse(
total_groups=0,
duplicate_groups=[],
message="Anime directory not configured",
)
anime_dir = Path(settings.anime_directory)
if not anime_dir.is_dir():
return DuplicateFoldersResponse(
total_groups=0,
duplicate_groups=[],
message=f"Anime directory not found: {anime_dir}",
)
duplicates = _scan_for_pre_existing_duplicates(anime_dir)
groups = [
DuplicateFolderGroup(
key=dup.key,
folders=dup.folders,
folder_count=dup.count,
)
for dup in duplicates
]
if groups:
message = (
f"Found {len(groups)} duplicate group(s). "
"Review carefully - some duplicates may be different releases "
"(e.g., dubbed vs. subbed)."
)
else:
message = "No duplicate folders found."
return DuplicateFoldersResponse(
total_groups=len(groups),
duplicate_groups=groups,
message=message,
)
except Exception as exc:
logger.error("Failed to scan for duplicate folders: %s", str(exc))
raise ServerError(
message=f"Failed to scan for duplicates: {str(exc)}"
) from exc
class AnimeSummary(BaseModel):
"""Summary of an anime series with missing episodes.