feat: add duplicate folder detection and /duplicate-folders API endpoint
- Add DuplicateFolderGroup and DuplicateFoldersResponse Pydantic models
- Add /duplicate-folders GET endpoint for listing pre-existing duplicates
- Add _scan_for_pre_existing_duplicates() function for NFO-based detection
- Add _try_merge_duplicate_group() for auto-merging empty/symlink-only duplicates
- Integrate duplicate detection into validate_and_rename_series_folders workflow
- Skip rename for flagged duplicates to prevent data loss during merge
This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
import logging
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from src.core.entities.series import Serie
|
||||
from src.config.settings import settings
|
||||
from src.core.utils.key_utils import generate_key_from_folder, is_valid_key
|
||||
from src.server.database.service import AnimeSeriesService
|
||||
from src.server.exceptions import (
|
||||
@@ -26,6 +27,9 @@ from src.server.utils.dependencies import (
|
||||
)
|
||||
from src.server.utils.filesystem import sanitize_folder_name
|
||||
from src.server.utils.validators import validate_filter_value, validate_search_query
|
||||
from src.server.services.folder_rename_service import (
|
||||
_scan_for_pre_existing_duplicates,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -71,6 +75,100 @@ async def get_anime_status(
|
||||
) from exc
|
||||
|
||||
|
||||
class DuplicateFolderGroup(BaseModel):
    """One set of folders that all resolve to the same series.

    Attributes:
        key: Series key (provider-assigned unique identifier).
        folders: Names of the folders that duplicate each other.
        folder_count: How many folders are in this group.
    """

    # All three fields are required (``...``); none has a default.
    key: str = Field(
        ...,
        description="Series key (unique identifier)",
    )
    folders: List[str] = Field(
        ...,
        description="List of duplicate folder names",
    )
    folder_count: int = Field(
        ...,
        description="Number of duplicate folders",
    )
|
||||
|
||||
|
||||
class DuplicateFoldersResponse(BaseModel):
    """Payload returned by the duplicate-folders listing.

    Attributes:
        total_groups: Number of duplicate groups that were found.
        duplicate_groups: The duplicate folder groups themselves.
        message: Human-readable summary of the result.
    """

    # All three fields are required (``...``); none has a default.
    total_groups: int = Field(
        ...,
        description="Total number of duplicate groups",
    )
    duplicate_groups: List[DuplicateFolderGroup] = Field(
        ...,
        description="List of duplicate folder groups",
    )
    message: str = Field(
        ...,
        description="Human-readable summary",
    )
|
||||
|
||||
|
||||
@router.get("/duplicate-folders", response_model=DuplicateFoldersResponse)
async def get_duplicate_folders(
    _auth: dict = Depends(require_auth),
) -> DuplicateFoldersResponse:
    """List all pre-existing duplicate folder groups.

    Scans the anime directory for folders with tvshow.nfo files that
    map to the same series key. Returns groups of duplicates for
    manual review and cleanup.

    Args:
        _auth: Result of the ``require_auth`` dependency. It is not read
            here; declaring it makes the route require authentication.

    Returns:
        DuplicateFoldersResponse with groups of duplicate folders. When
        the anime directory is not configured or does not exist, an
        empty response with an explanatory message is returned instead
        of an error.

    Raises:
        ServerError: If anything fails while scanning or building the
            response; the original exception is chained as the cause.

    Note:
        Not all duplicate folders are safe to merge - some may belong
        to different releases (e.g., dubbed vs. subbed). Review carefully
        before taking action.
    """
    # Imported locally so this block does not depend on a change to the
    # module-level import section.
    from fastapi.concurrency import run_in_threadpool

    try:
        if not settings.anime_directory:
            return DuplicateFoldersResponse(
                total_groups=0,
                duplicate_groups=[],
                message="Anime directory not configured",
            )

        anime_dir = Path(settings.anime_directory)
        if not anime_dir.is_dir():
            return DuplicateFoldersResponse(
                total_groups=0,
                duplicate_groups=[],
                message=f"Anime directory not found: {anime_dir}",
            )

        # The scan is a synchronous call that walks the anime directory.
        # Running it directly in this coroutine would block the event
        # loop (and every other request) until it finishes, so hand it
        # to the worker threadpool instead.
        duplicates = await run_in_threadpool(
            _scan_for_pre_existing_duplicates, anime_dir
        )

        groups = [
            DuplicateFolderGroup(
                key=dup.key,
                folders=dup.folders,
                folder_count=dup.count,
            )
            for dup in duplicates
        ]

        if groups:
            message = (
                f"Found {len(groups)} duplicate group(s). "
                "Review carefully - some duplicates may be different releases "
                "(e.g., dubbed vs. subbed)."
            )
        else:
            message = "No duplicate folders found."

        return DuplicateFoldersResponse(
            total_groups=len(groups),
            duplicate_groups=groups,
            message=message,
        )
    except Exception as exc:
        # Endpoint boundary: log, then convert any failure into the
        # API's ServerError while keeping the original as the cause.
        logger.error("Failed to scan for duplicate folders: %s", str(exc))
        raise ServerError(
            message=f"Failed to scan for duplicates: {str(exc)}"
        ) from exc
|
||||
|
||||
|
||||
class AnimeSummary(BaseModel):
|
||||
"""Summary of an anime series with missing episodes.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user