feat: Enhanced anime add flow with sanitized folders and targeted scan

- Add sanitize_folder_name utility for filesystem-safe folder names
- Add sanitized_folder property to Serie entity
- Update SerieList.add() to use sanitized display names for folders
- Add scan_single_series() method for targeted episode scanning
- Enhance add_series endpoint: DB save -> folder create -> targeted scan
- Update response to include missing_episodes and total_missing
- Add comprehensive unit tests for new functionality
- Update API tests with proper mock support
This commit is contained in:
2025-12-26 12:49:23 +01:00
parent f28dc756c5
commit 1b7ca7b4da
11 changed files with 1370 additions and 146 deletions

View File

@@ -1,4 +1,5 @@
import logging
import os
import warnings
from typing import Any, List, Optional
@@ -21,6 +22,7 @@ from src.server.utils.dependencies import (
get_series_app,
require_auth,
)
from src.server.utils.filesystem import sanitize_folder_name
logger = logging.getLogger(__name__)
@@ -620,16 +622,20 @@ async def add_series(
_auth: dict = Depends(require_auth),
series_app: Any = Depends(get_series_app),
db: Optional[AsyncSession] = Depends(get_optional_database_session),
anime_service: AnimeService = Depends(get_anime_service),
) -> dict:
"""Add a new series to the library.
"""Add a new series to the library with full initialization.
Extracts the series `key` from the provided link URL.
The `key` is the URL-safe identifier used for all lookups.
The `name` is stored as display metadata along with a
filesystem-friendly `folder` name derived from the name.
This endpoint performs the complete series addition flow:
1. Validates inputs and extracts the series key from the link URL
2. Creates a sanitized folder name from the display name
3. Saves the series to the database (if available)
4. Creates the folder on disk with the sanitized name
5. Triggers a targeted scan for missing episodes (only this series)
Series are saved to the database using AnimeSeriesService when
database is available, falling back to in-memory storage otherwise.
The `key` is the URL-safe identifier used for all lookups.
The `name` is stored as display metadata and used to derive
the filesystem folder name (sanitized for filesystem safety).
Args:
request: Request containing the series link and name.
@@ -638,15 +644,23 @@ async def add_series(
_auth: Ensures the caller is authenticated (value unused)
series_app: Core `SeriesApp` instance provided via dependency
db: Optional database session for async operations
anime_service: AnimeService for scanning operations
Returns:
Dict[str, Any]: Status payload with success message, key, and db_id
Dict[str, Any]: Status payload with:
- status: "success" or "exists"
- message: Human-readable status message
- key: Series unique identifier
- folder: Created folder path
- db_id: Database ID (if saved to DB)
- missing_episodes: Dict of missing episodes by season
- total_missing: Total count of missing episodes
Raises:
HTTPException: If adding the series fails or link is invalid
"""
try:
# Validate inputs
# Step A: Validate inputs
if not request.link or not request.link.strip():
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
@@ -679,28 +693,40 @@ async def add_series(
detail="Could not extract series key from link",
)
# Create folder from name (filesystem-friendly)
folder = request.name.strip()
db_id = None
# Step B: Create sanitized folder name from display name
name = request.name.strip()
try:
folder = sanitize_folder_name(name)
except ValueError as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid series name for folder: {str(e)}",
)
# Try to save to database if available
db_id = None
missing_episodes: dict = {}
scan_error: Optional[str] = None
# Step C: Save to database if available
if db is not None:
# Check if series already exists in database
existing = await AnimeSeriesService.get_by_key(db, key)
if existing:
return {
"status": "exists",
"message": f"Series already exists: {request.name}",
"message": f"Series already exists: {name}",
"key": key,
"folder": existing.folder,
"db_id": existing.id
"db_id": existing.id,
"missing_episodes": {},
"total_missing": 0
}
# Save to database using AnimeSeriesService
anime_series = await AnimeSeriesService.create(
db=db,
key=key,
name=request.name.strip(),
name=name,
site="aniworld.to",
folder=folder,
)
@@ -708,41 +734,109 @@ async def add_series(
logger.info(
"Added series to database: %s (key=%s, db_id=%d)",
request.name,
name,
key,
db_id
)
# Also add to in-memory cache if series_app has the list attribute
# Step D: Create folder on disk and add to SerieList
folder_path = None
if series_app and hasattr(series_app, "list"):
serie = Serie(
key=key,
name=request.name.strip(),
name=name,
site="aniworld.to",
folder=folder,
episodeDict={}
)
# Add to in-memory cache
if hasattr(series_app.list, 'keyDict'):
# Direct update without file saving
series_app.list.keyDict[key] = serie
elif hasattr(series_app.list, 'add'):
# Legacy: use add method (may create file with deprecation warning)
# Add to SerieList - this creates the folder with sanitized name
if hasattr(series_app.list, 'add'):
with warnings.catch_warnings():
warnings.simplefilter("ignore", DeprecationWarning)
series_app.list.add(serie)
folder_path = series_app.list.add(serie, use_sanitized_folder=True)
# Update folder to reflect what was actually created
folder = serie.folder
elif hasattr(series_app.list, 'keyDict'):
# Manual folder creation and cache update
if hasattr(series_app.list, 'directory'):
folder_path = os.path.join(series_app.list.directory, folder)
os.makedirs(folder_path, exist_ok=True)
series_app.list.keyDict[key] = serie
logger.info(
"Created folder for series: %s at %s",
name,
folder_path or folder
)
return {
"status": "success",
"message": f"Successfully added series: {request.name}",
"key": key,
"folder": folder,
"db_id": db_id
# Step E: Trigger targeted scan for missing episodes
try:
if series_app and hasattr(series_app, "scanner"):
missing_episodes = series_app.scanner.scan_single_series(
key=key,
folder=folder
)
logger.info(
"Targeted scan completed for %s: found %d missing episodes",
key,
sum(len(eps) for eps in missing_episodes.values())
)
# Update the serie in keyDict with the missing episodes
if hasattr(series_app, "list") and hasattr(series_app.list, "keyDict"):
if key in series_app.list.keyDict:
series_app.list.keyDict[key].episodeDict = missing_episodes
elif anime_service:
# Fallback to anime_service if scanner not directly available
# Note: This is a lightweight scan, not a full rescan
logger.info(
"Scanner not directly available, "
"skipping targeted scan for %s",
key
)
except Exception as e:
# Scan failure is not critical - series was still added
scan_error = str(e)
logger.warning(
"Targeted scan failed for %s: %s (series still added)",
key,
e
)
# Convert missing episodes keys to strings for JSON serialization
missing_episodes_serializable = {
str(season): episodes
for season, episodes in missing_episodes.items()
}
# Calculate total missing
total_missing = sum(len(eps) for eps in missing_episodes.values())
# Step F: Return response
response = {
"status": "success",
"message": f"Successfully added series: {name}",
"key": key,
"folder": folder_path or folder,
"db_id": db_id,
"missing_episodes": missing_episodes_serializable,
"total_missing": total_missing
}
if scan_error:
response["scan_warning"] = f"Scan partially failed: {scan_error}"
return response
except HTTPException:
raise
except Exception as exc:
logger.error("Failed to add series: %s", exc, exc_info=True)
# Attempt to rollback database entry if folder creation failed
# (This is a best-effort cleanup)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to add series: {str(exc)}",

View File

@@ -0,0 +1,180 @@
"""Filesystem utilities for safe file and folder operations.
This module provides utility functions for safely handling filesystem
operations, including sanitizing folder names and path validation.
Security:
- Folder names are sanitized and target paths are checked to prevent path traversal
- Invalid filesystem characters are removed or replaced
- Unicode characters are preserved for international titles
"""
import os
import re
import unicodedata
from typing import Optional
# Characters that are invalid in filesystem paths across platforms
# Windows: < > : " / \ | ? *
# Linux/Mac: / and null byte
INVALID_PATH_CHARS = '<>:"/\\|?*\x00'

# Additional characters to remove for cleaner folder names
EXTRA_CLEANUP_CHARS = '\r\n\t'

# Maximum folder name length (conservative for cross-platform compatibility)
MAX_FOLDER_NAME_LENGTH = 200


def sanitize_folder_name(
    name: str,
    replacement: str = "",
    max_length: Optional[int] = None,
) -> str:
    """Sanitize a string for use as a filesystem folder name.

    Removes or replaces characters that are invalid for filesystems while
    preserving printable Unicode characters (for Japanese/Chinese titles,
    etc.). Leading and trailing dots and spaces are always stripped, also
    after truncation, so the result never starts or ends with either.

    Args:
        name: The string to sanitize (e.g., anime display name)
        replacement: Text substituted for each invalid character
            (default: ""). Must not itself contain invalid or control
            characters.
        max_length: Maximum length for the result. ``None`` or ``0``
            selects MAX_FOLDER_NAME_LENGTH.

    Returns:
        str: A filesystem-safe folder name

    Raises:
        ValueError: If name is None, empty, or results in empty string;
            if replacement contains invalid characters; or if max_length
            is negative.

    Examples:
        >>> sanitize_folder_name("Attack on Titan: Final Season")
        'Attack on Titan Final Season'
        >>> sanitize_folder_name("What If...?")
        'What If'
        >>> sanitize_folder_name("Re:Zero")
        'ReZero'
        >>> sanitize_folder_name("日本語タイトル")
        '日本語タイトル'
    """
    if name is None:
        raise ValueError("Folder name cannot be None")

    # Strip leading/trailing whitespace
    name = name.strip()
    if not name:
        raise ValueError("Folder name cannot be empty")

    if max_length is not None and max_length < 0:
        raise ValueError("max_length cannot be negative")
    max_len = max_length or MAX_FOLDER_NAME_LENGTH

    # A replacement that contains a forbidden character would put back
    # exactly what this function is meant to remove.
    if any(
        char in INVALID_PATH_CHARS
        or unicodedata.category(char).startswith('C')
        for char in replacement
    ):
        raise ValueError("Replacement contains invalid characters")

    # Normalize Unicode characters (NFC form for consistency)
    name = unicodedata.normalize('NFC', name)

    # Substitute invalid filesystem characters and CR/LF/TAB in one pass
    name = name.translate(
        str.maketrans(
            dict.fromkeys(INVALID_PATH_CHARS + EXTRA_CLEANUP_CHARS, replacement)
        )
    )

    # Drop remaining characters in the Unicode "Other" (C*) categories.
    # Spaces are category Zs and therefore survive this filter.
    name = ''.join(
        char for char in name
        if not unicodedata.category(char).startswith('C')
    )

    # Collapse multiple consecutive spaces
    name = re.sub(r' +', ' ', name)

    # Remove leading/trailing dots and whitespace
    # (dots at start can make folders hidden on Unix)
    name = name.strip('. ')

    # Handle edge case: all characters were invalid
    if not name:
        raise ValueError(
            "Folder name contains only invalid characters"
        )

    # Truncate to max length while avoiding breaking in middle of word
    if len(name) > max_len:
        truncated = name[:max_len]
        last_space = truncated.rfind(' ')
        if last_space > max_len // 2:  # Only if we don't lose too much
            truncated = truncated[:last_space]
        # The cut may land right after a dot or space; strip both so the
        # "no trailing dots/spaces" guarantee also holds for truncated
        # names. The first character is never a dot or space here, so the
        # result cannot become empty.
        name = truncated.rstrip('. ')

    return name
def is_safe_path(base_path: str, target_path: str) -> bool:
    """Check if target_path is safely within base_path.

    Prevents path traversal attacks by ensuring the target path
    is actually within the base path after resolution. Both paths are
    made absolute, ``..`` segments are collapsed and symbolic links are
    resolved before comparing, so a link inside the base directory that
    points elsewhere is not considered safe.

    Args:
        base_path: The base directory that should contain the target
        target_path: The path to validate

    Returns:
        bool: True if target_path is base_path itself or lies below it

    Example:
        >>> is_safe_path("/anime", "/anime/Attack on Titan")
        True
        >>> is_safe_path("/anime", "/anime/../etc/passwd")
        False
    """
    # Resolve to absolute, symlink-free paths
    base_resolved = os.path.realpath(base_path)
    target_resolved = os.path.realpath(target_path)

    # Compare whole path components instead of string prefixes. This
    # avoids "/anime" matching "/anime2" and also works when the base is
    # the filesystem root, where appending a separator would give "//".
    try:
        common = os.path.commonpath([base_resolved, target_resolved])
    except ValueError:
        # Raised e.g. for paths on different Windows drives: not contained.
        return False

    return os.path.normcase(common) == os.path.normcase(base_resolved)
def create_safe_folder(
    base_path: str,
    folder_name: str,
    exist_ok: bool = True,
) -> str:
    """Sanitize folder_name and create that folder inside base_path.

    Args:
        base_path: Directory the new folder is created in
        folder_name: Raw (unsanitized) name for the new folder
        exist_ok: Passed through to ``os.makedirs``; when True an already
            existing folder is not an error

    Returns:
        str: Full path of the folder that was created

    Raises:
        ValueError: If the name cannot be sanitized or the resulting
            path would lie outside base_path
        OSError: If folder creation fails
    """
    # Build the destination from the cleaned-up name
    destination = os.path.join(base_path, sanitize_folder_name(folder_name))

    # Only touch the disk once the destination is confirmed to be contained
    if is_safe_path(base_path, destination):
        os.makedirs(destination, exist_ok=exist_ok)
        return destination

    raise ValueError(
        f"Folder name '{folder_name}' would create path outside "
        f"base directory"
    )