feat: Enhanced anime add flow with sanitized folders and targeted scan

- Add sanitize_folder_name utility for filesystem-safe folder names
- Add sanitized_folder property to Serie entity
- Update SerieList.add() to use sanitized display names for folders
- Add scan_single_series() method for targeted episode scanning
- Enhance add_series endpoint: DB save -> folder create -> targeted scan
- Update response to include missing_episodes and total_missing
- Add comprehensive unit tests for new functionality
- Update API tests with proper mock support
This commit is contained in:
2025-12-26 12:49:23 +01:00
parent f28dc756c5
commit 1b7ca7b4da
11 changed files with 1370 additions and 146 deletions

View File

@@ -0,0 +1,180 @@
"""Filesystem utilities for safe file and folder operations.
This module provides utility functions for safely handling filesystem
operations, including sanitizing folder names and path validation.
Security:
- All functions sanitize inputs to prevent path traversal attacks
- Invalid filesystem characters are removed or replaced
- Unicode characters are preserved for international titles
"""
import os
import re
import unicodedata
from typing import Optional
# Characters that are invalid in filesystem paths across platforms
# Windows: < > : " / \ | ? *
# Linux/Mac: / and null byte
INVALID_PATH_CHARS = '<>:"/\\|?*\x00'
# Additional characters to remove for cleaner folder names
EXTRA_CLEANUP_CHARS = '\r\n\t'
# Maximum folder name length (conservative for cross-platform compatibility)
MAX_FOLDER_NAME_LENGTH = 200
def sanitize_folder_name(
name: str,
replacement: str = "",
max_length: Optional[int] = None,
) -> str:
"""Sanitize a string for use as a filesystem folder name.
Removes or replaces characters that are invalid for filesystems while
preserving Unicode characters (for Japanese/Chinese titles, etc.).
Args:
name: The string to sanitize (e.g., anime display name)
replacement: Character to replace invalid chars with (default: "")
max_length: Maximum length for the result (default: MAX_FOLDER_NAME_LENGTH)
Returns:
str: A filesystem-safe folder name
Raises:
ValueError: If name is None, empty, or results in empty string
Examples:
>>> sanitize_folder_name("Attack on Titan: Final Season")
'Attack on Titan Final Season'
>>> sanitize_folder_name("What If...?")
'What If...'
>>> sanitize_folder_name("Re:Zero")
'ReZero'
>>> sanitize_folder_name("日本語タイトル")
'日本語タイトル'
"""
if name is None:
raise ValueError("Folder name cannot be None")
# Strip leading/trailing whitespace
name = name.strip()
if not name:
raise ValueError("Folder name cannot be empty")
max_len = max_length or MAX_FOLDER_NAME_LENGTH
# Normalize Unicode characters (NFC form for consistency)
name = unicodedata.normalize('NFC', name)
# Remove invalid filesystem characters
for char in INVALID_PATH_CHARS:
name = name.replace(char, replacement)
# Remove extra cleanup characters
for char in EXTRA_CLEANUP_CHARS:
name = name.replace(char, replacement)
# Remove control characters but preserve Unicode
name = ''.join(
char for char in name
if not unicodedata.category(char).startswith('C')
or char == ' ' # Preserve spaces
)
# Collapse multiple consecutive spaces
name = re.sub(r' +', ' ', name)
# Remove leading/trailing dots and whitespace
# (dots at start can make folders hidden on Unix)
name = name.strip('. ')
# Handle edge case: all characters were invalid
if not name:
raise ValueError(
"Folder name contains only invalid characters"
)
# Truncate to max length while avoiding breaking in middle of word
if len(name) > max_len:
# Try to truncate at a word boundary
truncated = name[:max_len]
last_space = truncated.rfind(' ')
if last_space > max_len // 2: # Only if we don't lose too much
truncated = truncated[:last_space]
name = truncated.rstrip()
return name
def is_safe_path(base_path: str, target_path: str) -> bool:
"""Check if target_path is safely within base_path.
Prevents path traversal attacks by ensuring the target path
is actually within the base path after resolution.
Args:
base_path: The base directory that should contain the target
target_path: The path to validate
Returns:
bool: True if target_path is safely within base_path
Example:
>>> is_safe_path("/anime", "/anime/Attack on Titan")
True
>>> is_safe_path("/anime", "/anime/../etc/passwd")
False
"""
# Resolve to absolute paths
base_resolved = os.path.abspath(base_path)
target_resolved = os.path.abspath(target_path)
# Check that target starts with base (with trailing separator)
base_with_sep = base_resolved + os.sep
return (
target_resolved == base_resolved or
target_resolved.startswith(base_with_sep)
)
def create_safe_folder(
base_path: str,
folder_name: str,
exist_ok: bool = True,
) -> str:
"""Create a folder with a sanitized name safely within base_path.
Args:
base_path: Base directory to create folder within
folder_name: Unsanitized folder name
exist_ok: If True, don't raise error if folder exists
Returns:
str: Full path to the created folder
Raises:
ValueError: If resulting path would be outside base_path
OSError: If folder creation fails
"""
# Sanitize the folder name
safe_name = sanitize_folder_name(folder_name)
# Construct full path
full_path = os.path.join(base_path, safe_name)
# Validate path safety
if not is_safe_path(base_path, full_path):
raise ValueError(
f"Folder name '{folder_name}' would create path outside "
f"base directory"
)
# Create the folder
os.makedirs(full_path, exist_ok=exist_ok)
return full_path