feat: Enhanced anime add flow with sanitized folders and targeted scan
- Add sanitize_folder_name utility for filesystem-safe folder names
- Add sanitized_folder property to Serie entity
- Update SerieList.add() to use sanitized display names for folders
- Add scan_single_series() method for targeted episode scanning
- Enhance add_series endpoint: DB save -> folder create -> targeted scan
- Update response to include missing_episodes and total_missing
- Add comprehensive unit tests for new functionality
- Update API tests with proper mock support
This commit is contained in:
180
src/server/utils/filesystem.py
Normal file
180
src/server/utils/filesystem.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Filesystem utilities for safe file and folder operations.
|
||||
|
||||
This module provides utility functions for safely handling filesystem
|
||||
operations, including sanitizing folder names and path validation.
|
||||
|
||||
Security:
|
||||
- All functions sanitize inputs to prevent path traversal attacks
|
||||
- Invalid filesystem characters are removed or replaced
|
||||
- Unicode characters are preserved for international titles
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import unicodedata
|
||||
from typing import Optional
|
||||
|
||||
# Characters that are invalid in filesystem paths across platforms
# Windows: < > : " / \ | ? *
# Linux/Mac: / and null byte
INVALID_PATH_CHARS = '<>:"/\\|?*\x00'

# Additional characters to remove for cleaner folder names
EXTRA_CLEANUP_CHARS = '\r\n\t'

# Maximum folder name length (conservative for cross-platform compatibility)
MAX_FOLDER_NAME_LENGTH = 200


def sanitize_folder_name(
    name: str,
    replacement: str = "",
    max_length: Optional[int] = None,
) -> str:
    """Sanitize a string for use as a filesystem folder name.

    Removes or replaces characters that are invalid for filesystems while
    preserving Unicode characters (for Japanese/Chinese titles, etc.).
    Leading and trailing dots and spaces are always stripped, including
    any that end up at the end of the name after truncation.

    Args:
        name: The string to sanitize (e.g., anime display name)
        replacement: Text substituted for each invalid character
            (default: ""). Must not itself contain a character from
            INVALID_PATH_CHARS.
        max_length: Maximum length of the result in characters. None or 0
            selects MAX_FOLDER_NAME_LENGTH.

    Returns:
        str: A filesystem-safe folder name

    Raises:
        ValueError: If name is None, empty, or results in empty string,
            or if replacement contains an invalid path character

    Examples:
        >>> sanitize_folder_name("Attack on Titan: Final Season")
        'Attack on Titan Final Season'
        >>> sanitize_folder_name("What If...?")
        'What If'
        >>> sanitize_folder_name("Re:Zero")
        'ReZero'
        >>> sanitize_folder_name("日本語タイトル")
        '日本語タイトル'
    """
    if name is None:
        raise ValueError("Folder name cannot be None")

    # A replacement such as "/" or ":" would put back exactly the
    # characters this function exists to remove.
    if any(char in INVALID_PATH_CHARS for char in replacement):
        raise ValueError(
            "Replacement must not contain invalid path characters"
        )

    # Strip leading/trailing whitespace
    name = name.strip()

    if not name:
        raise ValueError("Folder name cannot be empty")

    max_len = max_length or MAX_FOLDER_NAME_LENGTH

    # Normalize Unicode characters (NFC form for consistency)
    name = unicodedata.normalize('NFC', name)

    # Replace invalid filesystem characters, then the cleanup characters
    for char in INVALID_PATH_CHARS:
        name = name.replace(char, replacement)

    for char in EXTRA_CLEANUP_CHARS:
        name = name.replace(char, replacement)

    # Drop every remaining character in a Unicode "C*" (Other) category.
    # Spaces are category Zs, so they survive without a special case.
    name = ''.join(
        char for char in name
        if not unicodedata.category(char).startswith('C')
    )

    # Collapse multiple consecutive spaces
    name = re.sub(r' +', ' ', name)

    # Remove leading/trailing dots and whitespace
    # (dots at start can make folders hidden on Unix)
    name = name.strip('. ')

    # Handle edge case: all characters were invalid
    if not name:
        raise ValueError(
            "Folder name contains only invalid characters"
        )

    # Truncate to max length while avoiding breaking in middle of word
    if len(name) > max_len:
        truncated = name[:max_len]
        last_space = truncated.rfind(' ')
        if last_space > max_len // 2:  # Only if we don't lose too much
            truncated = truncated[:last_space]
        # The cut can expose a dot or space that used to be in the middle
        # of the name; strip both so the result never ends with either.
        name = truncated.rstrip('. ')

    return name
|
||||
|
||||
|
||||
def is_safe_path(base_path: str, target_path: str) -> bool:
    """Check if target_path is safely within base_path.

    Prevents path traversal attacks by ensuring the target path
    is actually within the base path once both have been made absolute
    and normalized (".." and "." segments collapsed).

    Note:
        Normalization uses os.path.abspath, which works on the path text
        only. Symbolic links are not followed.

    Args:
        base_path: The base directory that should contain the target
        target_path: The path to validate

    Returns:
        bool: True if target_path is base_path itself or lies below it

    Example:
        >>> is_safe_path("/anime", "/anime/Attack on Titan")
        True
        >>> is_safe_path("/anime", "/anime/../etc/passwd")
        False
    """
    # Resolve to absolute, normalized paths
    base_resolved = os.path.abspath(base_path)
    target_resolved = os.path.abspath(target_path)

    if target_resolved == base_resolved:
        return True

    # Compare against the base followed by a separator so that "/anime"
    # does not match "/anime-other". A filesystem root already ends with
    # the separator; appending another one would make the prefix "//" and
    # wrongly reject every path under the root.
    if base_resolved.endswith(os.sep):
        base_with_sep = base_resolved
    else:
        base_with_sep = base_resolved + os.sep

    return target_resolved.startswith(base_with_sep)
|
||||
|
||||
|
||||
def create_safe_folder(
    base_path: str,
    folder_name: str,
    exist_ok: bool = True,
) -> str:
    """Create a sanitized sub-folder of base_path and return its path.

    The raw folder name is passed through sanitize_folder_name, joined
    onto base_path, and checked with is_safe_path before anything is
    created on disk.

    Args:
        base_path: Base directory to create folder within
        folder_name: Unsanitized folder name
        exist_ok: If True, don't raise error if folder exists

    Returns:
        str: Full path to the created folder

    Raises:
        ValueError: If resulting path would be outside base_path
        OSError: If folder creation fails
    """
    target_dir = os.path.join(base_path, sanitize_folder_name(folder_name))

    # Only touch the disk once the joined path is confirmed to stay
    # inside the base directory.
    if is_safe_path(base_path, target_dir):
        os.makedirs(target_dir, exist_ok=exist_ok)
        return target_dir

    raise ValueError(
        f"Folder name '{folder_name}' would create path outside "
        f"base directory"
    )
|
||||
Reference in New Issue
Block a user