Refactor filter configuration with regex validation

- Add regex validation utility for query strings
- Update filter_config_service to use regex validation
- Add comprehensive test coverage for regex validator
- Update exception handling for validation errors
- Update documentation for tasks

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-01 18:17:12 +02:00
parent 445c2c5418
commit 60d9c5b340
6 changed files with 367 additions and 41 deletions

View File

@@ -256,6 +256,56 @@ class FilterInvalidRegexError(BadRequestError):
return {"pattern": self.pattern, "error": self.error}
class FilterRegexTooLongError(BadRequestError):
"""Raised when a regex pattern exceeds the maximum length."""
error_code: str = "filter_regex_too_long"
def __init__(self, pattern: str, max_length: int) -> None:
"""Initialize with the pattern and maximum allowed length.
Args:
pattern: The regex pattern that is too long.
max_length: The maximum allowed length.
"""
self.pattern = pattern
self.max_length = max_length
self.actual_length = len(pattern)
super().__init__(
f"Regex pattern exceeds maximum length of {max_length} characters: "
f"{self.actual_length} provided"
)
def get_error_metadata(self) -> dict[str, str | int | float | bool | None]:
return {
"pattern_length": self.actual_length,
"max_length": self.max_length,
}
class FilterRegexTimeoutError(BadRequestError):
"""Raised when a regex pattern compilation times out (possible ReDoS attack)."""
error_code: str = "filter_regex_timeout"
def __init__(self, pattern: str, timeout_seconds: int) -> None:
"""Initialize with the pattern and timeout value.
Args:
pattern: The regex pattern that timed out.
timeout_seconds: The timeout value in seconds.
"""
self.pattern = pattern
self.timeout_seconds = timeout_seconds
super().__init__(
f"Regex pattern compilation timed out after {timeout_seconds}s "
f"(possible ReDoS attack). Pattern is too complex or causes catastrophic backtracking."
)
def get_error_metadata(self) -> dict[str, str | int | float | bool | None]:
return {"timeout_seconds": self.timeout_seconds}
class JailNotFoundInConfigError(NotFoundError):
"""Raised when the requested jail name is not defined in any config file."""

View File

@@ -118,9 +118,15 @@ async def update_filter(
) -> FilterConfig:
"""Update a filter's ``[Definition]`` fields by writing a ``.local`` override.
All regex patterns are validated before writing. The original ``.conf``
file is never modified. Fields left as ``null`` in the request body are
kept at their current values.
All regex patterns are validated before writing. Validation includes:
- **Length limit**: Patterns must not exceed 1000 characters (prevents DoS)
- **Compilation timeout**: Pattern compilation must complete within 2 seconds
(prevents ReDoS attacks via catastrophic backtracking)
- **Syntax validation**: Patterns must be valid Python regex
The original ``.conf`` file is never modified. Fields left as ``null`` in the
request body are kept at their current values.
Args:
request: FastAPI request object.
@@ -135,8 +141,10 @@ async def update_filter(
Raises:
HTTPException: 400 if *name* contains invalid characters.
HTTPException: 404 if the filter does not exist.
HTTPException: 400 if any regex pattern exceeds 1000 characters.
HTTPException: 400 if any regex pattern times out during compilation (ReDoS).
HTTPException: 422 if any regex pattern fails to compile.
HTTPException: 404 if the filter does not exist.
HTTPException: 500 if writing the ``.local`` file fails.
"""
return await filter_config_service.update_filter(config_dir, socket_path, name, body, do_reload=reload)
@@ -164,6 +172,13 @@ async def create_filter(
shipped ``.conf`` files. Returns 409 if a ``.conf`` or ``.local`` for
the requested name already exists.
All regex patterns are validated before writing. Validation includes:
- **Length limit**: Patterns must not exceed 1000 characters (prevents DoS)
- **Compilation timeout**: Pattern compilation must complete within 2 seconds
(prevents ReDoS attacks via catastrophic backtracking)
- **Syntax validation**: Patterns must be valid Python regex
Args:
request: FastAPI request object.
_auth: Validated session.
@@ -175,6 +190,8 @@ async def create_filter(
Raises:
HTTPException: 400 if the name contains invalid characters.
HTTPException: 400 if any regex pattern exceeds 1000 characters.
HTTPException: 400 if any regex pattern times out during compilation (ReDoS).
HTTPException: 409 if the filter already exists.
HTTPException: 422 if any regex pattern is invalid.
HTTPException: 500 if writing fails.

View File

@@ -21,6 +21,8 @@ from app.exceptions import (
FilterInvalidRegexError,
FilterNotFoundError,
FilterReadonlyError,
FilterRegexTimeoutError,
FilterRegexTooLongError,
JailNotFoundInConfigError,
)
from app.models.config import (
@@ -45,6 +47,7 @@ from app.utils.config_file_utils import (
set_jail_local_key_sync,
)
from app.utils.jail_socket import reload_all
from app.utils.regex_validator import RegexTimeoutError, validate_regex_pattern
log: structlog.stdlib.BoundLogger = structlog.get_logger()
@@ -231,16 +234,30 @@ def _parse_filters_sync(
def _validate_regex_patterns(patterns: list[str]) -> None:
"""Validate each pattern in *patterns* using Python's ``re`` module.
Checks each pattern for:
- Length limit (max 1000 characters)
- Compilation timeout (2 seconds) to prevent ReDoS attacks
- Syntax validity
Args:
patterns: List of regex strings to validate.
Raises:
FilterRegexTooLongError: If any pattern exceeds 1000 characters.
FilterRegexTimeoutError: If compilation times out (possible ReDoS).
FilterInvalidRegexError: If any pattern fails to compile.
"""
for pattern in patterns:
try:
re.compile(pattern)
validate_regex_pattern(pattern)
except ValueError as exc:
# Pattern length exceeded
raise FilterRegexTooLongError(pattern, max_length=1000) from exc
except RegexTimeoutError as exc:
# Pattern compilation timed out
raise FilterRegexTimeoutError(pattern, timeout_seconds=2) from exc
except re.error as exc:
# Pattern syntax error
raise FilterInvalidRegexError(pattern, str(exc)) from exc

View File

@@ -0,0 +1,123 @@
"""Regex pattern validation with security checks against ReDoS attacks.
Provides timeout and complexity limits to prevent catastrophic backtracking
(ReDoS - Regular Expression Denial of Service).
"""
from __future__ import annotations
import re
import signal
from contextlib import contextmanager
from typing import TYPE_CHECKING
import structlog
if TYPE_CHECKING:
from collections.abc import Generator
logger = structlog.get_logger()
# Constants for regex validation
MAX_REGEX_LENGTH = 1000
REGEX_COMPILE_TIMEOUT_SECONDS = 2
class RegexTimeoutError(Exception):
"""Raised when regex compilation exceeds the timeout limit."""
def __init__(self, pattern: str, timeout_seconds: int) -> None:
"""Initialize with the pattern and timeout value.
Args:
pattern: The regex pattern that timed out.
timeout_seconds: The timeout value in seconds.
"""
self.pattern = pattern
self.timeout_seconds = timeout_seconds
super().__init__(
f"Regex pattern compilation timed out after {timeout_seconds}s "
f"(possible ReDoS attack): {pattern!r}"
)
def validate_regex_pattern(pattern: str) -> None:
"""Validate a regex pattern with length and timeout checks.
Validates a regex pattern by:
1. Checking length does not exceed MAX_REGEX_LENGTH characters
2. Attempting compilation with a timeout to prevent ReDoS attacks
Args:
pattern: The regex pattern string to validate.
Raises:
ValueError: If the pattern exceeds maximum length.
RegexTimeoutError: If compilation exceeds the timeout.
re.error: If the pattern is syntactically invalid.
Example:
>>> validate_regex_pattern(r'^[a-z]+$') # OK
>>> validate_regex_pattern('a' * 1001) # Raises ValueError
>>> validate_regex_pattern(r'(a+)+b') # May raise RegexTimeoutError
"""
# Check length first (fast, no timeout needed)
if len(pattern) > MAX_REGEX_LENGTH:
msg = f"Regex pattern exceeds maximum length of {MAX_REGEX_LENGTH} characters: {len(pattern)} provided"
logger.warning("regex_validation_length_exceeded", max_length=MAX_REGEX_LENGTH, actual_length=len(pattern))
raise ValueError(msg)
# Attempt compilation with timeout
try:
with _timeout_context(REGEX_COMPILE_TIMEOUT_SECONDS):
re.compile(pattern)
except TimeoutError as exc:
logger.warning(
"regex_compilation_timeout",
timeout_seconds=REGEX_COMPILE_TIMEOUT_SECONDS,
pattern_preview=pattern[:100],
)
raise RegexTimeoutError(pattern, REGEX_COMPILE_TIMEOUT_SECONDS) from exc
@contextmanager
def _timeout_context(timeout_seconds: int) -> Generator[None, None, None]:
"""Context manager to enforce a timeout using signal.alarm().
Works on Unix-like systems (Linux, macOS, etc.). On Windows or other
platforms where signal.SIGALRM is unavailable, compilation proceeds
without timeout (not ideal, but graceful degradation).
Args:
timeout_seconds: Timeout duration in seconds.
Yields:
None.
Raises:
TimeoutError: If the timeout is exceeded.
Note:
This uses signal.alarm() which is only available on Unix. On Windows,
timeouts are not enforced (limitation of the platform).
"""
# Check if signal.SIGALRM is available (Unix-like systems)
if not hasattr(signal, "SIGALRM"):
# Windows or other platforms without SIGALRM
# Just proceed without timeout (not ideal, but prevents crashes)
yield
return
def _timeout_handler(signum: int, frame: object) -> None:
raise TimeoutError("Timeout exceeded")
# Set up signal handler
old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
signal.alarm(timeout_seconds)
try:
yield
finally:
# Always disable the alarm, even if an exception occurred
signal.alarm(0)
signal.signal(signal.SIGALRM, old_handler)