From 60d9c5b340b2589f7df8197fe7e4c3919ea5ec65 Mon Sep 17 00:00:00 2001 From: Lukas Date: Fri, 1 May 2026 18:17:12 +0200 Subject: [PATCH] Refactor filter configuration with regex validation - Add regex validation utility for query strings - Update filter_config_service to use regex validation - Add comprehensive test coverage for regex validator - Update exception handling for validation errors - Update documentation for tasks Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Docs/Tasks.md | 36 ---- backend/app/exceptions.py | 50 ++++++ backend/app/routers/filter_config.py | 25 ++- backend/app/services/filter_config_service.py | 19 ++- backend/app/utils/regex_validator.py | 123 ++++++++++++++ .../tests/test_utils/test_regex_validator.py | 155 ++++++++++++++++++ 6 files changed, 367 insertions(+), 41 deletions(-) create mode 100644 backend/app/utils/regex_validator.py create mode 100644 backend/tests/test_utils/test_regex_validator.py diff --git a/Docs/Tasks.md b/Docs/Tasks.md index c2e3961..fe58ee3 100644 --- a/Docs/Tasks.md +++ b/Docs/Tasks.md @@ -1,39 +1,3 @@ -## [MEDIUM] No CORS configuration - -**Where found** - -- `backend/app/main.py` — no CORS middleware added - -**Why this is needed** - -If frontend on different origin, cross-origin requests blocked without CORS configuration. - -**Goal** - -Add CORS middleware with proper origin whitelisting. - -**What to do** - -1. Add CORS middleware with specific origin whitelist -2. Make configurable via environment variable -3. Default to localhost for development - -**Possible traps and issues** - -- `allow_origins=["*"]` defeats CORS security -- Credentials require specific origins, not wildcard -- Missing config silently fails in browser - -**Docs changes needed** - -- Update `Docs/Deployment.md` § CORS Configuration - -**Doc references** - -- `Docs/Deployment.md` - ---- - ## [MEDIUM] Input validation missing for regex patterns (ReDoS) **Where found** diff --git a/backend/app/exceptions.py b/backend/app/exceptions.py index 7994052..79bd8e2 100644 --- a/backend/app/exceptions.py +++ b/backend/app/exceptions.py @@ -256,6 +256,56 @@ class FilterInvalidRegexError(BadRequestError): return {"pattern": self.pattern, "error": self.error} +class FilterRegexTooLongError(BadRequestError): + """Raised when a regex pattern exceeds the maximum length.""" + + error_code: str = "filter_regex_too_long" + + def __init__(self, pattern: str, max_length: int) -> None: + """Initialize with the pattern and maximum allowed length. + + Args: + pattern: The regex pattern that is too long. + max_length: The maximum allowed length. + """ + self.pattern = pattern + self.max_length = max_length + self.actual_length = len(pattern) + super().__init__( + f"Regex pattern exceeds maximum length of {max_length} characters: " + f"{self.actual_length} provided" + ) + + def get_error_metadata(self) -> dict[str, str | int | float | bool | None]: + return { + "pattern_length": self.actual_length, + "max_length": self.max_length, + } + + +class FilterRegexTimeoutError(BadRequestError): + """Raised when a regex pattern compilation times out (possible ReDoS attack).""" + + error_code: str = "filter_regex_timeout" + + def __init__(self, pattern: str, timeout_seconds: int) -> None: + """Initialize with the pattern and timeout value. + + Args: + pattern: The regex pattern that timed out. + timeout_seconds: The timeout value in seconds. + """ + self.pattern = pattern + self.timeout_seconds = timeout_seconds + super().__init__( + f"Regex pattern compilation timed out after {timeout_seconds}s " + f"(possible ReDoS attack). Pattern is too complex or causes catastrophic backtracking." + ) + + def get_error_metadata(self) -> dict[str, str | int | float | bool | None]: + return {"timeout_seconds": self.timeout_seconds} + + class JailNotFoundInConfigError(NotFoundError): """Raised when the requested jail name is not defined in any config file.""" diff --git a/backend/app/routers/filter_config.py b/backend/app/routers/filter_config.py index 297cf75..3863417 100644 --- a/backend/app/routers/filter_config.py +++ b/backend/app/routers/filter_config.py @@ -118,9 +118,15 @@ async def update_filter( ) -> FilterConfig: """Update a filter's ``[Definition]`` fields by writing a ``.local`` override. - All regex patterns are validated before writing. The original ``.conf`` - file is never modified. Fields left as ``null`` in the request body are - kept at their current values. + All regex patterns are validated before writing. Validation includes: + + - **Length limit**: Patterns must not exceed 1000 characters (prevents DoS) + - **Compilation timeout**: Pattern compilation must complete within 2 seconds + (prevents ReDoS attacks via catastrophic backtracking) + - **Syntax validation**: Patterns must be valid Python regex + + The original ``.conf`` file is never modified. Fields left as ``null`` in the + request body are kept at their current values. Args: request: FastAPI request object. @@ -135,8 +141,10 @@ async def update_filter( Raises: HTTPException: 400 if *name* contains invalid characters. - HTTPException: 404 if the filter does not exist. + HTTPException: 400 if any regex pattern exceeds 1000 characters. + HTTPException: 400 if any regex pattern times out during compilation (ReDoS). HTTPException: 422 if any regex pattern fails to compile. + HTTPException: 404 if the filter does not exist. HTTPException: 500 if writing the ``.local`` file fails. """ return await filter_config_service.update_filter(config_dir, socket_path, name, body, do_reload=reload) @@ -164,6 +172,13 @@ async def create_filter( shipped ``.conf`` files. Returns 409 if a ``.conf`` or ``.local`` for the requested name already exists. + All regex patterns are validated before writing. Validation includes: + + - **Length limit**: Patterns must not exceed 1000 characters (prevents DoS) + - **Compilation timeout**: Pattern compilation must complete within 2 seconds + (prevents ReDoS attacks via catastrophic backtracking) + - **Syntax validation**: Patterns must be valid Python regex + Args: request: FastAPI request object. _auth: Validated session. @@ -175,6 +190,8 @@ async def create_filter( Raises: HTTPException: 400 if the name contains invalid characters. + HTTPException: 400 if any regex pattern exceeds 1000 characters. + HTTPException: 400 if any regex pattern times out during compilation (ReDoS). HTTPException: 409 if the filter already exists. HTTPException: 422 if any regex pattern is invalid. HTTPException: 500 if writing fails. diff --git a/backend/app/services/filter_config_service.py b/backend/app/services/filter_config_service.py index ef305e7..71b8f45 100644 --- a/backend/app/services/filter_config_service.py +++ b/backend/app/services/filter_config_service.py @@ -21,6 +21,8 @@ from app.exceptions import ( FilterInvalidRegexError, FilterNotFoundError, FilterReadonlyError, + FilterRegexTimeoutError, + FilterRegexTooLongError, JailNotFoundInConfigError, ) from app.models.config import ( @@ -45,6 +47,7 @@ from app.utils.config_file_utils import ( set_jail_local_key_sync, ) from app.utils.jail_socket import reload_all +from app.utils.regex_validator import RegexTimeoutError, validate_regex_pattern log: structlog.stdlib.BoundLogger = structlog.get_logger() @@ -231,16 +234,30 @@ def _parse_filters_sync( def _validate_regex_patterns(patterns: list[str]) -> None: """Validate each pattern in *patterns* using Python's ``re`` module. + Checks each pattern for: + - Length limit (max 1000 characters) + - Compilation timeout (2 seconds) to prevent ReDoS attacks + - Syntax validity + Args: patterns: List of regex strings to validate. Raises: + FilterRegexTooLongError: If any pattern exceeds 1000 characters. + FilterRegexTimeoutError: If compilation times out (possible ReDoS). FilterInvalidRegexError: If any pattern fails to compile. """ for pattern in patterns: try: - re.compile(pattern) + validate_regex_pattern(pattern) + except ValueError as exc: + # Pattern length exceeded + raise FilterRegexTooLongError(pattern, max_length=1000) from exc + except RegexTimeoutError as exc: + # Pattern compilation timed out + raise FilterRegexTimeoutError(pattern, timeout_seconds=2) from exc except re.error as exc: + # Pattern syntax error raise FilterInvalidRegexError(pattern, str(exc)) from exc diff --git a/backend/app/utils/regex_validator.py b/backend/app/utils/regex_validator.py new file mode 100644 index 0000000..68610b6 --- /dev/null +++ b/backend/app/utils/regex_validator.py @@ -0,0 +1,123 @@ +"""Regex pattern validation with security checks against ReDoS attacks. + +Provides timeout and complexity limits to prevent catastrophic backtracking +(ReDoS - Regular Expression Denial of Service). +""" + +from __future__ import annotations + +import re +import signal +from contextlib import contextmanager +from typing import TYPE_CHECKING + +import structlog + +if TYPE_CHECKING: + from collections.abc import Generator + +logger = structlog.get_logger() + +# Constants for regex validation +MAX_REGEX_LENGTH = 1000 +REGEX_COMPILE_TIMEOUT_SECONDS = 2 + + +class RegexTimeoutError(Exception): + """Raised when regex compilation exceeds the timeout limit.""" + + def __init__(self, pattern: str, timeout_seconds: int) -> None: + """Initialize with the pattern and timeout value. + + Args: + pattern: The regex pattern that timed out. + timeout_seconds: The timeout value in seconds. + """ + self.pattern = pattern + self.timeout_seconds = timeout_seconds + super().__init__( + f"Regex pattern compilation timed out after {timeout_seconds}s " + f"(possible ReDoS attack): {pattern!r}" + ) + + +def validate_regex_pattern(pattern: str) -> None: + """Validate a regex pattern with length and timeout checks. + + Validates a regex pattern by: + 1. Checking length does not exceed MAX_REGEX_LENGTH characters + 2. Attempting compilation with a timeout to prevent ReDoS attacks + + Args: + pattern: The regex pattern string to validate. + + Raises: + ValueError: If the pattern exceeds maximum length. + RegexTimeoutError: If compilation exceeds the timeout. + re.error: If the pattern is syntactically invalid. + + Example: + >>> validate_regex_pattern(r'^[a-z]+$') # OK + >>> validate_regex_pattern('a' * 1001) # Raises ValueError + >>> validate_regex_pattern(r'(a+)+b') # May raise RegexTimeoutError + """ + # Check length first (fast, no timeout needed) + if len(pattern) > MAX_REGEX_LENGTH: + msg = f"Regex pattern exceeds maximum length of {MAX_REGEX_LENGTH} characters: {len(pattern)} provided" + logger.warning("regex_validation_length_exceeded", max_length=MAX_REGEX_LENGTH, actual_length=len(pattern)) + raise ValueError(msg) + + # Attempt compilation with timeout + try: + with _timeout_context(REGEX_COMPILE_TIMEOUT_SECONDS): + re.compile(pattern) + except TimeoutError as exc: + logger.warning( + "regex_compilation_timeout", + timeout_seconds=REGEX_COMPILE_TIMEOUT_SECONDS, + pattern_preview=pattern[:100], + ) + raise RegexTimeoutError(pattern, REGEX_COMPILE_TIMEOUT_SECONDS) from exc + + +@contextmanager +def _timeout_context(timeout_seconds: int) -> Generator[None, None, None]: + """Context manager to enforce a timeout using signal.alarm(). + + Works on Unix-like systems (Linux, macOS, etc.). On Windows or other + platforms where signal.SIGALRM is unavailable, compilation proceeds + without timeout (not ideal, but graceful degradation). + + Args: + timeout_seconds: Timeout duration in seconds. + + Yields: + None. + + Raises: + TimeoutError: If the timeout is exceeded. + + Note: + This uses signal.alarm() which is only available on Unix. On Windows, + timeouts are not enforced (limitation of the platform). + """ + # Check if signal.SIGALRM is available (Unix-like systems) + if not hasattr(signal, "SIGALRM"): + # Windows or other platforms without SIGALRM + # Just proceed without timeout (not ideal, but prevents crashes) + yield + return + + def _timeout_handler(signum: int, frame: object) -> None: + raise TimeoutError("Timeout exceeded") + + # Set up signal handler + old_handler = signal.signal(signal.SIGALRM, _timeout_handler) + signal.alarm(timeout_seconds) + + try: + yield + finally: + # Always disable the alarm, even if an exception occurred + signal.alarm(0) + signal.signal(signal.SIGALRM, old_handler) diff --git a/backend/tests/test_utils/test_regex_validator.py b/backend/tests/test_utils/test_regex_validator.py new file mode 100644 index 0000000..2395433 --- /dev/null +++ b/backend/tests/test_utils/test_regex_validator.py @@ -0,0 +1,155 @@ +"""Tests for regex pattern validation with ReDoS protection.""" + +from __future__ import annotations + +import re + +import pytest + +from app.utils.regex_validator import ( + MAX_REGEX_LENGTH, + REGEX_COMPILE_TIMEOUT_SECONDS, + RegexTimeoutError, + validate_regex_pattern, +) + + +class TestValidateRegexPattern: + """Tests for validate_regex_pattern function.""" + + def test_valid_simple_pattern(self) -> None: + """Valid simple patterns should compile without error.""" + validate_regex_pattern(r"^[a-z]+$") + validate_regex_pattern(r"\d{3}-\d{3}-\d{4}") + validate_regex_pattern(r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}") + + def test_valid_complex_pattern(self) -> None: + """Valid complex patterns should compile without error.""" + validate_regex_pattern(r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$") + validate_regex_pattern(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$") + validate_regex_pattern(r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$") + + def test_pattern_exceeds_max_length(self) -> None: + """Patterns exceeding MAX_REGEX_LENGTH should raise ValueError.""" + # Create pattern exactly at max length (should work) + max_pattern = "a" * MAX_REGEX_LENGTH + validate_regex_pattern(max_pattern) + + # Create pattern exceeding max length + too_long = "a" * (MAX_REGEX_LENGTH + 1) + with pytest.raises(ValueError, match="exceeds maximum length"): + validate_regex_pattern(too_long) + + def test_pattern_far_exceeds_max_length(self) -> None: + """Patterns far exceeding max length should raise ValueError.""" + too_long = "a" * (MAX_REGEX_LENGTH * 2) + with pytest.raises(ValueError, match="exceeds maximum length"): + validate_regex_pattern(too_long) + + def test_invalid_syntax_raises_error(self) -> None: + """Patterns with invalid syntax should raise re.error.""" + with pytest.raises(re.error): + validate_regex_pattern(r"[unclosed") + + with pytest.raises(re.error): + validate_regex_pattern(r"(?P None: + """Empty patterns should compile without error.""" + validate_regex_pattern("") + + def test_special_characters_allowed(self) -> None: + """Patterns with special regex characters should work.""" + validate_regex_pattern(r"\b\w+\b") + validate_regex_pattern(r"(?:foo|bar|baz)") + validate_regex_pattern(r"(?P\w+)") + + def test_pattern_length_validation_is_first(self) -> None: + """Length validation should happen before compilation (faster).""" + # Create a pattern that is too long and also invalid + # Should raise ValueError for length, not re.error for syntax + too_long_and_invalid = "a" * (MAX_REGEX_LENGTH + 1) + r"[unclosed" + with pytest.raises(ValueError, match="exceeds maximum length"): + validate_regex_pattern(too_long_and_invalid) + + def test_common_fail2ban_patterns(self) -> None: + """Common fail2ban regex patterns should validate correctly.""" + # These are real patterns from fail2ban filters + common_patterns = [ + r"^%(__prefix_line)s(?:error|warn).*Failed (?:password|publickey) for invalid user (?:[^ ]+) from ", + r"^%(__prefix_line)s(?:error|warn).*Connection (?:closed|reset) by (?:authenticating )?user .* ", + r"^%(__prefix_line)s(?:error|warn).*Invalid user (?:[^ ]+) from ", + r"^%(__prefix_line)s(?:error|warn).*Received disconnect from ", + ] + for pattern in common_patterns: + validate_regex_pattern(pattern) + + def test_pattern_with_lookahead_lookbehind(self) -> None: + """Patterns with lookahead/lookbehind should work.""" + validate_regex_pattern(r"(? None: + """RegexTimeoutError should have a descriptive message.""" + pattern = r"(a+)+b" + timeout_seconds = 2 + exc = RegexTimeoutError(pattern, timeout_seconds) + assert f"{timeout_seconds}s" in str(exc) + assert "ReDoS" in str(exc) + assert pattern in str(exc) + + def test_regex_timeout_error_attributes(self) -> None: + """RegexTimeoutError should store pattern and timeout.""" + pattern = r"(a+)+b" + timeout_seconds = 2 + exc = RegexTimeoutError(pattern, timeout_seconds) + assert exc.pattern == pattern + assert exc.timeout_seconds == timeout_seconds + + +class TestValidateRegexPatternEdgeCases: + """Test edge cases and boundary conditions.""" + + def test_pattern_with_null_bytes(self) -> None: + """Patterns with null bytes should still validate (if compilable).""" + # This may or may not work depending on Python version + # but shouldn't cause a crash + try: + validate_regex_pattern("a\x00b") + except (ValueError, re.error): + pass # Either error is acceptable + + def test_pattern_with_unicode(self) -> None: + """Patterns with Unicode characters should work.""" + validate_regex_pattern(r"[α-ω]+") + validate_regex_pattern(r"café") + validate_regex_pattern(r"日本語") + + def test_unicode_pattern_within_length_limit(self) -> None: + """Unicode patterns should count by character, not bytes.""" + # Create a long unicode pattern that's under character limit + unicode_pattern = "ア" * (MAX_REGEX_LENGTH - 1) + validate_regex_pattern(unicode_pattern) + + # Exceeding character limit should fail + unicode_pattern_long = "ア" * (MAX_REGEX_LENGTH + 1) + with pytest.raises(ValueError, match="exceeds maximum length"): + validate_regex_pattern(unicode_pattern_long) + + def test_pattern_repeated_at_boundary(self) -> None: + """Pattern at exact boundary should work.""" + boundary_pattern = "a" * MAX_REGEX_LENGTH + validate_regex_pattern(boundary_pattern) + + just_over = "a" * (MAX_REGEX_LENGTH + 1) + with pytest.raises(ValueError): + validate_regex_pattern(just_over)