Refactor filter configuration with regex validation

- Add regex validation utility for query strings
- Update filter_config_service to use regex validation
- Add comprehensive test coverage for regex validator
- Update exception handling for validation errors
- Update documentation for tasks

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-01 18:17:12 +02:00
parent 445c2c5418
commit 60d9c5b340
6 changed files with 367 additions and 41 deletions

View File

@@ -0,0 +1,155 @@
"""Tests for regex pattern validation with ReDoS protection."""
from __future__ import annotations
import re
import pytest
from app.utils.regex_validator import (
MAX_REGEX_LENGTH,
REGEX_COMPILE_TIMEOUT_SECONDS,
RegexTimeoutError,
validate_regex_pattern,
)
class TestValidateRegexPattern:
    """Exercise validate_regex_pattern with accepted and rejected patterns."""

    def test_valid_simple_pattern(self) -> None:
        """Short, well-formed patterns are accepted without raising."""
        simple_patterns = (
            r"^[a-z]+$",
            r"\d{3}-\d{3}-\d{4}",
            r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}",
        )
        for candidate in simple_patterns:
            validate_regex_pattern(candidate)

    def test_valid_complex_pattern(self) -> None:
        """Longer patterns using groups and alternation are accepted."""
        complex_patterns = (
            r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$",
            r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
            r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$",
        )
        for candidate in complex_patterns:
            validate_regex_pattern(candidate)

    def test_pattern_exceeds_max_length(self) -> None:
        """A pattern one character over MAX_REGEX_LENGTH raises ValueError."""
        # Exactly at the limit: must be accepted.
        at_limit = "a" * MAX_REGEX_LENGTH
        validate_regex_pattern(at_limit)

        # One past the limit: must be rejected with the length message.
        over_limit = "a" * (MAX_REGEX_LENGTH + 1)
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(over_limit)

    def test_pattern_far_exceeds_max_length(self) -> None:
        """A pattern twice the allowed length raises ValueError."""
        oversized = "a" * (MAX_REGEX_LENGTH * 2)
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(oversized)

    def test_invalid_syntax_raises_error(self) -> None:
        """Syntactically broken patterns raise re.error."""
        malformed_patterns = (
            r"[unclosed",
            r"(?P<incomplete",
            r"(unclosed",
        )
        for broken in malformed_patterns:
            with pytest.raises(re.error):
                validate_regex_pattern(broken)

    def test_empty_pattern(self) -> None:
        """The empty string is accepted as a pattern."""
        blank = ""
        validate_regex_pattern(blank)

    def test_special_characters_allowed(self) -> None:
        """Word boundaries, alternation and named groups are accepted."""
        for candidate in (r"\b\w+\b", r"(?:foo|bar|baz)", r"(?P<name>\w+)"):
            validate_regex_pattern(candidate)

    def test_pattern_length_validation_is_first(self) -> None:
        """An over-long AND malformed pattern reports the length error.

        Getting ValueError rather than re.error shows the length check
        takes effect before the syntax error would surface.
        """
        padding = "a" * (MAX_REGEX_LENGTH + 1)
        oversized_and_broken = padding + r"[unclosed"
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(oversized_and_broken)

    def test_common_fail2ban_patterns(self) -> None:
        """Typical fail2ban failregex lines are accepted."""
        # Patterns in the style of fail2ban filters, including the
        # %(__prefix_line)s interpolation and the <HOST> placeholder.
        fail2ban_samples = (
            r"^%(__prefix_line)s(?:error|warn).*Failed (?:password|publickey) for invalid user (?:[^ ]+) from <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Connection (?:closed|reset) by (?:authenticating )?user .* <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Invalid user (?:[^ ]+) from <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Received disconnect from <HOST>",
        )
        for sample in fail2ban_samples:
            validate_regex_pattern(sample)

    def test_pattern_with_lookahead_lookbehind(self) -> None:
        """All four lookaround assertion forms are accepted."""
        lookaround_patterns = (
            r"(?<!abc)def",  # negative lookbehind
            r"abc(?!def)",  # negative lookahead
            r"(?<=abc)def",  # positive lookbehind
            r"abc(?=def)",  # positive lookahead
        )
        for candidate in lookaround_patterns:
            validate_regex_pattern(candidate)
class TestRegexTimeoutError:
    """Check the message and attributes exposed by RegexTimeoutError."""

    def test_regex_timeout_error_message(self) -> None:
        """The rendered message names the timeout, "ReDoS" and the pattern."""
        pattern, timeout_seconds = r"(a+)+b", 2
        rendered = str(RegexTimeoutError(pattern, timeout_seconds))
        assert f"{timeout_seconds}s" in rendered
        assert "ReDoS" in rendered
        assert pattern in rendered

    def test_regex_timeout_error_attributes(self) -> None:
        """The constructor arguments are readable back as attributes."""
        pattern, timeout_seconds = r"(a+)+b", 2
        error = RegexTimeoutError(pattern, timeout_seconds)
        assert error.pattern == pattern
        assert error.timeout_seconds == timeout_seconds
class TestValidateRegexPatternEdgeCases:
    """Edge cases and boundary conditions for validate_regex_pattern."""

    def test_pattern_with_null_bytes(self) -> None:
        """A pattern containing a NUL byte must not crash the validator.

        Acceptance or a clean rejection (ValueError / re.error) are both
        fine; any other exception type propagates and fails the test.
        """
        # This may or may not be accepted depending on the Python version,
        # so the outcome is deliberately not pinned.
        try:
            validate_regex_pattern("a\x00b")
        except (ValueError, re.error):
            pass  # Either error is an acceptable rejection.

    def test_pattern_with_unicode(self) -> None:
        """Patterns containing non-ASCII characters are accepted."""
        validate_regex_pattern(r"[α-ω]+")
        validate_regex_pattern(r"café")
        validate_regex_pattern(r"日本語")

    def test_unicode_pattern_within_length_limit(self) -> None:
        """The length limit counts characters, not encoded bytes."""
        # Multiplying an empty string always yields "", which would make
        # both checks below meaningless. Use a real non-ASCII character
        # (U+65E5, three bytes in UTF-8), written as an escape so it
        # survives encoding mishaps in the source file.
        wide_char = "\u65e5"

        # Under the character limit, although well over it in bytes.
        unicode_pattern = wide_char * (MAX_REGEX_LENGTH - 1)
        validate_regex_pattern(unicode_pattern)

        # One character over the limit must be rejected.
        unicode_pattern_long = wide_char * (MAX_REGEX_LENGTH + 1)
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(unicode_pattern_long)

    def test_pattern_repeated_at_boundary(self) -> None:
        """A pattern at the limit passes; one character more is rejected."""
        boundary_pattern = "a" * MAX_REGEX_LENGTH
        validate_regex_pattern(boundary_pattern)

        just_over = "a" * (MAX_REGEX_LENGTH + 1)
        # Match the message so an unrelated ValueError cannot satisfy the test.
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(just_over)