- Add regex validation utility for query strings - Update filter_config_service to use regex validation - Add comprehensive test coverage for regex validator - Update exception handling for validation errors - Update documentation for tasks Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
156 lines
6.4 KiB
Python
156 lines
6.4 KiB
Python
"""Tests for regex pattern validation with ReDoS protection."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
|
||
import pytest
|
||
|
||
from app.utils.regex_validator import (
|
||
MAX_REGEX_LENGTH,
|
||
REGEX_COMPILE_TIMEOUT_SECONDS,
|
||
RegexTimeoutError,
|
||
validate_regex_pattern,
|
||
)
|
||
|
||
|
||
class TestValidateRegexPattern:
    """Exercise validate_regex_pattern with accepted and rejected patterns."""

    def test_valid_simple_pattern(self) -> None:
        """Short, everyday patterns pass validation without raising."""
        simple_patterns = (
            r"^[a-z]+$",
            r"\d{3}-\d{3}-\d{4}",
            r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}",
        )
        for candidate in simple_patterns:
            validate_regex_pattern(candidate)

    def test_valid_complex_pattern(self) -> None:
        """Longer patterns with groups and alternation pass validation."""
        complex_patterns = (
            r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$",
            r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
            r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$",
        )
        for candidate in complex_patterns:
            validate_regex_pattern(candidate)

    def test_pattern_exceeds_max_length(self) -> None:
        """One character past MAX_REGEX_LENGTH raises ValueError."""
        # A pattern of exactly MAX_REGEX_LENGTH characters is still accepted.
        validate_regex_pattern("a" * MAX_REGEX_LENGTH)

        # A single extra character pushes the pattern over the limit.
        oversized = "a" * (MAX_REGEX_LENGTH + 1)
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(oversized)

    def test_pattern_far_exceeds_max_length(self) -> None:
        """A pattern twice the allowed length raises ValueError."""
        doubled = "a" * (MAX_REGEX_LENGTH * 2)
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(doubled)

    def test_invalid_syntax_raises_error(self) -> None:
        """Syntactically broken patterns raise re.error."""
        malformed_patterns = (
            r"[unclosed",
            r"(?P<incomplete",
            r"(unclosed",
        )
        # Each pattern gets its own raises-context so all three must fail.
        for malformed in malformed_patterns:
            with pytest.raises(re.error):
                validate_regex_pattern(malformed)

    def test_empty_pattern(self) -> None:
        """The empty string passes validation without raising."""
        validate_regex_pattern("")

    def test_special_characters_allowed(self) -> None:
        """Word boundaries, alternation and named groups are accepted."""
        for candidate in (r"\b\w+\b", r"(?:foo|bar|baz)", r"(?P<name>\w+)"):
            validate_regex_pattern(candidate)

    def test_pattern_length_validation_is_first(self) -> None:
        """An over-long AND malformed pattern reports the length problem."""
        # The input breaks both rules; the expected ValueError (rather than
        # re.error) shows the length check is reported ahead of the syntax
        # check.
        overlong_prefix = "a" * (MAX_REGEX_LENGTH + 1)
        doubly_bad = overlong_prefix + r"[unclosed"
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(doubly_bad)

    def test_common_fail2ban_patterns(self) -> None:
        """fail2ban-style failregex lines pass validation."""
        # The %(__prefix_line)s and <HOST> placeholders are passed verbatim,
        # exactly as written below.
        failregex_lines = (
            r"^%(__prefix_line)s(?:error|warn).*Failed (?:password|publickey) for invalid user (?:[^ ]+) from <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Connection (?:closed|reset) by (?:authenticating )?user .* <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Invalid user (?:[^ ]+) from <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Received disconnect from <HOST>",
        )
        for failregex in failregex_lines:
            validate_regex_pattern(failregex)

    def test_pattern_with_lookahead_lookbehind(self) -> None:
        """All four lookaround assertion forms are accepted."""
        lookarounds = (
            ("negative lookbehind", r"(?<!abc)def"),
            ("negative lookahead", r"abc(?!def)"),
            ("positive lookbehind", r"(?<=abc)def"),
            ("positive lookahead", r"abc(?=def)"),
        )
        for _label, candidate in lookarounds:
            validate_regex_pattern(candidate)
|
||
|
||
|
||
class TestRegexTimeoutError:
    """Check the message and attributes of RegexTimeoutError."""

    def test_regex_timeout_error_message(self) -> None:
        """str(exc) contains the timeout, the word ReDoS and the pattern."""
        redos_pattern = r"(a+)+b"
        limit = 2
        message = str(RegexTimeoutError(redos_pattern, limit))
        assert f"{limit}s" in message
        assert "ReDoS" in message
        assert redos_pattern in message

    def test_regex_timeout_error_attributes(self) -> None:
        """The constructor arguments are exposed as attributes."""
        redos_pattern, limit = r"(a+)+b", 2
        error = RegexTimeoutError(redos_pattern, limit)
        assert error.pattern == redos_pattern
        assert error.timeout_seconds == limit
|
||
|
||
|
||
class TestValidateRegexPatternEdgeCases:
    """Edge cases and boundary conditions for validate_regex_pattern."""

    def test_pattern_with_null_bytes(self) -> None:
        """A pattern containing a NUL character is handled cleanly.

        The call may succeed, or raise ValueError or re.error. Any other
        exception type propagates and fails the test.
        """
        try:
            validate_regex_pattern("a\x00b")
        except (ValueError, re.error):
            pass  # Deliberate: either rejection is an acceptable outcome.

    def test_pattern_with_unicode(self) -> None:
        """Patterns containing non-ASCII characters pass validation."""
        validate_regex_pattern(r"[α-ω]+")
        validate_regex_pattern(r"café")
        validate_regex_pattern(r"日本語")

    def test_unicode_pattern_within_length_limit(self) -> None:
        """The length limit is enforced on non-ASCII patterns as well."""
        # MAX_REGEX_LENGTH - 1 non-ASCII characters stay under the limit.
        unicode_pattern = "ア" * (MAX_REGEX_LENGTH - 1)
        validate_regex_pattern(unicode_pattern)

        # MAX_REGEX_LENGTH + 1 characters must be rejected.
        unicode_pattern_long = "ア" * (MAX_REGEX_LENGTH + 1)
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(unicode_pattern_long)

    def test_pattern_repeated_at_boundary(self) -> None:
        """Exactly MAX_REGEX_LENGTH passes; one more character fails."""
        boundary_pattern = "a" * MAX_REGEX_LENGTH
        validate_regex_pattern(boundary_pattern)

        just_over = "a" * (MAX_REGEX_LENGTH + 1)
        # Pin the message like the other over-length tests in this file, so
        # an unrelated ValueError cannot satisfy the assertion.
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(just_over)
|