Detect catastrophic backtracking patterns before regex compilation using the regexploit library. Add a ReDoSDetectedError exception and a _MINIMUM_STARRINESS threshold (>=3) to catch dangerous patterns such as (a+)+b. Update the pyproject.toml dependencies and add tests for the detection.
211 lines
8.7 KiB
Python
211 lines
8.7 KiB
Python
"""Tests for regex pattern validation with ReDoS protection."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import re
|
||
|
||
import pytest
|
||
|
||
from app.utils.regex_validator import (
|
||
MAX_REGEX_LENGTH,
|
||
ReDoSDetectedError,
|
||
RegexTimeoutError,
|
||
validate_regex_pattern,
|
||
)
|
||
|
||
|
||
class TestValidateRegexPattern:
    """Tests for validate_regex_pattern function.

    Cases that exercise several patterns are parametrized so that each
    pattern is collected as its own test: a failure names the offending
    pattern and does not prevent the remaining patterns from running.
    """

    @pytest.mark.parametrize(
        "pattern",
        [
            r"^[a-z]+$",
            r"\d{3}-\d{3}-\d{4}",
            r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}",
        ],
    )
    def test_valid_simple_pattern(self, pattern: str) -> None:
        """Valid simple patterns should compile without error."""
        validate_regex_pattern(pattern)

    @pytest.mark.parametrize(
        "pattern",
        [
            r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$",
            r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
            r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$",
        ],
    )
    def test_valid_complex_pattern(self, pattern: str) -> None:
        """Valid complex patterns should compile without error."""
        validate_regex_pattern(pattern)

    def test_pattern_exceeds_max_length(self) -> None:
        """Patterns exceeding MAX_REGEX_LENGTH should raise ValueError."""
        # Create pattern exactly at max length (should work)
        max_pattern = "a" * MAX_REGEX_LENGTH
        validate_regex_pattern(max_pattern)

        # Create pattern exceeding max length
        too_long = "a" * (MAX_REGEX_LENGTH + 1)
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(too_long)

    def test_pattern_far_exceeds_max_length(self) -> None:
        """Patterns far exceeding max length should raise ValueError."""
        too_long = "a" * (MAX_REGEX_LENGTH * 2)
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(too_long)

    @pytest.mark.parametrize(
        "pattern",
        [
            r"[unclosed",
            r"(?P<incomplete",
            r"(unclosed",
        ],
    )
    def test_invalid_syntax_raises_error(self, pattern: str) -> None:
        """Patterns with invalid syntax should raise re.error."""
        with pytest.raises(re.error):
            validate_regex_pattern(pattern)

    def test_empty_pattern(self) -> None:
        """Empty patterns should compile without error."""
        validate_regex_pattern("")

    @pytest.mark.parametrize(
        "pattern",
        [
            r"\b\w+\b",
            r"(?:foo|bar|baz)",
            r"(?P<name>\w+)",
        ],
    )
    def test_special_characters_allowed(self, pattern: str) -> None:
        """Patterns with special regex characters should work."""
        validate_regex_pattern(pattern)

    def test_pattern_length_validation_is_first(self) -> None:
        """Length validation should happen before compilation (faster)."""
        # Create a pattern that is too long and also invalid
        # Should raise ValueError for length, not re.error for syntax
        too_long_and_invalid = "a" * (MAX_REGEX_LENGTH + 1) + r"[unclosed"
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(too_long_and_invalid)

    # These are real patterns from fail2ban filters
    @pytest.mark.parametrize(
        "pattern",
        [
            r"^%(__prefix_line)s(?:error|warn).*Failed (?:password|publickey) for invalid user (?:[^ ]+) from <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Connection (?:closed|reset) by (?:authenticating )?user .* <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Invalid user (?:[^ ]+) from <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Received disconnect from <HOST>",
        ],
    )
    def test_common_fail2ban_patterns(self, pattern: str) -> None:
        """Common fail2ban regex patterns should validate correctly."""
        validate_regex_pattern(pattern)

    @pytest.mark.parametrize(
        "pattern",
        [
            pytest.param(r"(?<!abc)def", id="negative-lookbehind"),
            pytest.param(r"abc(?!def)", id="negative-lookahead"),
            pytest.param(r"(?<=abc)def", id="positive-lookbehind"),
            pytest.param(r"abc(?=def)", id="positive-lookahead"),
        ],
    )
    def test_pattern_with_lookahead_lookbehind(self, pattern: str) -> None:
        """Patterns with lookahead/lookbehind should work."""
        validate_regex_pattern(pattern)
|
||
|
||
|
||
class TestRegexTimeoutError:
    """Checks on the RegexTimeoutError exception type."""

    def test_regex_timeout_error_message(self) -> None:
        """The message should name the timeout, mention ReDoS and quote the pattern."""
        offending = r"(a+)+b"
        limit = 2
        rendered = str(RegexTimeoutError(offending, limit))
        # Same three substring checks, in the same order, against one rendering.
        for fragment in (f"{limit}s", "ReDoS", offending):
            assert fragment in rendered

    def test_regex_timeout_error_attributes(self) -> None:
        """The exception should expose the pattern and timeout it was built with."""
        offending = r"(a+)+b"
        limit = 2
        error = RegexTimeoutError(offending, limit)
        assert error.pattern == offending
        assert error.timeout_seconds == limit
|
||
|
||
|
||
class TestReDoSDetection:
    """Tests for ReDoS pattern detection via regexploit.

    Pattern lists are parametrized so each pattern is reported individually
    and a failure on one pattern does not hide the others.
    """

    @pytest.mark.parametrize(
        "pattern",
        [
            r"(a+)+b",
            r"([a-zA-Z]+)*d",
            r"(x+)+y",
        ],
    )
    def test_redos_pattern_raises_error(self, pattern: str) -> None:
        """Known catastrophic backtracking patterns should raise ReDoSDetectedError."""
        with pytest.raises(ReDoSDetectedError, match="ReDoS pattern detected"):
            validate_regex_pattern(pattern)

    def test_redos_error_message_contains_reason(self) -> None:
        """ReDoSDetectedError message should carry its prefix and the starriness."""
        from regexploit.ast.sre import SreOpParser
        from regexploit.redos import find

        pattern = r"(a+)+b"
        parsed = SreOpParser().parse_sre(pattern, 0)
        findings = list(find(parsed))
        # Fail with a readable message instead of an IndexError if the
        # analyser unexpectedly reports nothing for this pattern.
        assert findings, f"regexploit reported no findings for {pattern!r}"
        redos_obj = findings[0]

        message = str(ReDoSDetectedError(pattern, redos_obj))
        assert "ReDoS pattern detected" in message
        assert str(redos_obj.starriness) in message  # starriness is included

    def test_redos_error_attributes(self) -> None:
        """ReDoSDetectedError should store pattern and starriness."""
        from regexploit.ast.sre import SreOpParser
        from regexploit.redos import find

        pattern = r"(x+)+y"
        parsed = SreOpParser().parse_sre(pattern, 0)
        findings = list(find(parsed))
        # Fail with a readable message instead of an IndexError if the
        # analyser unexpectedly reports nothing for this pattern.
        assert findings, f"regexploit reported no findings for {pattern!r}"
        redos_obj = findings[0]

        exc = ReDoSDetectedError(pattern, redos_obj)
        assert exc.pattern == pattern
        assert exc.starriness == redos_obj.starriness
        assert exc.reason is not None

    @pytest.mark.parametrize(
        "pattern",
        [
            r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
            r"^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$",
            r"(?:foo|bar|baz)",
        ],
    )
    def test_non_redos_complex_pattern_passes(self, pattern: str) -> None:
        """Complex but safe patterns should pass validation."""
        validate_regex_pattern(pattern)

    def test_redos_detection_before_timeout(self) -> None:
        """ReDoS detection should occur before timeout check."""
        # This pattern is detected as ReDoS by regexploit
        redos_pattern = r"(a+)+b"
        with pytest.raises(ReDoSDetectedError):
            validate_regex_pattern(redos_pattern)
|
||
|
||
|
||
class TestValidateRegexPatternEdgeCases:
    """Test edge cases and boundary conditions."""

    def test_pattern_with_null_bytes(self) -> None:
        """Patterns with null bytes should still validate (if compilable)."""
        # This may or may not work depending on Python version
        # but shouldn't cause a crash
        try:
            validate_regex_pattern("a\x00b")
        except (ValueError, re.error):
            # Deliberately tolerated: either rejection is acceptable; any
            # other exception type still propagates and fails the test.
            pass

    @pytest.mark.parametrize(
        "pattern",
        [
            r"[α-ω]+",
            r"café",
            r"日本語",
        ],
    )
    def test_pattern_with_unicode(self, pattern: str) -> None:
        """Patterns with Unicode characters should work."""
        validate_regex_pattern(pattern)

    def test_unicode_pattern_within_length_limit(self) -> None:
        """Unicode patterns should count by character, not bytes."""
        # Create a long unicode pattern that's under character limit
        unicode_pattern = "ア" * (MAX_REGEX_LENGTH - 1)
        validate_regex_pattern(unicode_pattern)

        # Exceeding character limit should fail
        unicode_pattern_long = "ア" * (MAX_REGEX_LENGTH + 1)
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(unicode_pattern_long)

    def test_pattern_repeated_at_boundary(self) -> None:
        """Pattern at exact boundary should work; one character more should not."""
        boundary_pattern = "a" * MAX_REGEX_LENGTH
        validate_regex_pattern(boundary_pattern)

        just_over = "a" * (MAX_REGEX_LENGTH + 1)
        # Match the message like the other length tests in this module so an
        # unrelated ValueError cannot satisfy this check.
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(just_over)
|