Files
BanGUI/backend/tests/test_utils/test_regex_validator.py
Lukas 0817a4cb47 fix(regex_validator): add ReDoS detection via regexploit
Detect catastrophic backtracking patterns before regex compilation
using regexploit library. Add ReDoSDetectedError exception and
_MINIMUM_STARRINESS threshold (>=3) to catch dangerous patterns
like (a+)+b. Update pyproject.toml deps, add tests for detection.
2026-05-03 00:05:33 +02:00

211 lines
8.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for regex pattern validation with ReDoS protection."""
from __future__ import annotations
import re
import pytest
from app.utils.regex_validator import (
MAX_REGEX_LENGTH,
ReDoSDetectedError,
RegexTimeoutError,
validate_regex_pattern,
)
class TestValidateRegexPattern:
    """Exercise validate_regex_pattern with accepted and rejected inputs."""

    def test_valid_simple_pattern(self) -> None:
        """Short, well-formed patterns are accepted without raising."""
        for simple in (
            r"^[a-z]+$",
            r"\d{3}-\d{3}-\d{4}",
            r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}",
        ):
            validate_regex_pattern(simple)

    def test_valid_complex_pattern(self) -> None:
        """Longer well-formed patterns (IP address, e-mail) are accepted."""
        for elaborate in (
            r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$",
            r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
            r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$",
        ):
            validate_regex_pattern(elaborate)

    def test_pattern_exceeds_max_length(self) -> None:
        """A pattern one character over MAX_REGEX_LENGTH raises ValueError."""
        # Exactly at the limit: must be accepted.
        at_limit = "a" * MAX_REGEX_LENGTH
        validate_regex_pattern(at_limit)

        # One character past the limit: must be rejected.
        over_limit = at_limit + "a"
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(over_limit)

    def test_pattern_far_exceeds_max_length(self) -> None:
        """A pattern twice as long as MAX_REGEX_LENGTH raises ValueError."""
        doubled = "aa" * MAX_REGEX_LENGTH
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(doubled)

    def test_invalid_syntax_raises_error(self) -> None:
        """Syntactically broken patterns raise re.error."""
        for broken in (r"[unclosed", r"(?P<incomplete", r"(unclosed"):
            with pytest.raises(re.error):
                validate_regex_pattern(broken)

    def test_empty_pattern(self) -> None:
        """The empty string is accepted as a pattern."""
        nothing = ""
        validate_regex_pattern(nothing)

    def test_special_characters_allowed(self) -> None:
        """Word boundaries, alternation and named groups are accepted."""
        for special in (r"\b\w+\b", r"(?:foo|bar|baz)", r"(?P<name>\w+)"):
            validate_regex_pattern(special)

    def test_pattern_length_validation_is_first(self) -> None:
        """An over-long AND malformed pattern reports the length error."""
        # The input violates both rules; the test expects the ValueError for
        # length rather than re.error for the unclosed character class.
        oversized_prefix = "a" * (MAX_REGEX_LENGTH + 1)
        too_long_and_invalid = oversized_prefix + r"[unclosed"
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(too_long_and_invalid)

    def test_common_fail2ban_patterns(self) -> None:
        """fail2ban-style failregex lines are accepted."""
        # Patterns in the shape used by fail2ban filters, including the
        # %(__prefix_line)s interpolation marker and the <HOST> tag.
        fail2ban_samples = (
            r"^%(__prefix_line)s(?:error|warn).*Failed (?:password|publickey) for invalid user (?:[^ ]+) from <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Connection (?:closed|reset) by (?:authenticating )?user .* <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Invalid user (?:[^ ]+) from <HOST>",
            r"^%(__prefix_line)s(?:error|warn).*Received disconnect from <HOST>",
        )
        for failregex in fail2ban_samples:
            validate_regex_pattern(failregex)

    def test_pattern_with_lookahead_lookbehind(self) -> None:
        """All four lookaround assertion forms are accepted."""
        lookarounds = (
            r"(?<!abc)def",  # negative lookbehind
            r"abc(?!def)",  # negative lookahead
            r"(?<=abc)def",  # positive lookbehind
            r"abc(?=def)",  # positive lookahead
        )
        for assertion in lookarounds:
            validate_regex_pattern(assertion)
class TestRegexTimeoutError:
    """Checks on the RegexTimeoutError exception type."""

    def test_regex_timeout_error_message(self) -> None:
        """The message mentions the timeout, "ReDoS" and the pattern."""
        offending = r"(a+)+b"
        limit = 2
        message = str(RegexTimeoutError(offending, limit))

        assert f"{limit}s" in message
        assert "ReDoS" in message
        assert offending in message

    def test_regex_timeout_error_attributes(self) -> None:
        """The constructor arguments are exposed as attributes."""
        offending = r"(a+)+b"
        limit = 2
        error = RegexTimeoutError(offending, limit)

        assert error.pattern == offending
        assert error.timeout_seconds == limit
class TestReDoSDetection:
    """Checks that ReDoS-prone patterns are rejected via regexploit."""

    @staticmethod
    def _first_finding(pattern: str):
        """Return the first result regexploit reports for *pattern*.

        The regexploit imports stay local to this helper, mirroring the
        function-scope imports the tests used originally.
        """
        from regexploit.ast.sre import SreOpParser
        from regexploit.redos import find

        parsed = SreOpParser().parse_sre(pattern, 0)
        findings = list(find(parsed))
        return findings[0]

    def test_redos_pattern_raises_error(self) -> None:
        """Nested-quantifier patterns raise ReDoSDetectedError."""
        dangerous = (
            r"(a+)+b",
            r"([a-zA-Z]+)*d",
            r"(x+)+y",
        )
        for candidate in dangerous:
            with pytest.raises(ReDoSDetectedError, match="ReDoS pattern detected"):
                validate_regex_pattern(candidate)

    def test_redos_error_message_contains_reason(self) -> None:
        """The error text carries the headline and the starriness value."""
        pattern = r"(a+)+b"
        finding = self._first_finding(pattern)
        rendered = str(ReDoSDetectedError(pattern, finding))

        assert "ReDoS pattern detected" in rendered
        assert str(finding.starriness) in rendered  # starriness is included

    def test_redos_error_attributes(self) -> None:
        """The error exposes pattern, starriness and a non-None reason."""
        pattern = r"(x+)+y"
        finding = self._first_finding(pattern)
        error = ReDoSDetectedError(pattern, finding)

        assert error.pattern == pattern
        assert error.starriness == finding.starriness
        assert error.reason is not None

    def test_non_redos_complex_pattern_passes(self) -> None:
        """Non-trivial but safe patterns are accepted."""
        harmless = (
            r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
            r"^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$",
            r"(?:foo|bar|baz)",
        )
        for candidate in harmless:
            validate_regex_pattern(candidate)

    def test_redos_detection_before_timeout(self) -> None:
        """A ReDoS pattern surfaces as ReDoSDetectedError from validation."""
        # The expected exception is the detection error; a timeout error
        # would make pytest.raises fail here.
        nested_quantifier = r"(a+)+b"
        with pytest.raises(ReDoSDetectedError):
            validate_regex_pattern(nested_quantifier)
class TestValidateRegexPatternEdgeCases:
    """Test edge cases and boundary conditions."""

    def test_pattern_with_null_bytes(self) -> None:
        """Patterns with null bytes should still validate (if compilable)."""
        # Acceptance may depend on the Python version; the only requirement
        # is that validation either succeeds or fails with one of the two
        # documented error types instead of crashing with something else.
        try:
            validate_regex_pattern("a\x00b")
        except (ValueError, re.error):
            pass  # Either error is acceptable

    def test_pattern_with_unicode(self) -> None:
        """Patterns with Unicode characters should work."""
        validate_regex_pattern(r"[α-ω]+")
        validate_regex_pattern(r"café")
        validate_regex_pattern(r"日本語")

    def test_unicode_pattern_within_length_limit(self) -> None:
        """Unicode patterns should count by character, not bytes."""
        # Use an explicit escape for a character that needs more than one
        # byte in UTF-8. Multiplying an empty literal would always yield ""
        # and the over-limit case below could never raise.
        multibyte_char = "\u00e9"

        # Under the character limit, although its UTF-8 encoding is about
        # twice as many bytes.
        unicode_pattern = multibyte_char * (MAX_REGEX_LENGTH - 1)
        assert len(unicode_pattern) == MAX_REGEX_LENGTH - 1
        validate_regex_pattern(unicode_pattern)

        # Exceeding character limit should fail
        unicode_pattern_long = multibyte_char * (MAX_REGEX_LENGTH + 1)
        assert len(unicode_pattern_long) == MAX_REGEX_LENGTH + 1
        with pytest.raises(ValueError, match="exceeds maximum length"):
            validate_regex_pattern(unicode_pattern_long)

    def test_pattern_repeated_at_boundary(self) -> None:
        """Pattern at exact boundary should work."""
        boundary_pattern = "a" * MAX_REGEX_LENGTH
        validate_regex_pattern(boundary_pattern)

        just_over = "a" * (MAX_REGEX_LENGTH + 1)
        with pytest.raises(ValueError):
            validate_regex_pattern(just_over)