fix(regex_validator): add ReDoS detection via regexploit

Detect catastrophic backtracking patterns before regex compilation
using the regexploit library. Add the ReDoSDetectedError exception and a
_MINIMUM_STARRINESS threshold (>=3) to catch dangerous patterns
like (a+)+b. Update pyproject.toml dependencies and add tests for detection.
This commit is contained in:
2026-05-03 00:05:33 +02:00
parent e436727942
commit 0817a4cb47
5 changed files with 290 additions and 8 deletions

View File

@@ -8,7 +8,7 @@ import pytest
from app.utils.regex_validator import (
MAX_REGEX_LENGTH,
REGEX_COMPILE_TIMEOUT_SECONDS,
ReDoSDetectedError,
RegexTimeoutError,
validate_regex_pattern,
)
@@ -116,6 +116,61 @@ class TestRegexTimeoutError:
assert exc.timeout_seconds == timeout_seconds
class TestReDoSDetection:
    """Tests for ReDoS pattern detection via regexploit."""

    @staticmethod
    def _first_redos(pattern: str):
        """Return the first finding regexploit reports for *pattern*.

        Fails the calling test with a readable message (instead of a bare
        ``IndexError``) when regexploit reports nothing for the pattern.
        """
        # Kept as function-local imports, matching how these tests pulled in
        # regexploit before the helper was extracted.
        from regexploit.ast.sre import SreOpParser
        from regexploit.redos import find

        parsed = SreOpParser().parse_sre(pattern, 0)
        # Only the first finding is needed, so avoid materializing all of them.
        finding = next(iter(find(parsed)), None)
        assert finding is not None, f"regexploit reported no ReDoS for {pattern!r}"
        return finding

    @pytest.mark.parametrize(
        "pattern",
        [
            r"(a+)+b",
            r"([a-zA-Z]+)*d",
            r"(x+)+y",
        ],
    )
    def test_redos_pattern_raises_error(self, pattern: str) -> None:
        """Known catastrophic backtracking patterns should raise ReDoSDetectedError."""
        # Parametrized so each pattern is reported on its own and one failure
        # does not hide the others.
        with pytest.raises(ReDoSDetectedError, match="ReDoS pattern detected"):
            validate_regex_pattern(pattern)

    def test_redos_error_message_contains_reason(self) -> None:
        """ReDoSDetectedError should include the detection reason."""
        pattern = r"(a+)+b"
        redos_obj = self._first_redos(pattern)

        message = str(ReDoSDetectedError(pattern, redos_obj))

        assert "ReDoS pattern detected" in message
        assert str(redos_obj.starriness) in message  # starriness is included

    def test_redos_error_attributes(self) -> None:
        """ReDoSDetectedError should store pattern and starriness."""
        pattern = r"(x+)+y"
        redos_obj = self._first_redos(pattern)

        exc = ReDoSDetectedError(pattern, redos_obj)

        assert exc.pattern == pattern
        assert exc.starriness == redos_obj.starriness
        assert exc.reason is not None

    @pytest.mark.parametrize(
        "pattern",
        [
            r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
            r"^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$",
            r"(?:foo|bar|baz)",
        ],
    )
    def test_non_redos_complex_pattern_passes(self, pattern: str) -> None:
        """Complex but safe patterns should pass validation."""
        # Passing means "does not raise"; there is no return value to check here.
        validate_regex_pattern(pattern)

    def test_redos_detection_before_timeout(self) -> None:
        """ReDoS detection should occur before timeout check."""
        # NOTE(review): this only asserts that ReDoSDetectedError is the
        # exception raised; it does not by itself prove the detection runs
        # before the timeout-guarded compilation - confirm against
        # validate_regex_pattern if ordering must be guaranteed.
        redos_pattern = r"(a+)+b"  # detected as ReDoS by regexploit
        with pytest.raises(ReDoSDetectedError):
            validate_regex_pattern(redos_pattern)
class TestValidateRegexPatternEdgeCases:
"""Test edge cases and boundary conditions."""