fix(regex_validator): add ReDoS detection via regexploit
Detect catastrophic backtracking patterns before regex compilation using the regexploit library. Add the ReDoSDetectedError exception and a _MINIMUM_STARRINESS threshold (>=3) to catch dangerous patterns like (a+)+b. Update pyproject.toml dependencies and add tests for the detection.
This commit is contained in:
@@ -8,7 +8,7 @@ import pytest
|
||||
|
||||
from app.utils.regex_validator import (
|
||||
MAX_REGEX_LENGTH,
|
||||
REGEX_COMPILE_TIMEOUT_SECONDS,
|
||||
ReDoSDetectedError,
|
||||
RegexTimeoutError,
|
||||
validate_regex_pattern,
|
||||
)
|
||||
@@ -116,6 +116,61 @@ class TestRegexTimeoutError:
|
||||
assert exc.timeout_seconds == timeout_seconds
|
||||
|
||||
|
||||
class TestReDoSDetection:
    """Tests for ReDoS pattern detection via regexploit.

    Covers two things: ``validate_regex_pattern`` raising
    ``ReDoSDetectedError`` for known catastrophic-backtracking patterns, and
    the message/attributes exposed by ``ReDoSDetectedError`` itself.
    """

    @staticmethod
    def _first_redos(pattern: str):
        """Return the first finding regexploit reports for ``pattern``.

        The name has no ``test_`` prefix, so pytest does not collect it.
        """
        # Imported at function scope, as the original tests did, so regexploit
        # is only required by the tests that build a finding by hand.
        from regexploit.ast.sre import SreOpParser
        from regexploit.redos import find

        parsed = SreOpParser().parse_sre(pattern, 0)
        findings = list(find(parsed))
        # Fail with a readable message instead of an opaque IndexError.
        assert findings, f"regexploit reported no ReDoS finding for {pattern!r}"
        return findings[0]

    # Parametrized so every pattern is reported as its own test case; a plain
    # loop would stop at the first failure and hide the remaining patterns.
    @pytest.mark.parametrize(
        "pattern",
        [
            r"(a+)+b",
            r"([a-zA-Z]+)*d",
            r"(x+)+y",
        ],
    )
    def test_redos_pattern_raises_error(self, pattern: str) -> None:
        """Known catastrophic backtracking patterns should raise ReDoSDetectedError."""
        with pytest.raises(ReDoSDetectedError, match="ReDoS pattern detected"):
            validate_regex_pattern(pattern)

    def test_redos_error_message_contains_reason(self) -> None:
        """ReDoSDetectedError's message should carry the marker text and starriness."""
        pattern = r"(a+)+b"
        redos_obj = self._first_redos(pattern)

        message = str(ReDoSDetectedError(pattern, redos_obj))

        assert "ReDoS pattern detected" in message
        assert str(redos_obj.starriness) in message  # starriness is included

    def test_redos_error_attributes(self) -> None:
        """ReDoSDetectedError should store pattern and starriness."""
        pattern = r"(x+)+y"
        redos_obj = self._first_redos(pattern)

        exc = ReDoSDetectedError(pattern, redos_obj)

        assert exc.pattern == pattern
        assert exc.starriness == redos_obj.starriness
        assert exc.reason is not None

    @pytest.mark.parametrize(
        "pattern",
        [
            r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
            r"^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$",
            r"(?:foo|bar|baz)",
        ],
    )
    def test_non_redos_complex_pattern_passes(self, pattern: str) -> None:
        """Complex but safe patterns should pass validation."""
        # Passing means "does not raise"; the return value is not inspected.
        validate_regex_pattern(pattern)

    def test_redos_detection_before_timeout(self) -> None:
        """ReDoS detection should occur before timeout check."""
        # NOTE(review): this only asserts that ReDoSDetectedError is raised; it
        # does not itself prove the ordering relative to the timeout check.
        # Confirm against validate_regex_pattern if ordering must be guaranteed.
        # This pattern is detected as ReDoS by regexploit
        redos_pattern = r"(a+)+b"
        with pytest.raises(ReDoSDetectedError):
            validate_regex_pattern(redos_pattern)
|
||||
|
||||
|
||||
class TestValidateRegexPatternEdgeCases:
|
||||
"""Test edge cases and boundary conditions."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user