fix(regex_validator): add ReDoS detection via regexploit

Detect catastrophic backtracking patterns before regex compilation
using regexploit library. Add ReDoSDetectedError exception and
_MINIMUM_STARRINESS threshold (>=3) to catch dangerous patterns
like (a+)+b. Update pyproject.toml deps, add tests for detection.
This commit is contained in:
2026-05-03 00:05:33 +02:00
parent e436727942
commit 0817a4cb47
5 changed files with 290 additions and 8 deletions

View File

@@ -12,6 +12,8 @@ from contextlib import contextmanager
from typing import TYPE_CHECKING
import structlog
from regexploit.ast.sre import SreOpParser
from regexploit.redos import Redos, find
if TYPE_CHECKING:
from collections.abc import Generator
@@ -22,6 +24,10 @@ logger = structlog.get_logger()
# Upper bound on pattern length, in characters; longer patterns are rejected
# by the length check before any other validation step runs.
MAX_REGEX_LENGTH = 1000
# Time budget, in seconds, handed to the timeout context that wraps compilation.
REGEX_COMPILE_TIMEOUT_SECONDS = 2
# Minimum starriness threshold for flagging as ReDoS (a finding is flagged
# when its starriness is >= this value).
# Higher values = more severe/numerous nested quantifiers
_MINIMUM_STARRINESS = 3
class RegexTimeoutError(Exception):
"""Raised when regex compilation exceeds the timeout limit."""
@@ -41,25 +47,67 @@ class RegexTimeoutError(Exception):
)
class ReDoSDetectedError(Exception):
    """Signal that a regex pattern is prone to catastrophic backtracking."""

    def __init__(self, pattern: str, redos: Redos) -> None:
        """Record the offending pattern and what the detector reported.

        Args:
            pattern: The regex pattern that was flagged as dangerous.
            redos: The detection result; its ``starriness`` value and the
                string returned by its ``example()`` method are copied onto
                this exception as ``starriness`` and ``reason``.
        """
        starriness = redos.starriness
        reason = redos.example()

        self.pattern = pattern
        self.starriness = starriness
        self.reason = reason

        message = f"ReDoS pattern detected (starriness={starriness}): {reason}"
        super().__init__(message)
def _check_redos(pattern: str) -> Redos | None:
    """Look for a catastrophic-backtracking finding in *pattern*.

    Args:
        pattern: The regex pattern string to analyse.

    Returns:
        The first finding yielded by ``find`` whose starriness is at least
        ``_MINIMUM_STARRINESS``, or ``None`` when there is no such finding
        or the pattern could not be parsed.
    """
    try:
        tree = SreOpParser().parse_sre(pattern, 0)
    except re.error:
        # A syntactically invalid pattern is not reported here; the later
        # re.compile() step is expected to raise for it.
        # NOTE(review): only re.error is handled; any other exception type
        # raised by the parser would propagate to the caller -- confirm
        # that is intended.
        return None

    findings = find(tree)
    return next(
        (hit for hit in findings if hit.starriness >= _MINIMUM_STARRINESS),
        None,
    )
def validate_regex_pattern(pattern: str) -> None:
"""Validate a regex pattern with length and timeout checks.
"""Validate a regex pattern with length and ReDoS checks.
Validates a regex pattern by:
1. Checking length does not exceed MAX_REGEX_LENGTH characters
2. Attempting compilation with a timeout to prevent ReDoS attacks
2. Checking for known catastrophic backtracking patterns (ReDoS)
3. Attempting compilation with a timeout to prevent ReDoS attacks
Args:
pattern: The regex pattern string to validate.
Raises:
ValueError: If the pattern exceeds maximum length.
ReDoSDetectedError: If the pattern is detected as a ReDoS vulnerability.
RegexTimeoutError: If compilation exceeds the timeout.
re.error: If the pattern is syntactically invalid.
Example:
>>> validate_regex_pattern(r'^[a-z]+$') # OK
>>> validate_regex_pattern('a' * 1001) # Raises ValueError
>>> validate_regex_pattern(r'(a+)+b') # May raise RegexTimeoutError
>>> validate_regex_pattern(r'(a+)+b') # Raises ReDoSDetectedError
"""
# Check length first (fast, no timeout needed)
if len(pattern) > MAX_REGEX_LENGTH:
@@ -67,6 +115,16 @@ def validate_regex_pattern(pattern: str) -> None:
logger.warning("regex_validation_length_exceeded", max_length=MAX_REGEX_LENGTH, actual_length=len(pattern))
raise ValueError(msg)
# Check for ReDoS patterns before compilation
redos = _check_redos(pattern)
if redos is not None:
logger.warning(
"regex_redos_detected",
starriness=redos.starriness,
pattern_preview=pattern[:100],
)
raise ReDoSDetectedError(pattern, redos)
# Attempt compilation with timeout
try:
with _timeout_context(REGEX_COMPILE_TIMEOUT_SECONDS):