fix(regex_validator): add ReDoS detection via regexploit
Detect catastrophic backtracking patterns before regex compilation using regexploit library. Add ReDoSDetectedError exception and _MINIMUM_STARRINESS threshold (>=3) to catch dangerous patterns like (a+)+b. Update pyproject.toml deps, add tests for detection.
This commit is contained in:
@@ -5,7 +5,7 @@ Request, response, and domain models used by the ban router and service.
|
||||
|
||||
from typing import Literal
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic import Field, field_validator
|
||||
|
||||
from app.models.response import BanGuiBaseModel, CollectionResponse, PaginatedListResponse
|
||||
|
||||
@@ -67,6 +67,18 @@ class Ban(BanGuiBaseModel):
|
||||
description="Whether this ban came from a blocklist import or fail2ban itself.",
|
||||
)
|
||||
|
||||
@field_validator("country")
@classmethod
def _normalize_empty_country(cls, v: str | None) -> str | None:
    """Map an empty ``country`` string to ``None``.

    Geo enrichment can hand back ``""`` rather than ``None`` when an IP
    could not be resolved, and that breaks truthiness checks in the
    frontend.

    Args:
        v: The incoming ``country`` value.

    Returns:
        ``None`` when ``v`` is the empty string, otherwise ``v`` unchanged.
    """
    return None if v == "" else v
|
||||
|
||||
class BanResponse(BanGuiBaseModel):
|
||||
"""Response containing a single ban record."""
|
||||
|
||||
@@ -97,6 +109,18 @@ class ActiveBan(BanGuiBaseModel):
|
||||
ban_count: int = Field(default=1, ge=1, description="Running ban count for this IP.")
|
||||
country: str | None = Field(default=None, description="ISO 3166-1 alpha-2 country code.")
|
||||
|
||||
@field_validator("country")
@classmethod
def _normalize_empty_country(cls, v: str | None) -> str | None:
    """Replace an empty ``country`` value with ``None``.

    An unresolved IP may come out of geo enrichment as ``""`` instead of
    ``None``; the frontend relies on truthiness checks that this breaks.

    Args:
        v: The incoming ``country`` value.

    Returns:
        ``v`` unchanged unless it is the empty string, in which case ``None``.
    """
    # Anything other than the exact empty string passes through untouched.
    if v != "":
        return v
    return None
|
||||
|
||||
class ActiveBanListResponse(CollectionResponse[ActiveBan]):
|
||||
"""List of all currently active bans across all jails.
|
||||
|
||||
@@ -154,6 +178,20 @@ class DashboardBanItem(BanGuiBaseModel):
|
||||
description="Whether this ban came from a blocklist import or fail2ban itself.",
|
||||
)
|
||||
|
||||
@field_validator("country_code")
@classmethod
def _normalize_empty_country_code(cls, v: str | None) -> str | None:
    """Map an empty ``country_code`` string to ``None``.

    For unresolved IPs the geo enrichment layer may emit ``""`` rather
    than ``None``. The frontend narrows the type by truthiness
    (``if (ban.country_code)``), so an empty string would show up as a
    falsy-but-not-null value and break UI rendering.

    Args:
        v: The incoming ``country_code`` value.

    Returns:
        ``None`` when ``v`` is the empty string, otherwise ``v`` unchanged.
    """
    is_blank = v == ""
    return None if is_blank else v
|
||||
|
||||
class DashboardBanListResponse(PaginatedListResponse[DashboardBanItem]):
|
||||
"""Paginated dashboard ban-list response.
|
||||
|
||||
|
||||
@@ -12,6 +12,8 @@ from contextlib import contextmanager
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import structlog
|
||||
from regexploit.ast.sre import SreOpParser
|
||||
from regexploit.redos import Redos, find
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Generator
|
||||
@@ -22,6 +24,10 @@ logger = structlog.get_logger()
|
||||
MAX_REGEX_LENGTH = 1000
|
||||
REGEX_COMPILE_TIMEOUT_SECONDS = 2
|
||||
|
||||
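# Minimum "starriness" score at which a pattern is flagged as ReDoS.
# Higher scores indicate more severe or more numerous nested quantifiers;
# a finding is flagged when its starriness is greater than or equal to this value.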
|
||||
_MINIMUM_STARRINESS = 3
|
||||
|
||||
|
||||
class RegexTimeoutError(Exception):
|
||||
"""Raised when regex compilation exceeds the timeout limit."""
|
||||
@@ -41,25 +47,67 @@ class RegexTimeoutError(Exception):
|
||||
)
|
||||
|
||||
|
||||
class ReDoSDetectedError(Exception):
    """Error signalling that a regex pattern shows catastrophic backtracking.

    Attributes:
        pattern: The regex pattern that was flagged.
        starriness: The ``starriness`` value copied from the finding.
        reason: The string returned by the finding's ``example()`` method.
    """

    def __init__(self, pattern: str, redos: Redos) -> None:
        """Capture the offending pattern and the details of the finding.

        Args:
            pattern: The regex pattern that was detected as dangerous.
            redos: The Redos object describing the vulnerability; its
                ``starriness`` attribute and ``example()`` result are stored
                on the exception and used in the message.
        """
        self.pattern = pattern
        score = redos.starriness
        self.starriness = score
        example = redos.example()
        self.reason = example
        message = f"ReDoS pattern detected (starriness={score}): {example}"
        super().__init__(message)
|
||||
|
||||
|
||||
def _check_redos(pattern: str) -> Redos | None:
    """Look for catastrophic backtracking in a regex pattern.

    The pattern is parsed with ``SreOpParser`` and the parsed form is passed
    to ``find``. The first finding whose ``starriness`` reaches
    ``_MINIMUM_STARRINESS`` is reported.

    Args:
        pattern: The regex pattern string to check.

    Returns:
        The first qualifying Redos finding, or None when there is none or
        the pattern cannot be parsed.
    """
    try:
        tree = SreOpParser().parse_sre(pattern, 0)
    except re.error:
        # A syntactically invalid pattern is not reported here; the later
        # re.compile() step is expected to surface the error.
        return None

    # Findings are examined in the order find() yields them; the first one
    # at or above the threshold wins.
    return next(
        (hit for hit in find(tree) if hit.starriness >= _MINIMUM_STARRINESS),
        None,
    )
|
||||
|
||||
|
||||
def validate_regex_pattern(pattern: str) -> None:
|
||||
"""Validate a regex pattern with length and timeout checks.
|
||||
"""Validate a regex pattern with length and ReDoS checks.
|
||||
|
||||
Validates a regex pattern by:
|
||||
1. Checking length does not exceed MAX_REGEX_LENGTH characters
|
||||
2. Attempting compilation with a timeout to prevent ReDoS attacks
|
||||
2. Checking for known catastrophic backtracking patterns (ReDoS)
|
||||
3. Attempting compilation with a timeout to prevent ReDoS attacks
|
||||
|
||||
Args:
|
||||
pattern: The regex pattern string to validate.
|
||||
|
||||
Raises:
|
||||
ValueError: If the pattern exceeds maximum length.
|
||||
ReDoSDetectedError: If the pattern is detected as a ReDoS vulnerability.
|
||||
RegexTimeoutError: If compilation exceeds the timeout.
|
||||
re.error: If the pattern is syntactically invalid.
|
||||
|
||||
Example:
|
||||
>>> validate_regex_pattern(r'^[a-z]+$') # OK
|
||||
>>> validate_regex_pattern('a' * 1001) # Raises ValueError
|
||||
>>> validate_regex_pattern(r'(a+)+b') # May raise RegexTimeoutError
|
||||
>>> validate_regex_pattern(r'(a+)+b') # Raises ReDoSDetectedError
|
||||
"""
|
||||
# Check length first (fast, no timeout needed)
|
||||
if len(pattern) > MAX_REGEX_LENGTH:
|
||||
@@ -67,6 +115,16 @@ def validate_regex_pattern(pattern: str) -> None:
|
||||
logger.warning("regex_validation_length_exceeded", max_length=MAX_REGEX_LENGTH, actual_length=len(pattern))
|
||||
raise ValueError(msg)
|
||||
|
||||
# Check for ReDoS patterns before compilation
|
||||
redos = _check_redos(pattern)
|
||||
if redos is not None:
|
||||
logger.warning(
|
||||
"regex_redos_detected",
|
||||
starriness=redos.starriness,
|
||||
pattern_preview=pattern[:100],
|
||||
)
|
||||
raise ReDoSDetectedError(pattern, redos)
|
||||
|
||||
# Attempt compilation with timeout
|
||||
try:
|
||||
with _timeout_context(REGEX_COMPILE_TIMEOUT_SECONDS):
|
||||
|
||||
Reference in New Issue
Block a user