"""Tests for regex pattern validation with ReDoS protection.""" from __future__ import annotations import re import pytest from app.utils.regex_validator import ( MAX_REGEX_LENGTH, ReDoSDetectedError, RegexTimeoutError, validate_regex_pattern, ) class TestValidateRegexPattern: """Tests for validate_regex_pattern function.""" def test_valid_simple_pattern(self) -> None: """Valid simple patterns should compile without error.""" validate_regex_pattern(r"^[a-z]+$") validate_regex_pattern(r"\d{3}-\d{3}-\d{4}") validate_regex_pattern(r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}") def test_valid_complex_pattern(self) -> None: """Valid complex patterns should compile without error.""" validate_regex_pattern(r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$") validate_regex_pattern(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$") validate_regex_pattern(r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$") def test_pattern_exceeds_max_length(self) -> None: """Patterns exceeding MAX_REGEX_LENGTH should raise ValueError.""" # Create pattern exactly at max length (should work) max_pattern = "a" * MAX_REGEX_LENGTH validate_regex_pattern(max_pattern) # Create pattern exceeding max length too_long = "a" * (MAX_REGEX_LENGTH + 1) with pytest.raises(ValueError, match="exceeds maximum length"): validate_regex_pattern(too_long) def test_pattern_far_exceeds_max_length(self) -> None: """Patterns far exceeding max length should raise ValueError.""" too_long = "a" * (MAX_REGEX_LENGTH * 2) with pytest.raises(ValueError, match="exceeds maximum length"): validate_regex_pattern(too_long) def test_invalid_syntax_raises_error(self) -> None: """Patterns with invalid syntax should raise re.error.""" with pytest.raises(re.error): validate_regex_pattern(r"[unclosed") with pytest.raises(re.error): validate_regex_pattern(r"(?P None: """Empty patterns should compile without error.""" validate_regex_pattern("") def test_special_characters_allowed(self) -> None: """Patterns with special regex characters should work.""" validate_regex_pattern(r"\b\w+\b") validate_regex_pattern(r"(?:foo|bar|baz)") validate_regex_pattern(r"(?P\w+)") def test_pattern_length_validation_is_first(self) -> None: """Length validation should happen before compilation (faster).""" # Create a pattern that is too long and also invalid # Should raise ValueError for length, not re.error for syntax too_long_and_invalid = "a" * (MAX_REGEX_LENGTH + 1) + r"[unclosed" with pytest.raises(ValueError, match="exceeds maximum length"): validate_regex_pattern(too_long_and_invalid) def test_common_fail2ban_patterns(self) -> None: """Common fail2ban regex patterns should validate correctly.""" # These are real patterns from fail2ban filters common_patterns = [ r"^%(__prefix_line)s(?:error|warn).*Failed (?:password|publickey) for invalid user (?:[^ ]+) from ", r"^%(__prefix_line)s(?:error|warn).*Connection (?:closed|reset) by (?:authenticating )?user .* ", r"^%(__prefix_line)s(?:error|warn).*Invalid user (?:[^ ]+) from ", r"^%(__prefix_line)s(?:error|warn).*Received disconnect from ", ] for pattern in common_patterns: validate_regex_pattern(pattern) def test_pattern_with_lookahead_lookbehind(self) -> None: """Patterns with lookahead/lookbehind should work.""" validate_regex_pattern(r"(? None: """RegexTimeoutError should have a descriptive message.""" pattern = r"(a+)+b" timeout_seconds = 2 exc = RegexTimeoutError(pattern, timeout_seconds) assert f"{timeout_seconds}s" in str(exc) assert "ReDoS" in str(exc) assert pattern in str(exc) def test_regex_timeout_error_attributes(self) -> None: """RegexTimeoutError should store pattern and timeout.""" pattern = r"(a+)+b" timeout_seconds = 2 exc = RegexTimeoutError(pattern, timeout_seconds) assert exc.pattern == pattern assert exc.timeout_seconds == timeout_seconds class TestReDoSDetection: """Tests for ReDoS pattern detection via regexploit.""" def test_redos_pattern_raises_error(self) -> None: """Known catastrophic backtracking patterns should raise ReDoSDetectedError.""" redos_patterns = [ r"(a+)+b", r"([a-zA-Z]+)*d", r"(x+)+y", ] for pattern in redos_patterns: with pytest.raises(ReDoSDetectedError, match="ReDoS pattern detected"): validate_regex_pattern(pattern) def test_redos_error_message_contains_reason(self) -> None: """ReDoSDetectedError should include the detection reason.""" pattern = r"(a+)+b" from regexploit.ast.sre import SreOpParser from regexploit.redos import find parsed = SreOpParser().parse_sre(pattern, 0) redos_obj = list(find(parsed))[0] exc = ReDoSDetectedError(pattern, redos_obj) assert "ReDoS pattern detected" in str(exc) assert str(redos_obj.starriness) in str(exc) # starriness is included def test_redos_error_attributes(self) -> None: """ReDoSDetectedError should store pattern and starriness.""" pattern = r"(x+)+y" from regexploit.ast.sre import SreOpParser from regexploit.redos import find parsed = SreOpParser().parse_sre(pattern, 0) redos_obj = list(find(parsed))[0] exc = ReDoSDetectedError(pattern, redos_obj) assert exc.pattern == pattern assert exc.starriness == redos_obj.starriness assert exc.reason is not None def test_non_redos_complex_pattern_passes(self) -> None: """Complex but safe patterns should pass validation.""" safe_patterns = [ r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", r"^(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$", r"(?:foo|bar|baz)", ] for pattern in safe_patterns: validate_regex_pattern(pattern) def test_redos_detection_before_timeout(self) -> None: """ReDoS detection should occur before timeout check.""" # This pattern is detected as ReDoS by regexploit redos_pattern = r"(a+)+b" with pytest.raises(ReDoSDetectedError): validate_regex_pattern(redos_pattern) class TestValidateRegexPatternEdgeCases: """Test edge cases and boundary conditions.""" def test_pattern_with_null_bytes(self) -> None: """Patterns with null bytes should still validate (if compilable).""" # This may or may not work depending on Python version # but shouldn't cause a crash try: validate_regex_pattern("a\x00b") except (ValueError, re.error): pass # Either error is acceptable def test_pattern_with_unicode(self) -> None: """Patterns with Unicode characters should work.""" validate_regex_pattern(r"[α-ω]+") validate_regex_pattern(r"café") validate_regex_pattern(r"日本語") def test_unicode_pattern_within_length_limit(self) -> None: """Unicode patterns should count by character, not bytes.""" # Create a long unicode pattern that's under character limit unicode_pattern = "ア" * (MAX_REGEX_LENGTH - 1) validate_regex_pattern(unicode_pattern) # Exceeding character limit should fail unicode_pattern_long = "ア" * (MAX_REGEX_LENGTH + 1) with pytest.raises(ValueError, match="exceeds maximum length"): validate_regex_pattern(unicode_pattern_long) def test_pattern_repeated_at_boundary(self) -> None: """Pattern at exact boundary should work.""" boundary_pattern = "a" * MAX_REGEX_LENGTH validate_regex_pattern(boundary_pattern) just_over = "a" * (MAX_REGEX_LENGTH + 1) with pytest.raises(ValueError): validate_regex_pattern(just_over)