"""Tests for regex pattern validation with ReDoS protection.""" from __future__ import annotations import re import pytest from app.utils.regex_validator import ( MAX_REGEX_LENGTH, REGEX_COMPILE_TIMEOUT_SECONDS, RegexTimeoutError, validate_regex_pattern, ) class TestValidateRegexPattern: """Tests for validate_regex_pattern function.""" def test_valid_simple_pattern(self) -> None: """Valid simple patterns should compile without error.""" validate_regex_pattern(r"^[a-z]+$") validate_regex_pattern(r"\d{3}-\d{3}-\d{4}") validate_regex_pattern(r"[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}") def test_valid_complex_pattern(self) -> None: """Valid complex patterns should compile without error.""" validate_regex_pattern(r"^(?:[0-9]{1,3}\.){3}[0-9]{1,3}$") validate_regex_pattern(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$") validate_regex_pattern(r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$") def test_pattern_exceeds_max_length(self) -> None: """Patterns exceeding MAX_REGEX_LENGTH should raise ValueError.""" # Create pattern exactly at max length (should work) max_pattern = "a" * MAX_REGEX_LENGTH validate_regex_pattern(max_pattern) # Create pattern exceeding max length too_long = "a" * (MAX_REGEX_LENGTH + 1) with pytest.raises(ValueError, match="exceeds maximum length"): validate_regex_pattern(too_long) def test_pattern_far_exceeds_max_length(self) -> None: """Patterns far exceeding max length should raise ValueError.""" too_long = "a" * (MAX_REGEX_LENGTH * 2) with pytest.raises(ValueError, match="exceeds maximum length"): validate_regex_pattern(too_long) def test_invalid_syntax_raises_error(self) -> None: """Patterns with invalid syntax should raise re.error.""" with pytest.raises(re.error): validate_regex_pattern(r"[unclosed") with pytest.raises(re.error): validate_regex_pattern(r"(?P None: """Empty patterns should compile without error.""" validate_regex_pattern("") def test_special_characters_allowed(self) -> None: """Patterns with special regex characters should work.""" validate_regex_pattern(r"\b\w+\b") validate_regex_pattern(r"(?:foo|bar|baz)") validate_regex_pattern(r"(?P\w+)") def test_pattern_length_validation_is_first(self) -> None: """Length validation should happen before compilation (faster).""" # Create a pattern that is too long and also invalid # Should raise ValueError for length, not re.error for syntax too_long_and_invalid = "a" * (MAX_REGEX_LENGTH + 1) + r"[unclosed" with pytest.raises(ValueError, match="exceeds maximum length"): validate_regex_pattern(too_long_and_invalid) def test_common_fail2ban_patterns(self) -> None: """Common fail2ban regex patterns should validate correctly.""" # These are real patterns from fail2ban filters common_patterns = [ r"^%(__prefix_line)s(?:error|warn).*Failed (?:password|publickey) for invalid user (?:[^ ]+) from ", r"^%(__prefix_line)s(?:error|warn).*Connection (?:closed|reset) by (?:authenticating )?user .* ", r"^%(__prefix_line)s(?:error|warn).*Invalid user (?:[^ ]+) from ", r"^%(__prefix_line)s(?:error|warn).*Received disconnect from ", ] for pattern in common_patterns: validate_regex_pattern(pattern) def test_pattern_with_lookahead_lookbehind(self) -> None: """Patterns with lookahead/lookbehind should work.""" validate_regex_pattern(r"(? None: """RegexTimeoutError should have a descriptive message.""" pattern = r"(a+)+b" timeout_seconds = 2 exc = RegexTimeoutError(pattern, timeout_seconds) assert f"{timeout_seconds}s" in str(exc) assert "ReDoS" in str(exc) assert pattern in str(exc) def test_regex_timeout_error_attributes(self) -> None: """RegexTimeoutError should store pattern and timeout.""" pattern = r"(a+)+b" timeout_seconds = 2 exc = RegexTimeoutError(pattern, timeout_seconds) assert exc.pattern == pattern assert exc.timeout_seconds == timeout_seconds class TestValidateRegexPatternEdgeCases: """Test edge cases and boundary conditions.""" def test_pattern_with_null_bytes(self) -> None: """Patterns with null bytes should still validate (if compilable).""" # This may or may not work depending on Python version # but shouldn't cause a crash try: validate_regex_pattern("a\x00b") except (ValueError, re.error): pass # Either error is acceptable def test_pattern_with_unicode(self) -> None: """Patterns with Unicode characters should work.""" validate_regex_pattern(r"[α-ω]+") validate_regex_pattern(r"café") validate_regex_pattern(r"日本語") def test_unicode_pattern_within_length_limit(self) -> None: """Unicode patterns should count by character, not bytes.""" # Create a long unicode pattern that's under character limit unicode_pattern = "ア" * (MAX_REGEX_LENGTH - 1) validate_regex_pattern(unicode_pattern) # Exceeding character limit should fail unicode_pattern_long = "ア" * (MAX_REGEX_LENGTH + 1) with pytest.raises(ValueError, match="exceeds maximum length"): validate_regex_pattern(unicode_pattern_long) def test_pattern_repeated_at_boundary(self) -> None: """Pattern at exact boundary should work.""" boundary_pattern = "a" * MAX_REGEX_LENGTH validate_regex_pattern(boundary_pattern) just_over = "a" * (MAX_REGEX_LENGTH + 1) with pytest.raises(ValueError): validate_regex_pattern(just_over)