- Add Observability.md documentation
- Standardize task logging with correlation_id support
- Add log_sanitizer utility for PII masking
- Update Tasks.md tracking
- Update geo_cache tasks and other task modules with correlation_id

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
150 lines
6.0 KiB
Python
150 lines
6.0 KiB
Python
"""Tests for app.utils.log_sanitizer."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from app.utils.log_sanitizer import sanitize_for_logging
|
|
|
|
|
|
class TestSanitizeForLogging:
    """Tests for sanitize_for_logging().

    Every test passes one literal log line to the sanitizer and inspects the
    returned string. Where the exact replacement text is pinned by the test,
    the whole output is compared; otherwise the test checks that the ``***``
    marker is present and that the secret itself is no longer in the output.
    """

    # -- plain text ---------------------------------------------------------

    def test_passthrough_clean_text(self) -> None:
        """No sensitive patterns: text unchanged."""
        text = "Server started on port 8000"
        assert sanitize_for_logging(text) == text

    # -- password -----------------------------------------------------------

    def test_password_eq(self) -> None:
        """password=X replaced."""
        assert sanitize_for_logging("password=Secret123") == "password=***"

    def test_password_colon(self) -> None:
        """password:X replaced (output is normalized to the ``=`` form)."""
        assert sanitize_for_logging("password:Secret123") == "password=***"

    def test_password_case_insensitive(self) -> None:
        """Password matching is case-insensitive; output key is lower-case."""
        assert sanitize_for_logging("PASSWORD=Secret123") == "password=***"
        assert sanitize_for_logging("Password:Secret123") == "password=***"

    # -- api key ------------------------------------------------------------

    def test_api_key_underscore(self) -> None:
        """api_key=X replaced."""
        assert sanitize_for_logging("api_key=my-secret-key") == "api_key=***"

    def test_api_key_dash(self) -> None:
        """api-key=X replaced (output is normalized to ``api_key``)."""
        assert sanitize_for_logging("api-key=my-secret-key") == "api_key=***"

    def test_api_key_no_separator(self) -> None:
        """api_keyXYZ (no separator) is NOT matched by the api_key pattern.

        The pattern requires =, :, _, or - after 'key', so plain 'api_keyXYZ'
        passes through unsanitized. This is intentional — the risk of
        false-positives (normal words like 'api_keyboard') outweighs coverage.
        """
        result = sanitize_for_logging("api_keyXYZ")
        # No separator → not matched → unchanged
        assert result == "api_keyXYZ"

    # -- token --------------------------------------------------------------

    def test_token_eq(self) -> None:
        """token=X replaced."""
        assert sanitize_for_logging("token=eyJhbGciOiJIUzI1NiJ9") == "token=***"

    def test_token_case_insensitive(self) -> None:
        """Token matching is case-insensitive; output key is lower-case."""
        assert sanitize_for_logging("TOKEN=eyJhbGciOiJIUzI1NiJ9") == "token=***"

    # -- Authorization header -----------------------------------------------

    def test_authorization_bearer(self) -> None:
        """Authorization: Bearer <token> replaced and the token removed."""
        token = "eyJhbGciOiJIUzI1NiJ9"
        result = sanitize_for_logging(f"Authorization: Bearer {token}")
        assert "***" in result
        assert "Bearer" not in result
        # The marker alone proves nothing if the credential is still present.
        assert token not in result

    def test_authorization_bearer_only(self) -> None:
        """Authorization: Bearer (no token) replaced."""
        result = sanitize_for_logging("Authorization: Bearer")
        assert "***" in result

    def test_authorization_basic(self) -> None:
        """Authorization: Basic replaced and the credentials removed."""
        credentials = "dXNlcjpwYXNz"  # base64 of "user:pass"
        result = sanitize_for_logging(f"Authorization: Basic {credentials}")
        assert "***" in result
        # Base64 is trivially reversible, so the payload must not survive.
        assert credentials not in result

    # -- secret -------------------------------------------------------------

    def test_secret_eq(self) -> None:
        """secret=X replaced."""
        assert sanitize_for_logging("secret=my_secret_value") == "secret=***"

    def test_secret_key_eq(self) -> None:
        """secret_key=X replaced."""
        assert sanitize_for_logging("secret_key=my_secret_value") == "secret_key=***"

    # -- PEM private key headers --------------------------------------------

    def test_rsa_private_key(self) -> None:
        """RSA private key header redacted."""
        text = "Some text -----BEGIN RSA PRIVATE KEY-----\nMIIBogAAAAAAA="
        result = sanitize_for_logging(text)
        assert "*** PRIVATE KEY ***" in result
        assert "BEGIN RSA PRIVATE KEY" not in result

    def test_dsa_private_key(self) -> None:
        """DSA private key header redacted."""
        text = "Some text -----BEGIN DSA PRIVATE KEY-----\nMIIBogAAAAAAA="
        result = sanitize_for_logging(text)
        assert "*** PRIVATE KEY ***" in result
        assert "BEGIN DSA PRIVATE KEY" not in result

    def test_ec_private_key(self) -> None:
        """EC private key header redacted."""
        text = "Some text -----BEGIN EC PRIVATE KEY-----\nMIIBogAAAAAAA="
        result = sanitize_for_logging(text)
        assert "*** PRIVATE KEY ***" in result
        assert "BEGIN EC PRIVATE KEY" not in result

    def test_openssh_private_key(self) -> None:
        """OPENSSH private key header redacted."""
        text = "Some text -----BEGIN OPENSSH PRIVATE KEY-----\nMIIBogAAAAAAA="
        result = sanitize_for_logging(text)
        assert "*** PRIVATE KEY ***" in result
        assert "BEGIN OPENSSH PRIVATE KEY" not in result

    # -- cloud / bearer credentials -----------------------------------------

    def test_aws_access_key(self) -> None:
        """AKIA... AWS access key redacted."""
        text = "Access key: AKIAIOSFODNN7EXAMPLE"
        result = sanitize_for_logging(text)
        assert "AKIA***" in result
        assert "EXAMPLE" not in result

    def test_bearer_jwt_token(self) -> None:
        """Standalone JWT token after 'Bearer' is redacted."""
        # Adjacent literals concatenate to the same single-line input string.
        text = (
            "Bearer eyJhbGciOiJIUzI1NiJ9"
            ".eyJzdWIiOiIxMjM0NTY3ODkwIn0"
            ".dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"
        )
        result = sanitize_for_logging(text)
        assert result == "Bearer ***"

    # -- combinations and edge cases ----------------------------------------

    def test_multiple_sensitive_values(self) -> None:
        """Multiple sensitive patterns on one line all redacted."""
        text = "password=pass1 token=tok1 api_key=key1"
        result = sanitize_for_logging(text)
        assert result.count("***") == 3

    def test_empty_string(self) -> None:
        """Empty string returns empty string."""
        assert sanitize_for_logging("") == ""

    def test_only_sensitive_pattern(self) -> None:
        """Text that is only a sensitive pattern returns only marker."""
        assert sanitize_for_logging("password=secret") == "password=***"

    def test_multiline_text(self) -> None:
        """Sensitive data redacted across multiple lines."""
        text = "Line 1: password=secret\nLine 2: token=tok123\nLine 3: clean"
        result = sanitize_for_logging(text)
        assert "***" in result
        assert "secret" not in result
        assert "tok123" not in result
        # Non-sensitive content on the last line must be left alone.
        assert "clean" in result

    def test_jwt_structure_preserved(self) -> None:
        """JWT dot-separated structure not matched as token=X."""
        text = "header.payload.signature"
        result = sanitize_for_logging(text)
        assert result == text