Update observability docs and task utilities
- Add Observability.md documentation
- Standardize task logging with correlation_id support
- Add log_sanitizer utility for PII masking
- Update Tasks.md tracking
- Update geo_cache tasks and other task modules with correlation_id

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -176,7 +176,10 @@ class TestRunImport:
|
||||
# Must not raise — the task swallows unexpected errors.
|
||||
await _run_import(app)
|
||||
|
||||
mock_log.exception.assert_called_once_with("blocklist_import_unexpected_error")
|
||||
mock_log.exception.assert_called_once()
|
||||
call_args = mock_log.exception.call_args
|
||||
assert call_args[0][0] == "blocklist_import_unexpected_error"
|
||||
assert "correlation_id" in call_args[1]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
150
backend/tests/test_utils/test_log_sanitizer.py
Normal file
150
backend/tests/test_utils/test_log_sanitizer.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""Tests for app.utils.log_sanitizer."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.utils.log_sanitizer import sanitize_for_logging
|
||||
|
||||
|
||||
class TestSanitizeForLogging:
    """Behavioural checks for ``sanitize_for_logging``.

    Every test passes one input string through the sanitizer and pins either
    the exact output or the presence/absence of specific substrings.
    """

    def test_passthrough_clean_text(self) -> None:
        """Input without sensitive patterns comes back unchanged."""
        message = "Server started on port 8000"
        sanitized = sanitize_for_logging(message)
        assert sanitized == message

    def test_password_eq(self) -> None:
        """``password=<value>`` has its value masked."""
        sanitized = sanitize_for_logging("password=Secret123")
        assert sanitized == "password=***"

    def test_password_colon(self) -> None:
        """``password:<value>`` is masked and normalised to ``password=***``."""
        sanitized = sanitize_for_logging("password:Secret123")
        assert sanitized == "password=***"

    def test_password_case_insensitive(self) -> None:
        """Upper- and mixed-case ``password`` keys are masked as well."""
        # Both spellings are expected to collapse to the lower-case marker.
        for variant in ("PASSWORD=Secret123", "Password:Secret123"):
            assert sanitize_for_logging(variant) == "password=***"

    def test_api_key_underscore(self) -> None:
        """``api_key=<value>`` has its value masked."""
        sanitized = sanitize_for_logging("api_key=my-secret-key")
        assert sanitized == "api_key=***"

    def test_api_key_dash(self) -> None:
        """``api-key=<value>`` is masked and normalised to ``api_key=***``."""
        sanitized = sanitize_for_logging("api-key=my-secret-key")
        assert sanitized == "api_key=***"

    def test_api_key_no_separator(self) -> None:
        """``api_keyXYZ`` (no separator after ``key``) is returned unchanged.

        The original author's note states that the api_key pattern needs a
        separator (``=``, ``:``, ``_`` or ``-``) after ``key`` and that this is
        intentional: avoiding false positives on ordinary words such as
        ``api_keyboard`` is valued over extra coverage.
        NOTE(review): confirm against the actual pattern in log_sanitizer.
        """
        raw = "api_keyXYZ"
        sanitized = sanitize_for_logging(raw)
        # Nothing to redact, so the text must survive untouched.
        assert sanitized == "api_keyXYZ"

    def test_token_eq(self) -> None:
        """``token=<value>`` has its value masked."""
        sanitized = sanitize_for_logging("token=eyJhbGciOiJIUzI1NiJ9")
        assert sanitized == "token=***"

    def test_token_case_insensitive(self) -> None:
        """An upper-case ``TOKEN`` key is masked as well."""
        sanitized = sanitize_for_logging("TOKEN=eyJhbGciOiJIUzI1NiJ9")
        assert sanitized == "token=***"

    def test_authorization_bearer(self) -> None:
        """``Authorization: Bearer <token>`` is redacted."""
        header = "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9"
        sanitized = sanitize_for_logging(header)
        # NOTE(review): neither assertion proves the token text itself is
        # gone; consider also asserting the raw token is absent.
        assert "***" in sanitized
        assert "Bearer" not in sanitized

    def test_authorization_bearer_only(self) -> None:
        """``Authorization: Bearer`` with no token still yields the marker."""
        header = "Authorization: Bearer"
        sanitized = sanitize_for_logging(header)
        assert "***" in sanitized

    def test_authorization_basic(self) -> None:
        """``Authorization: Basic <credentials>`` is redacted."""
        header = "Authorization: Basic dXNlcjpwYXNz"
        sanitized = sanitize_for_logging(header)
        # NOTE(review): only the marker is checked here; the encoded
        # credentials are not asserted to be absent.
        assert "***" in sanitized

    def test_secret_eq(self) -> None:
        """``secret=<value>`` has its value masked."""
        sanitized = sanitize_for_logging("secret=my_secret_value")
        assert sanitized == "secret=***"

    def test_secret_key_eq(self) -> None:
        """``secret_key=<value>`` has its value masked."""
        sanitized = sanitize_for_logging("secret_key=my_secret_value")
        assert sanitized == "secret_key=***"

    def test_rsa_private_key(self) -> None:
        """The RSA private-key header is replaced by the private-key marker."""
        pem = "Some text -----BEGIN RSA PRIVATE KEY-----\nMIIBogAAAAAAA="
        sanitized = sanitize_for_logging(pem)
        assert "*** PRIVATE KEY ***" in sanitized
        assert "BEGIN RSA PRIVATE KEY" not in sanitized

    def test_dsa_private_key(self) -> None:
        """The DSA private-key header is replaced by the private-key marker."""
        pem = "Some text -----BEGIN DSA PRIVATE KEY-----\nMIIBogAAAAAAA="
        sanitized = sanitize_for_logging(pem)
        assert "*** PRIVATE KEY ***" in sanitized
        assert "BEGIN DSA PRIVATE KEY" not in sanitized

    def test_ec_private_key(self) -> None:
        """The EC private-key header is replaced by the private-key marker."""
        pem = "Some text -----BEGIN EC PRIVATE KEY-----\nMIIBogAAAAAAA="
        sanitized = sanitize_for_logging(pem)
        assert "*** PRIVATE KEY ***" in sanitized
        assert "BEGIN EC PRIVATE KEY" not in sanitized

    def test_openssh_private_key(self) -> None:
        """The OPENSSH private-key header is replaced by the private-key marker."""
        pem = "Some text -----BEGIN OPENSSH PRIVATE KEY-----\nMIIBogAAAAAAA="
        sanitized = sanitize_for_logging(pem)
        assert "*** PRIVATE KEY ***" in sanitized
        assert "BEGIN OPENSSH PRIVATE KEY" not in sanitized

    def test_aws_access_key(self) -> None:
        """An ``AKIA...`` AWS access key is reduced to ``AKIA***``."""
        line = "Access key: AKIAIOSFODNN7EXAMPLE"
        sanitized = sanitize_for_logging(line)
        assert "AKIA***" in sanitized
        # The tail of the key must not survive redaction.
        assert "EXAMPLE" not in sanitized

    def test_bearer_jwt_token(self) -> None:
        """A standalone ``Bearer <JWT>`` has the JWT masked."""
        raw = "Bearer eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"
        sanitized = sanitize_for_logging(raw)
        assert sanitized == "Bearer ***"

    def test_multiple_sensitive_values(self) -> None:
        """Three sensitive pairs on one line produce three markers."""
        line = "password=pass1 token=tok1 api_key=key1"
        marker_count = sanitize_for_logging(line).count("***")
        assert marker_count == 3

    def test_empty_string(self) -> None:
        """The empty string maps to the empty string."""
        sanitized = sanitize_for_logging("")
        assert sanitized == ""

    def test_only_sensitive_pattern(self) -> None:
        """Input that is nothing but a sensitive pair yields the masked pair."""
        sanitized = sanitize_for_logging("password=secret")
        assert sanitized == "password=***"

    def test_multiline_text(self) -> None:
        """Sensitive values on separate lines are all masked; clean text stays."""
        block = "Line 1: password=secret\nLine 2: token=tok123\nLine 3: clean"
        sanitized = sanitize_for_logging(block)
        assert "***" in sanitized
        # Neither raw value may leak through.
        for leaked in ("secret", "tok123"):
            assert leaked not in sanitized
        assert "clean" in sanitized

    def test_jwt_structure_preserved(self) -> None:
        """A bare dot-separated string with no keyword prefix is left unchanged."""
        raw = "header.payload.signature"
        sanitized = sanitize_for_logging(raw)
        assert sanitized == raw
|
||||
Reference in New Issue
Block a user