Update observability docs and task utilities

- Add Observability.md documentation
- Standardize task logging with correlation_id support
- Add log_sanitizer utility for PII masking
- Update Tasks.md tracking
- Update geo_cache tasks and other task modules with correlation_id

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-03 11:52:09 +02:00
parent 7b93499551
commit 0133489920
17 changed files with 582 additions and 124 deletions
--- a/backend/tests/test_tasks/test_blocklist_import.py
+++ b/backend/tests/test_tasks/test_blocklist_import.py
@@ -176,7 +176,10 @@ class TestRunImport:
            # Must not raise — the task swallows unexpected errors.
            await _run_import(app)

-        mock_log.exception.assert_called_once_with("blocklist_import_unexpected_error")
+        mock_log.exception.assert_called_once()
+        call_args = mock_log.exception.call_args
+        assert call_args[0][0] == "blocklist_import_unexpected_error"
+        assert "correlation_id" in call_args[1]


 # ---------------------------------------------------------------------------
--- a/backend/tests/test_utils/test_log_sanitizer.py
+++ b/backend/tests/test_utils/test_log_sanitizer.py
@@ -0,0 +1,150 @@
+"""Tests for app.utils.log_sanitizer."""
+
+from __future__ import annotations
+
+import pytest
+
+from app.utils.log_sanitizer import sanitize_for_logging
+
+
+class TestSanitizeForLogging:
+    """Tests for sanitize_for_logging()."""
+
+    def test_passthrough_clean_text(self) -> None:
+        """No sensitive patterns: text unchanged."""
+        text = "Server started on port 8000"
+        assert sanitize_for_logging(text) == text
+
+    def test_password_eq(self) -> None:
+        """password=X replaced."""
+        assert sanitize_for_logging("password=Secret123") == "password=***"
+
+    def test_password_colon(self) -> None:
+        """password:X replaced."""
+        assert sanitize_for_logging("password:Secret123") == "password=***"
+
+    def test_password_case_insensitive(self) -> None:
+        """Password matching is case-insensitive."""
+        assert sanitize_for_logging("PASSWORD=Secret123") == "password=***"
+        assert sanitize_for_logging("Password:Secret123") == "password=***"
+
+    def test_api_key_underscore(self) -> None:
+        """api_key=X replaced."""
+        assert sanitize_for_logging("api_key=my-secret-key") == "api_key=***"
+
+    def test_api_key_dash(self) -> None:
+        """api-key=X replaced."""
+        assert sanitize_for_logging("api-key=my-secret-key") == "api_key=***"
+
+    def test_api_key_no_separator(self) -> None:
+        """api_keyXYZ (no separator) is NOT matched by the api_key pattern.
+
+        The pattern requires =, :, _, or - after 'key', so plain 'api_keyXYZ'
+        passes through unsanitized. This is intentional — the risk of
+        false-positives (normal words like 'api_keyboard') outweighs coverage.
+        """
+        result = sanitize_for_logging("api_keyXYZ")
+        # No separator → not matched → unchanged
+        assert result == "api_keyXYZ"
+
+    def test_token_eq(self) -> None:
+        """token=X replaced."""
+        assert sanitize_for_logging("token=eyJhbGciOiJIUzI1NiJ9") == "token=***"
+
+    def test_token_case_insensitive(self) -> None:
+        """Token matching is case-insensitive."""
+        assert sanitize_for_logging("TOKEN=eyJhbGciOiJIUzI1NiJ9") == "token=***"
+
+    def test_authorization_bearer(self) -> None:
+        """Authorization: Bearer <token> replaced."""
+        result = sanitize_for_logging("Authorization: Bearer eyJhbGciOiJIUzI1NiJ9")
+        assert "***" in result
+        assert "Bearer" not in result
+
+    def test_authorization_bearer_only(self) -> None:
+        """Authorization: Bearer (no token) replaced."""
+        result = sanitize_for_logging("Authorization: Bearer")
+        assert "***" in result
+
+    def test_authorization_basic(self) -> None:
+        """Authorization: Basic replaced."""
+        result = sanitize_for_logging("Authorization: Basic dXNlcjpwYXNz")
+        assert "***" in result
+
+    def test_secret_eq(self) -> None:
+        """secret=X replaced."""
+        assert sanitize_for_logging("secret=my_secret_value") == "secret=***"
+
+    def test_secret_key_eq(self) -> None:
+        """secret_key=X replaced."""
+        assert sanitize_for_logging("secret_key=my_secret_value") == "secret_key=***"
+
+    def test_rsa_private_key(self) -> None:
+        """RSA private key header redacted."""
+        text = "Some text -----BEGIN RSA PRIVATE KEY-----\nMIIBogAAAAAAA="
+        result = sanitize_for_logging(text)
+        assert "*** PRIVATE KEY ***" in result
+        assert "BEGIN RSA PRIVATE KEY" not in result
+
+    def test_dsa_private_key(self) -> None:
+        """DSA private key header redacted."""
+        text = "Some text -----BEGIN DSA PRIVATE KEY-----\nMIIBogAAAAAAA="
+        result = sanitize_for_logging(text)
+        assert "*** PRIVATE KEY ***" in result
+        assert "BEGIN DSA PRIVATE KEY" not in result
+
+    def test_ec_private_key(self) -> None:
+        """EC private key header redacted."""
+        text = "Some text -----BEGIN EC PRIVATE KEY-----\nMIIBogAAAAAAA="
+        result = sanitize_for_logging(text)
+        assert "*** PRIVATE KEY ***" in result
+        assert "BEGIN EC PRIVATE KEY" not in result
+
+    def test_openssh_private_key(self) -> None:
+        """OPENSSH private key header redacted."""
+        text = "Some text -----BEGIN OPENSSH PRIVATE KEY-----\nMIIBogAAAAAAA="
+        result = sanitize_for_logging(text)
+        assert "*** PRIVATE KEY ***" in result
+        assert "BEGIN OPENSSH PRIVATE KEY" not in result
+
+    def test_aws_access_key(self) -> None:
+        """AKIA... AWS access key redacted."""
+        text = "Access key: AKIAIOSFODNN7EXAMPLE"
+        result = sanitize_for_logging(text)
+        assert "AKIA***" in result
+        assert "EXAMPLE" not in result
+
+    def test_bearer_jwt_token(self) -> None:
+        """Standalone JWT token after 'Bearer' is redacted."""
+        text = "Bearer eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.dozjgNryP4J3jVmNHl0w5N_XgL0n3I9PlFUP0THsR8U"
+        result = sanitize_for_logging(text)
+        assert result == "Bearer ***"
+
+    def test_multiple_sensitive_values(self) -> None:
+        """Multiple sensitive patterns on one line all redacted."""
+        text = "password=pass1 token=tok1 api_key=key1"
+        result = sanitize_for_logging(text)
+        assert result.count("***") == 3
+
+    def test_empty_string(self) -> None:
+        """Empty string returns empty string."""
+        assert sanitize_for_logging("") == ""
+
+    def test_only_sensitive_pattern(self) -> None:
+        """Input that is only a sensitive pair keeps the key and masks the value."""
+        assert sanitize_for_logging("password=secret") == "password=***"
+
+    def test_multiline_text(self) -> None:
+        """Sensitive data redacted across multiple lines."""
+        text = "Line 1: password=secret\nLine 2: token=tok123\nLine 3: clean"
+        result = sanitize_for_logging(text)
+        assert "***" in result
+        assert "secret" not in result
+        assert "tok123" not in result
+        assert "clean" in result
+
+    def test_jwt_structure_preserved(self) -> None:
+        """JWT dot-separated structure not matched as token=X."""
+        text = "header.payload.signature"
+        result = sanitize_for_logging(text)
+        assert result == text