Update observability docs and task utilities

- Add Observability.md documentation
- Standardize task logging with correlation_id support
- Add log_sanitizer utility for PII masking
- Update Tasks.md tracking
- Update geo_cache tasks and other task modules with correlation_id

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-03 11:52:09 +02:00
parent 7b93499551
commit 0133489920
17 changed files with 582 additions and 124 deletions

View File

@@ -0,0 +1,50 @@
"""Log sanitization utilities for preventing sensitive data leakage.
All external output (subprocess, API responses, config data) passed to
structlog MUST be sanitized first. This module provides the canonical
sanitize_for_logging() function used across the codebase.
"""
from __future__ import annotations
import re
# Patterns for sensitive data that must never appear in logs.
#
# Shared regex fragments for ``key <separator> value`` style secrets:
#
# * ``_KV_SEP`` accepts an optional closing quote after the key (JSON / dict
#   reprs such as ``"password": ...``), then ``=`` or ``:`` with optional
#   spaces/tabs on either side.  Newlines are deliberately NOT skipped so a
#   trailing ``password:`` prompt does not swallow the first word of the
#   next line.
# * ``_KV_VALUE`` is either a complete single-line quoted string (so values
#   containing spaces are redacted in full) or a run of non-whitespace.
_KV_SEP = r"""["']?[ \t]*[=:][ \t]*"""
_KV_VALUE = r"""(?:"[^"\n]*"|'[^'\n]*'|\S+)"""

# Each entry is (compiled pattern, replacement).  Entries are applied in
# order by sanitize_for_logging().
_SENSITIVE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # PEM private keys.  The whole block is removed, header through footer,
    # not just the BEGIN line -- the base64 body is the actual secret.  Any
    # label ending in "PRIVATE KEY" is accepted (RSA, DSA, EC, OPENSSH,
    # ENCRYPTED, or none for PKCS#8).  If the END marker is missing (e.g.
    # truncated output) everything up to the end of the text is redacted.
    # Listed first so the multi-line block is gone before the line-oriented
    # patterns below run.
    (
        re.compile(
            r"-----BEGIN +(?:[A-Z0-9]+ +)*PRIVATE KEY-----"
            r".*?"
            r"(?:-----END +(?:[A-Z0-9]+ +)*PRIVATE KEY-----|\Z)",
            re.DOTALL,
        ),
        "*** PRIVATE KEY ***",
    ),
    # Passwords: password=PASS, password:PASS, password = PASS, "password": "PASS"
    (re.compile("password" + _KV_SEP + _KV_VALUE, re.IGNORECASE), "password=***"),
    # API keys: api_key=X, api-key: X, APIKEY=X, etc.
    (re.compile("api[_-]?key" + _KV_SEP + _KV_VALUE, re.IGNORECASE), "api_key=***"),
    # Auth tokens: token=X, token: X, access_token=X, etc.
    (re.compile("token" + _KV_SEP + _KV_VALUE, re.IGNORECASE), "token=***"),
    # Authorization headers.  An optional, known auth scheme is consumed
    # together with the credential that follows it, so that e.g.
    # "Authorization: Basic <b64>" does not redact only the word "Basic".
    (
        re.compile(
            r"""Authorization["']?[ \t]*:[ \t]*"""
            r"(?:(?:Bearer|Basic|Digest|Negotiate|NTLM|Token)[ \t]+)?"
            + _KV_VALUE,
            re.IGNORECASE,
        ),
        "Authorization: ***",
    ),
    # Secret values: secret=X, secret: X, client_secret=X
    (re.compile("secret" + _KV_SEP + _KV_VALUE, re.IGNORECASE), "secret=***"),
    # AWS access key IDs: AKIA followed by 16 upper-case alphanumerics.
    (re.compile(r"AKIA[0-9A-Z]{16}"), "AKIA***"),
    # Secret keys: secret_key=X, secret-key: X, and the AWS spelling
    # secret_access_key=X.
    (
        re.compile(
            "secret[_-]?(?:access[_-]?)?key" + _KV_SEP + _KV_VALUE,
            re.IGNORECASE,
        ),
        "secret_key=***",
    ),
    # Stand-alone bearer JWTs (three dot-separated base64url segments) that
    # appear outside of an Authorization header.
    (
        re.compile(r"Bearer\s+[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"),
        "Bearer ***",
    ),
]


def sanitize_for_logging(text: str) -> str:
    """Remove sensitive data patterns from text before logging.

    Applies every ``(pattern, replacement)`` pair in ``_SENSITIVE_PATTERNS``,
    in order, to strip passwords, API keys, tokens, secrets, Authorization
    credentials, PEM private-key blocks and AWS access key IDs.  Each match
    is replaced by a fixed marker containing ``***``; the key name is kept so
    the log line stays readable.

    Redaction is best-effort and errs on the side of removing too much: the
    word following ``password:`` in ordinary prose is redacted as well.

    Args:
        text: Raw text that may contain sensitive data.

    Returns:
        Text with sensitive patterns replaced by ``***`` markers.  Text
        without any match is returned unchanged.
    """
    for pattern, replacement in _SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text