- Add Observability.md documentation - Standardize task logging with correlation_id support - Add log_sanitizer utility for PII masking - Update Tasks.md tracking - Update geo_cache tasks and other task modules with correlation_id Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
51 lines
2.1 KiB
Python
51 lines
2.1 KiB
Python
"""Log sanitization utilities for preventing sensitive data leakage.

All external output (subprocess, API responses, config data) passed to
structlog MUST be sanitized first. This module provides the canonical
sanitize_for_logging() function used across the codebase.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
# Patterns for sensitive data that must never appear in logs.
|
|
# Separator between a credential key and its value.  Accepts a closing quote
# left over from a quoted key (JSON / dict repr) and blanks around ``=`` or
# ``:``.  Only spaces and tabs are tolerated so a match never spills onto the
# following line.
_KEY_VALUE_SEPARATOR = r"""["']?[ \t]*[=:][ \t]*"""

# A credential value.  An auth-scheme word directly in front of the value is
# swallowed too (otherwise ``token=Bearer abc`` would mask the word "Bearer"
# and leave the token behind).  A quoted value is consumed up to its closing
# quote so embedded spaces cannot leak; any non-blank characters glued to the
# closing quote are consumed as well, which keeps the match at least as wide
# as a plain ``\S+`` would be.
_CREDENTIAL_VALUE = (
    r"""(?:(?:Bearer|Basic)[ \t]+)?"""
    r"""(?:"[^"\n]*"\S*|'[^'\n]*'\S*|\S+)"""
)

# Tail of a PEM private-key boundary line: up to two label words (RSA, DSA,
# EC, OPENSSH, ENCRYPTED, ...) or none at all (PKCS#8), then the fixed text.
_PEM_PRIVATE_KEY_TAIL = r"(?:[A-Z0-9]+ +){0,2}PRIVATE KEY-----"


def _key_value_pattern(key: str) -> re.Pattern[str]:
    """Compile a case-insensitive ``<key><separator><value>`` pattern."""
    return re.compile(key + _KEY_VALUE_SEPARATOR + _CREDENTIAL_VALUE, re.IGNORECASE)


# (pattern, replacement) pairs, applied in this order by sanitize_for_logging().
_SENSITIVE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Passwords: password=PASS, password: PASS, password = PASS,
    # "password": "PASS", ...
    (_key_value_pattern(r"password"), "password=***"),
    # API keys: api_key=X, api-key: X, APIKEY = X, "apiKey": "X", ...
    (_key_value_pattern(r"api[_-]?key"), "api_key=***"),
    # Auth tokens: token=X, token: X, access_token = X, ...
    (_key_value_pattern(r"token"), "token=***"),
    # Authorization headers: "Authorization: <scheme> <credentials>" or a bare
    # credential.  Any single scheme word (Bearer, Basic, Token, ...) is
    # removed together with the credential that follows it on the same line.
    (
        re.compile(
            r"Authorization:\s*(?:Bearer\s+|[A-Za-z][\w-]*[ \t]+)?\S+",
            re.IGNORECASE,
        ),
        "Authorization: ***",
    ),
    # Secret values: secret=X, secret: X, client_secret = X, ...
    (_key_value_pattern(r"secret"), "secret=***"),
    # PEM private keys: the whole block from the BEGIN line through the
    # matching END line.  If the END line is missing (truncated output),
    # everything after the BEGIN line is dropped, because the key material
    # cannot be told apart from ordinary text.
    (
        re.compile(
            r"-----BEGIN +" + _PEM_PRIVATE_KEY_TAIL
            + r".*?(?:-----END +" + _PEM_PRIVATE_KEY_TAIL + r"|\Z)",
            re.DOTALL,
        ),
        "*** PRIVATE KEY ***",
    ),
    # AWS access key IDs: AKIA followed by 16 upper-case letters / digits.
    (re.compile(r"AKIA[0-9A-Z]{16}"), "AKIA***"),
    # Secret keys: secret_key=X, secret-key: X, SECRET_ACCESS_KEY=X, ...
    (_key_value_pattern(r"secret[_-]?(?:access[_-]?)?key"), "secret_key=***"),
    # JWT-shaped bearer tokens (three dot-separated base64url segments) that
    # appear outside an Authorization header.
    (re.compile(r"Bearer\s+[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"), "Bearer ***"),
]


def sanitize_for_logging(text: str) -> str:
    """Remove sensitive data patterns from text before logging.

    Runs every ``(pattern, replacement)`` pair of ``_SENSITIVE_PATTERNS`` over
    the text, in list order, so that passwords, API keys, tokens, secrets,
    Authorization headers, PEM private keys and AWS access key IDs are
    replaced by a fixed redaction marker.  Matching deliberately errs on the
    side of masking too much rather than too little.

    Args:
        text: Raw text that may contain sensitive data.

    Returns:
        Text with sensitive patterns replaced by ``***`` markers.  Text that
        contains none of the patterns (including the empty string) is
        returned unchanged.
    """
    for pattern, replacement in _SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text
|