"""Log sanitization utilities for preventing sensitive data leakage.

All external output (subprocess, API responses, config data) passed to
logging MUST be sanitized first. This module provides the canonical
sanitize_for_logging() function used across the codebase.
"""

from __future__ import annotations

import re

# Key/value separator with optional surrounding spaces or tabs, so that
# ``password=x``, ``password: x`` and ``password = x`` are all caught.
# Deliberately excludes newlines so a match never spills onto the next line.
_SEP = r"[ \t]*[=:][ \t]*"

# Label part of a PEM private-key boundary line. The algorithm prefix is
# optional so unlabelled ``PRIVATE KEY`` blocks are covered as well.
_PEM_LABEL = r"(?:(?:RSA|DSA|EC|OPENSSH|ENCRYPTED) +)?PRIVATE KEY"

# Patterns for sensitive data that must never appear in logs.
#
# Each entry is (compiled pattern, replacement). Entries are applied in order,
# each one over the output of the previous one. Replacements normalise the key
# spelling (e.g. ``API-KEY: x`` becomes ``api_key=***``).
#
# NOTE: key/value patterns require the separator to follow the key name
# directly (modulo spaces/tabs); quoted keys such as ``"password": "x"`` are
# not matched by these patterns.
_SENSITIVE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Passwords: password=PASS, password:PASS, password = PASS, etc.
    (re.compile(rf"password{_SEP}\S+", re.IGNORECASE), "password=***"),
    # API keys: api_key=X, api-key=X, APIKEY=X, etc.
    (re.compile(rf"api[_-]?key{_SEP}\S+", re.IGNORECASE), "api_key=***"),
    # Auth tokens: token=X, token: X, etc.
    (re.compile(rf"token{_SEP}\S+", re.IGNORECASE), "token=***"),
    # Authorization headers. The scheme word must be consumed together with
    # the credential; otherwise only the scheme would be replaced and the
    # credential after it would remain in the output.
    (
        re.compile(
            r"Authorization:\s*"
            r"(?:(?:Bearer|Basic|Digest|Negotiate|NTLM|Token)\s+)?\S+",
            re.IGNORECASE,
        ),
        "Authorization: ***",
    ),
    # Secret values: secret=X, secret: X
    (re.compile(rf"secret{_SEP}\S+", re.IGNORECASE), "secret=***"),
    # Private keys: the whole PEM block from the BEGIN line through the
    # matching END line. If the END line is missing (truncated output),
    # everything after the header is redacted: over-redaction is preferred to
    # leaking key material.
    (
        re.compile(
            rf"-----BEGIN +{_PEM_LABEL}-----"
            rf".*?(?:-----END +{_PEM_LABEL}-----|\Z)",
            re.DOTALL,
        ),
        "*** PRIVATE KEY ***",
    ),
    # AWS access keys: AKIA followed by 16 upper-case letters/digits.
    (re.compile(r"AKIA[0-9A-Z]{16}"), "AKIA***"),
    # Secret keys: secret_key=X, secret-key: X, secretkey=X
    (re.compile(rf"secret[_-]?key{_SEP}\S+", re.IGNORECASE), "secret_key=***"),
    # Bare "Bearer <a>.<b>.<c>" tokens (three dot-separated segments) that
    # appear outside an "Authorization:" header.
    (
        re.compile(r"Bearer\s+[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"),
        "Bearer ***",
    ),
]


def sanitize_for_logging(text: str) -> str:
    """Remove sensitive data patterns from text before logging.

    Applies every substitution in ``_SENSITIVE_PATTERNS`` in order to strip
    passwords, API keys, tokens, secrets, Authorization credentials, PEM
    private-key blocks and AWS access key IDs. Each match is replaced with a
    fixed redaction marker, so the original value does not survive in the
    result.

    Args:
        text: Raw text that may contain sensitive data.

    Returns:
        Text with sensitive patterns replaced by ``***`` markers. Text that
        matches no pattern is returned unchanged.
    """
    for pattern, replacement in _SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text