- Remove structlog dependency from backend/pyproject.toml - Add app.utils.logging_compat shim for keyword-arg logging API - Add app.utils.json_formatter for JSON log output with extra fields - Update all backend modules to use logging_compat.get_logger() - Update docstrings in log_sanitizer.py and json_formatter.py - Update test comment in test_async_utils.py - Record 406 failing tests in Docs/Tasks.md for tracking
51 lines
2.1 KiB
Python
51 lines
2.1 KiB
Python
"""Log sanitization utilities for preventing sensitive data leakage.
|
|
|
|
All external output (subprocess, API responses, config data) passed to
|
|
logging MUST be sanitized first. This module provides the canonical
|
|
sanitize_for_logging() function used across the codebase.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
# Patterns for sensitive data that must never appear in logs.
|
|
# Key/value separator with optional *horizontal* whitespace on either side,
# so "password=X", "password: X" and "password = X" are all recognised.
# Newlines are deliberately excluded: a dangling "password:" at the end of a
# line must not swallow the first token of the following line.
_SEP = r"[ \t]*[=:][ \t]*"

# Label part of a PEM private-key boundary line. The algorithm word is
# optional so that PKCS#8 keys ("BEGIN PRIVATE KEY" / "BEGIN ENCRYPTED
# PRIVATE KEY") are covered as well as the legacy per-algorithm formats.
_PEM_LABEL = r"(?:(?:RSA|DSA|EC|OPENSSH|ENCRYPTED) +)?PRIVATE KEY"

# Ordered (pattern, replacement) pairs applied one after another by
# sanitize_for_logging(). Each replacement is a fixed marker, so running the
# sanitizer on already-sanitized text leaves it unchanged.
_SENSITIVE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Passwords: password=PASS, password:PASS, password = PASS, etc.
    (re.compile(r"password" + _SEP + r"\S+", re.IGNORECASE), "password=***"),
    # API keys: api_key=X, api-key=X, APIKEY=X, etc.
    (re.compile(r"api[_-]?key" + _SEP + r"\S+", re.IGNORECASE), "api_key=***"),
    # Auth tokens: token=X, token: X, etc.
    (re.compile(r"token" + _SEP + r"\S+", re.IGNORECASE), "token=***"),
    # Authorization headers: "Authorization: <scheme> <credentials>".
    # A known single-token scheme word is consumed together with the
    # credential that follows it; otherwise only the first token is taken.
    # Without the scheme list, "Authorization: Basic <b64>" would redact the
    # word "Basic" and leave the credentials in place.
    (
        re.compile(
            r"Authorization[ \t]*:[ \t]*"
            r"(?:(?:Bearer|Basic|Token|Negotiate)[ \t]+)?\S+",
            re.IGNORECASE,
        ),
        "Authorization: ***",
    ),
    # Plain secrets: secret=X, secret: X
    (re.compile(r"secret" + _SEP + r"\S+", re.IGNORECASE), "secret=***"),
    # PEM private keys. When the matching END line is present the whole
    # block (header, body, footer) is redacted; if the text is truncated and
    # no END line exists, only the header is replaced.
    (
        re.compile(
            r"-----BEGIN +" + _PEM_LABEL + r"-----"
            r"(?:.*?-----END +" + _PEM_LABEL + r"-----)?",
            re.DOTALL,
        ),
        "*** PRIVATE KEY ***",
    ),
    # AWS access key IDs: AKIA followed by 16 upper-case alphanumerics.
    (re.compile(r"AKIA[0-9A-Z]{16}"), "AKIA***"),
    # Secret keys: secret_key=X, secret-key=X, secretkey=X
    (re.compile(r"secret[_-]?key" + _SEP + r"\S+", re.IGNORECASE), "secret_key=***"),
    # Stand-alone three-segment (JWT-shaped) bearer tokens that appear
    # outside an Authorization header.
    (re.compile(r"Bearer\s+[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"), "Bearer ***"),
]


def sanitize_for_logging(text: str) -> str:
    """Remove sensitive data patterns from text before logging.

    Applies every regex in ``_SENSITIVE_PATTERNS``, in order, to strip
    passwords, API keys, tokens, secrets, Authorization header credentials,
    PEM private keys, and AWS access key IDs. Each match is replaced by a
    fixed redaction marker, so the function is safe to call on text that
    has already been sanitized.

    Args:
        text: Raw text that may contain sensitive data.

    Returns:
        Text with sensitive patterns replaced by ``***`` markers. Text
        containing no recognised pattern is returned unchanged.
    """
    for pattern, replacement in _SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text
|