Files
BanGUI/backend/app/utils/log_sanitizer.py
Lukas 7ec80fdeec refactor(logging): replace structlog with stdlib logging compat layer
- Remove structlog dependency from backend/pyproject.toml
- Add app.utils.logging_compat shim for keyword-arg logging API
- Add app.utils.json_formatter for JSON log output with extra fields
- Update all backend modules to use logging_compat.get_logger()
- Update docstrings in log_sanitizer.py and json_formatter.py
- Update test comment in test_async_utils.py
- Record 406 failing tests in Docs/Tasks.md for tracking
2026-05-10 13:37:54 +02:00

51 lines
2.1 KiB
Python

"""Log sanitization utilities for preventing sensitive data leakage.
All external output (subprocess, API responses, config data) passed to
logging MUST be sanitized first. This module provides the canonical
sanitize_for_logging() function used across the codebase.
"""
from __future__ import annotations
import re
# Patterns for sensitive data that must never appear in logs.
#
# Each entry is ``(compiled_pattern, replacement)``. sanitize_for_logging()
# applies the entries in list order, so earlier substitutions are visible to
# later patterns.

# Separator between a credential key and its value: an optional closing quote
# on the key (JSON style), then ``=`` or ``:`` with optional spaces/tabs on
# either side.  Newlines are deliberately excluded so a bare "password:" at
# the end of a line does not swallow the first word of the next line.
_KEY_VALUE_SEP = r"""["']?[ \t]*[=:][ \t]*"""

# A credential value: a single-line quoted string (which may contain spaces)
# plus any trailing non-space characters, or a bare run of non-space
# characters.
_SECRET_VALUE = r"""(?:"[^"\r\n]*"\S*|'[^'\r\n]*'\S*|\S+)"""


def _key_value_pattern(key: str) -> re.Pattern[str]:
    """Compile a case-insensitive ``<key><sep><value>`` pattern.

    Args:
        key: Regex fragment matching the credential key name.

    Returns:
        Compiled pattern that matches the key, the separator and the value.
    """
    return re.compile(key + _KEY_VALUE_SEP + _SECRET_VALUE, re.IGNORECASE)


_SENSITIVE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Passwords: password=PASS, password:PASS, password = PASS,
    # "password": "PASS", etc.
    (_key_value_pattern(r"password"), "password=***"),
    # API keys: api_key=X, api-key=X, APIKEY=X, etc.
    (_key_value_pattern(r"api[_-]?key"), "api_key=***"),
    # Auth tokens: token=X, token: X, etc.
    (_key_value_pattern(r"token"), "token=***"),
    # Authorization headers: Authorization: <scheme> <credentials>.
    # The scheme word must be consumed explicitly; otherwise it would be
    # taken as the value and the credential after it would stay in the log.
    (
        re.compile(
            r"Authorization:\s*(?:(?:Basic|Bearer|Negotiate|NTLM|Token)\s+)?\S+",
            re.IGNORECASE,
        ),
        "Authorization: ***",
    ),
    # Secret values: secret=X, secret: X
    (_key_value_pattern(r"secret"), "secret=***"),
    # Private keys: the whole PEM block from the BEGIN line through the
    # matching END line, for any key type (RSA, DSA, EC, OPENSSH, ENCRYPTED,
    # or the unlabelled PKCS#8 form).  If no END line is present only the
    # BEGIN line is replaced.
    (
        re.compile(
            r"-----BEGIN +(?:[A-Z0-9]+ +)*PRIVATE KEY-----"
            r"(?:[\s\S]*?-----END +(?:[A-Z0-9]+ +)*PRIVATE KEY-----)?"
        ),
        "*** PRIVATE KEY ***",
    ),
    # AWS access keys: AKIA...
    (re.compile(r"AKIA[0-9A-Z]{16}"), "AKIA***"),
    # Secret keys: secret_key=X, secret-key: X, secretkey=X
    (_key_value_pattern(r"secret[_-]?key"), "secret_key=***"),
    # Bearer tokens shaped like three dot-separated segments (JWT-style),
    # wherever they appear.
    (
        re.compile(r"Bearer\s+[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"),
        "Bearer ***",
    ),
]


def sanitize_for_logging(text: str) -> str:
    """Remove sensitive data patterns from text before logging.

    Applies every ``(pattern, replacement)`` pair in ``_SENSITIVE_PATTERNS``
    in order, stripping passwords, API keys, tokens, secrets, private keys
    and other credential-like strings.  Matching is regex-based, so only the
    shapes listed in that table are redacted.

    Args:
        text: Raw text that may contain sensitive data.

    Returns:
        Text with sensitive patterns replaced by ``***`` markers.
    """
    for pattern, replacement in _SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text