Files
BanGUI/backend/app/utils/log_sanitizer.py
Lukas 0133489920 Update observability docs and task utilities
- Add Observability.md documentation
- Standardize task logging with correlation_id support
- Add log_sanitizer utility for PII masking
- Update Tasks.md tracking
- Update geo_cache tasks and other task modules with correlation_id

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-03 11:52:09 +02:00

51 lines
2.1 KiB
Python

"""Log sanitization utilities for preventing sensitive data leakage.
All external output (subprocess, API responses, config data) passed to
structlog MUST be sanitized first. This module provides the canonical
sanitize_for_logging() function used across the codebase.
"""
from __future__ import annotations
import re
# Separator between a credential key and its value. Tolerates an optional
# closing quote after the key (JSON/YAML style ``"password": ...``) and
# spaces/tabs around ``=`` or ``:``. Newlines are deliberately excluded so a
# prompt such as ``"password:\n"`` does not swallow the following line.
_KV_SEPARATOR = r"""["']?[ \t]*[=:][ \t]*"""

# Value of a ``key=value`` pair: a single-line quoted string (so values that
# contain spaces are redacted in full) or, failing that, a run of
# non-whitespace characters.
_KV_VALUE = r"""(?:"[^"\r\n]*"|'[^'\r\n]*'|\S+)"""


def _key_value_rule(key: str, label: str) -> tuple[re.Pattern[str], str]:
    """Build a rule that redacts ``<key><separator><value>``.

    Args:
        key: Regex fragment matching the credential key (case-insensitive).
        label: Canonical key name written into the redacted output.

    Returns:
        A ``(compiled pattern, replacement)`` pair; the replacement is
        ``"<label>=***"``.
    """
    return re.compile(key + _KV_SEPARATOR + _KV_VALUE, re.IGNORECASE), f"{label}=***"


# Patterns for sensitive data that must never appear in logs.
# Each entry is ``(compiled pattern, replacement)``; entries are applied in
# list order by sanitize_for_logging().
_SENSITIVE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Passwords: password=PASS, password:PASS, password = PASS, "password": "PASS"
    _key_value_rule(r"password", "password"),
    # API keys: api_key=X, api-key: X, APIKEY=X, etc.
    _key_value_rule(r"api[_-]?key", "api_key"),
    # Auth tokens: token=X, token: X, access_token=X, etc.
    _key_value_rule(r"token", "token"),
    # Authorization headers. The scheme word is consumed together with the
    # credential; otherwise "Authorization: Basic <cred>" would only lose the
    # word "Basic" and the credential itself would remain in the log.
    (
        re.compile(
            r"Authorization:\s*"
            r"(?:(?:Basic|Bearer|Digest|Negotiate|NTLM|Token)\s+)?\S+",
            re.IGNORECASE,
        ),
        "Authorization: ***",
    ),
    # Secret values: secret=X, secret: X, client_secret=X
    _key_value_rule(r"secret", "secret"),
    # PEM private keys of any flavour (RSA/DSA/EC/OPENSSH/ENCRYPTED/PKCS#8).
    # The whole block through the END marker is removed, not just the header;
    # if the END marker is missing (truncated output) everything up to the
    # end of the text is removed, since it may still be key material.
    (
        re.compile(
            r"-----BEGIN +(?:[A-Z0-9]+ +)*PRIVATE KEY-----"
            r".*?"
            r"(?:-----END +(?:[A-Z0-9]+ +)*PRIVATE KEY-----|\Z)",
            re.DOTALL,
        ),
        "*** PRIVATE KEY ***",
    ),
    # AWS access key IDs: AKIA followed by 16 upper-case alphanumerics.
    (re.compile(r"AKIA[0-9A-Z]{16}"), "AKIA***"),
    # Secret keys: secret_key=X, secret-key: X, secretkey=X
    _key_value_rule(r"secret[_-]?key", "secret_key"),
    # Bare JWT-shaped bearer tokens that appear outside an Authorization header.
    (
        re.compile(r"Bearer\s+[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"),
        "Bearer ***",
    ),
]


def sanitize_for_logging(text: str) -> str:
    """Remove sensitive data patterns from text before logging.

    Applies every rule in ``_SENSITIVE_PATTERNS`` in order, replacing
    passwords, API keys, tokens, secrets, Authorization credentials, PEM
    private-key blocks, and AWS access key IDs with fixed redaction markers.
    Sanitizing already-sanitized text leaves it unchanged.

    Args:
        text: Raw text that may contain sensitive data.

    Returns:
        Text with sensitive patterns replaced by ``***`` markers.
    """
    for pattern, replacement in _SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text