Update observability docs and task utilities
- Add Observability.md documentation - Standardize task logging with correlation_id support - Add log_sanitizer utility for PII masking - Update Tasks.md tracking - Update geo_cache tasks and other task modules with correlation_id Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -29,6 +29,7 @@ from app.utils.fail2ban_client import (
|
||||
Fail2BanResponse,
|
||||
)
|
||||
from app.utils.fail2ban_response import ok, to_dict
|
||||
from app.utils.log_sanitizer import sanitize_for_logging
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
|
||||
@@ -500,8 +501,8 @@ async def start_daemon(start_cmd_parts: list[str]) -> bool:
|
||||
"fail2ban_start_failed",
|
||||
command=" ".join(start_cmd_parts),
|
||||
returncode=process.returncode,
|
||||
stdout=stdout.decode("utf-8", errors="replace"),
|
||||
stderr=stderr.decode("utf-8", errors="replace"),
|
||||
stdout=sanitize_for_logging(stdout.decode("utf-8", errors="replace")),
|
||||
stderr=sanitize_for_logging(stderr.decode("utf-8", errors="replace")),
|
||||
)
|
||||
return False
|
||||
log.info(
|
||||
|
||||
70
backend/app/utils/correlation.py
Normal file
70
backend/app/utils/correlation.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""Correlation ID context variable for distributed tracing.
|
||||
|
||||
This module provides a :class:`contextvars.ContextVar` that stores the correlation
|
||||
ID for the current execution context. Background tasks inherit or explicitly set
|
||||
the correlation ID to enable log correlation across request/task boundaries.
|
||||
|
||||
Usage in background tasks::
|
||||
|
||||
from app.utils.correlation import get_correlation_id, reset_correlation_id, set_correlation_id
|
||||
|
||||
async def my_background_task(correlation_id: str) -> None:
|
||||
token = set_correlation_id(correlation_id)
|
||||
try:
|
||||
log.info("task_started") # Logs include correlation_id
|
||||
finally:
|
||||
reset_correlation_id(token)
|
||||
|
||||
For APScheduler jobs, pass the correlation ID through kwargs and call
|
||||
:func:`set_correlation_id` at the start of the async callback.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextvars import ContextVar
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from contextvars import Token
|
||||
|
||||
#: Fallback correlation ID reported when no parent request context has bound one.
_DEFAULT_BG_CORRELATION_ID: str = "bg-task"

#: Context variable carrying the correlation ID of the current execution context.
#: Reading it before anything has been set yields ``_DEFAULT_BG_CORRELATION_ID``.
correlation_id_var: ContextVar[str] = ContextVar("correlation_id", default=_DEFAULT_BG_CORRELATION_ID)
|
||||
|
||||
|
||||
def get_correlation_id() -> str:
    """Look up the correlation ID bound to the current execution context.

    Returns:
        The value currently held by ``correlation_id_var``. When nothing has
        been set in this context, the variable's default (``"bg-task"``) is
        returned instead.
    """
    current_id = correlation_id_var.get()
    return current_id
|
||||
|
||||
|
||||
def set_correlation_id(correlation_id: str) -> Token[str]:
    """Bind ``correlation_id`` to the current execution context.

    Args:
        correlation_id: Identifier to store in ``correlation_id_var``,
            typically the one handed over by the triggering request.

    Returns:
        The :class:`contextvars.Token` produced by the assignment. Hand it to
        :func:`reset_correlation_id` once the task is finished so the previous
        value is restored.
    """
    restore_token = correlation_id_var.set(correlation_id)
    return restore_token
|
||||
|
||||
|
||||
def reset_correlation_id(token: Token[str]) -> None:
    """Restore the correlation ID that was active before a matching set call.

    Args:
        token: Token obtained from :func:`set_correlation_id`; it identifies
            the value ``correlation_id_var`` should be rolled back to.
    """
    correlation_id_var.reset(token)
|
||||
50
backend/app/utils/log_sanitizer.py
Normal file
50
backend/app/utils/log_sanitizer.py
Normal file
@@ -0,0 +1,50 @@
|
||||
"""Log sanitization utilities for preventing sensitive data leakage.
|
||||
|
||||
All external output (subprocess, API responses, config data) passed to
|
||||
structlog MUST be sanitized first. This module provides the canonical
|
||||
sanitize_for_logging() function used across the codebase.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
|
||||
# Building blocks shared by the ``key=value`` style patterns below.
#
# * ``_SEPARATOR`` accepts ``=`` or ``:`` with optional spaces/tabs on either
#   side, plus an optional closing quote right after the key so JSON-style
#   ``"password": "x"`` is recognised too.
# * ``_VALUE`` accepts a quoted string (which may contain blanks) or a run of
#   non-whitespace characters.
#
# Blanks are restricted to spaces and tabs so a match never spills over onto
# the following line of output.
_SEPARATOR = r"""["']?[ \t]*[=:][ \t]*"""
_VALUE = r"""(?:"[^"\n]*"|'[^'\n]*'|\S+)"""

# Patterns for sensitive data that must never appear in logs.
# Each entry is ``(compiled pattern, replacement)``; entries are applied in order.
_SENSITIVE_PATTERNS: list[tuple[re.Pattern[str], str]] = [
    # Private keys: -----BEGIN [RSA|DSA|EC|OPENSSH|ENCRYPTED|...] PRIVATE KEY-----
    # Applied first and across lines so the whole key body up to the matching
    # END marker is removed. If the END marker is missing (truncated output),
    # only the header line is replaced.
    (
        re.compile(
            r"-----BEGIN +(?:[A-Z0-9]+ +)*PRIVATE KEY-----"
            r"(?:.*?-----END +(?:[A-Z0-9]+ +)*PRIVATE KEY-----)?",
            re.DOTALL,
        ),
        "*** PRIVATE KEY ***",
    ),
    # Passwords: password=PASS, password:PASS, password = PASS, "password": "PASS"
    (re.compile("password" + _SEPARATOR + _VALUE, re.IGNORECASE), "password=***"),
    # API keys: api_key=X, api-key: X, APIKEY=X, etc.
    (re.compile(r"api[_-]?key" + _SEPARATOR + _VALUE, re.IGNORECASE), "api_key=***"),
    # Auth tokens: token=X, token: X, etc.
    (re.compile("token" + _SEPARATOR + _VALUE, re.IGNORECASE), "token=***"),
    # Authorization headers: Authorization: <scheme> <credentials>
    # The scheme word must be consumed together with the credentials;
    # otherwise only the scheme name would be redacted and the credentials
    # would stay in the log. For multi-part schemes such as Digest only the
    # first credential token is removed.
    (
        re.compile(
            r"Authorization:\s*(?:(?:Bearer|Basic|Digest|Negotiate|NTLM|Token)\s+)?\S+",
            re.IGNORECASE,
        ),
        "Authorization: ***",
    ),
    # Secret values: secret=X, secret: X
    (re.compile("secret" + _SEPARATOR + _VALUE, re.IGNORECASE), "secret=***"),
    # Secret keys: secret_key=X, secret-key: X, secretkey=X
    (re.compile(r"secret[_-]?key" + _SEPARATOR + _VALUE, re.IGNORECASE), "secret_key=***"),
    # AWS access keys: AKIA...
    (re.compile(r"AKIA[0-9A-Z]{16}"), "AKIA***"),
    # Bare JWT-shaped bearer tokens that appear outside an Authorization header
    (re.compile(r"Bearer\s+[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"), "Bearer ***"),
]


def sanitize_for_logging(text: str) -> str:
    """Remove sensitive data patterns from text before logging.

    Applies every ``(pattern, replacement)`` pair in ``_SENSITIVE_PATTERNS``
    to ``text`` in order, replacing passwords, API keys, tokens, secrets,
    Authorization header credentials, private key blocks, and AWS access key
    IDs with a fixed redaction marker.

    Args:
        text: Raw text that may contain sensitive data.

    Returns:
        Text with sensitive patterns replaced by ``***`` markers. Empty input
        is returned unchanged.
    """
    if not text:
        # Nothing to scan; skip the regex passes.
        return text
    for pattern, replacement in _SENSITIVE_PATTERNS:
        text = pattern.sub(replacement, text)
    return text
|
||||
Reference in New Issue
Block a user