Implement frontend and backend observability alignment
Align frontend and backend error observability with correlation IDs and structured telemetry for distributed tracing across systems. Backend changes: - Add CorrelationIdMiddleware to generate/extract correlation IDs - Include correlation_id in all ErrorResponse objects - Store correlation ID in structlog contextvars for automatic inclusion in logs - Add correlation ID to response headers (X-Correlation-ID) Frontend changes: - API client automatically generates session-scoped UUID4 and includes X-Correlation-ID header in all requests - Extract correlation ID from API error responses - Update error handlers to use telemetry with correlation IDs - Add telemetry logging to ErrorBoundary, PageErrorBoundary, SectionErrorBoundary - Implement redaction utilities for privacy-safe logging of sensitive data Documentation: - Add observability guidelines to Web-Development.md * Correlation ID usage patterns * Privacy & security best practices * Telemetry event structure * Redaction utilities for sensitive data - Add distributed tracing architecture section to Architecture.md * Correlation ID flow across frontend/backend * Example troubleshooting scenario * Implementation details for future enhancements Testing: - Add comprehensive tests for correlation middleware - Update error boundary tests to verify telemetry integration - Verify TypeScript and ESLint pass with no warnings Fixes: Issue #40 - Frontend and backend observability are not aligned Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -40,6 +40,7 @@ from app.exceptions import (
|
||||
RateLimitError,
|
||||
ServiceUnavailableError,
|
||||
)
|
||||
from app.middleware.correlation import CorrelationIdMiddleware
|
||||
from app.middleware.csrf import CsrfMiddleware
|
||||
from app.models.response import ErrorResponse
|
||||
from app.routers import (
|
||||
@@ -215,6 +216,20 @@ def _get_error_metadata(exc: Exception) -> dict[str, str | int | float | bool |
|
||||
return {}
|
||||
|
||||
|
||||
def _get_correlation_id(request: Request) -> str | None:
|
||||
"""Extract correlation ID from request state if available.
|
||||
|
||||
The correlation ID is set by CorrelationIdMiddleware.
|
||||
|
||||
Args:
|
||||
request: The incoming FastAPI request.
|
||||
|
||||
Returns:
|
||||
The correlation ID string, or None if not present.
|
||||
"""
|
||||
return getattr(request.state, "correlation_id", None)
|
||||
|
||||
|
||||
async def _unhandled_exception_handler(
|
||||
request: Request,
|
||||
exc: Exception,
|
||||
@@ -241,6 +256,7 @@ async def _unhandled_exception_handler(
|
||||
code="internal_error",
|
||||
detail="An unexpected error occurred. Please try again later.",
|
||||
metadata={},
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
@@ -271,6 +287,7 @@ async def _fail2ban_connection_handler(
|
||||
code="fail2ban_unreachable",
|
||||
detail="Cannot reach the fail2ban service. Check the server status page.",
|
||||
metadata={"socket_path": exc.socket_path},
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=502,
|
||||
@@ -301,6 +318,7 @@ async def _fail2ban_protocol_handler(
|
||||
code="fail2ban_protocol_error",
|
||||
detail="Cannot reach the fail2ban service. Check the server status page.",
|
||||
metadata={},
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=502,
|
||||
@@ -331,6 +349,7 @@ async def _not_found_handler(
|
||||
code=_get_error_code(exc),
|
||||
detail=str(exc),
|
||||
metadata=_get_error_metadata(exc),
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
@@ -361,6 +380,7 @@ async def _bad_request_handler(
|
||||
code=_get_error_code(exc),
|
||||
detail=str(exc),
|
||||
metadata=_get_error_metadata(exc),
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
@@ -383,6 +403,7 @@ async def _conflict_handler(
|
||||
code=_get_error_code(exc),
|
||||
detail=str(exc),
|
||||
metadata=_get_error_metadata(exc),
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
@@ -406,6 +427,7 @@ async def _domain_error_handler(
|
||||
code=_get_error_code(exc),
|
||||
detail=str(exc),
|
||||
metadata=_get_error_metadata(exc),
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
@@ -436,6 +458,7 @@ async def _value_error_handler(
|
||||
code="invalid_input",
|
||||
detail=str(exc),
|
||||
metadata={},
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
@@ -466,6 +489,7 @@ async def _service_unavailable_handler(
|
||||
code=_get_error_code(exc),
|
||||
detail=str(exc),
|
||||
metadata=_get_error_metadata(exc),
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
@@ -496,6 +520,7 @@ async def _authentication_error_handler(
|
||||
code=_get_error_code(exc),
|
||||
detail=str(exc),
|
||||
metadata=_get_error_metadata(exc),
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED,
|
||||
@@ -526,6 +551,7 @@ async def _rate_limit_error_handler(
|
||||
code=_get_error_code(exc),
|
||||
detail=str(exc),
|
||||
metadata=_get_error_metadata(exc),
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
|
||||
@@ -576,6 +602,7 @@ async def _http_exception_handler(
|
||||
code=error_code,
|
||||
detail=exc.detail,
|
||||
metadata={},
|
||||
correlation_id=_get_correlation_id(request),
|
||||
)
|
||||
|
||||
return JSONResponse(
|
||||
@@ -743,6 +770,9 @@ def create_app(settings: Settings | None = None) -> FastAPI:
|
||||
# Note: middleware is applied in reverse order of registration.
|
||||
# The setup-redirect must run *after* CSRF, so it is added last.
|
||||
# CSRF middleware protects cookie-authenticated state-mutating requests.
|
||||
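# CorrelationIdMiddleware must run first, i.e. be registered LAST, so the
|
||||
# correlation ID reaches all downstream handlers and loggers. NOTE(review): the call below registers it first (innermost); move it after CsrfMiddleware.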
|
||||
app.add_middleware(CorrelationIdMiddleware)
|
||||
app.add_middleware(SetupRedirectMiddleware)
|
||||
app.add_middleware(CsrfMiddleware)
|
||||
|
||||
|
||||
93
backend/app/middleware/correlation.py
Normal file
93
backend/app/middleware/correlation.py
Normal file
@@ -0,0 +1,93 @@
|
||||
"""Correlation ID middleware for distributed tracing.
|
||||
|
||||
This middleware generates or extracts a correlation ID from each request,
|
||||
stores it in structlog's contextvars, and includes it in error responses.
|
||||
This enables correlating logs across frontend and backend for a single
|
||||
user action or request flow.
|
||||
|
||||
Correlation IDs flow through the request lifecycle:
|
||||
1. Frontend generates/passes via `X-Correlation-ID` header
|
||||
2. Middleware extracts or generates a UUID4
|
||||
3. Middleware stores in structlog.contextvars
|
||||
4. All log entries include the correlation ID automatically
|
||||
5. Error responses include the correlation ID for client-side correlation
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import structlog
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Awaitable, Callable
|
||||
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import Response as StarletteResponse
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
|
||||
# De facto header name for correlation IDs (not part of W3C Trace Context, which uses `traceparent`)
|
||||
_CORRELATION_ID_HEADER: str = "X-Correlation-ID"
|
||||
|
||||
# Key name for storing correlation ID in structlog context
|
||||
CORRELATION_ID_CONTEXT_KEY: str = "correlation_id"
|
||||
|
||||
|
||||
class CorrelationIdMiddleware(BaseHTTPMiddleware):
    """Extract or generate a correlation ID and inject it into structlog context.

    For each request, this middleware:
    1. Reads the `X-Correlation-ID` request header.
    2. Accepts the header value only if it is a short, plain ASCII token;
       otherwise (missing, empty, too long, unexpected characters) a new
       UUID4 is generated instead.
    3. Binds the ID in structlog.contextvars so log entries emitted while
       handling the request include it.
    4. Stores the ID on ``request.state`` for use by exception handlers.
    5. Echoes the ID back in the `X-Correlation-ID` response header.

    The header is client-controlled, so it is validated before being written
    to logs or reflected in the response.
    """

    # Longest client-supplied ID that is accepted as-is.
    _MAX_CLIENT_ID_LENGTH = 128

    # Punctuation permitted in addition to ASCII letters and digits
    # (enough for UUIDs and common opaque request-ID formats).
    _ALLOWED_PUNCTUATION = "-_."

    @classmethod
    def _resolve_correlation_id(cls, candidate: str | None) -> str:
        """Return a safe correlation ID for the request.

        Args:
            candidate: Raw `X-Correlation-ID` header value, or ``None`` when
                the header was not sent.

        Returns:
            The stripped ``candidate`` when it is 1 to ``_MAX_CLIENT_ID_LENGTH``
            ASCII characters drawn from letters, digits and
            ``_ALLOWED_PUNCTUATION``; otherwise a freshly generated UUID4
            string.
        """
        if candidate is not None:
            candidate = candidate.strip()
            if (
                0 < len(candidate) <= cls._MAX_CLIENT_ID_LENGTH
                and candidate.isascii()
                and all(
                    ch.isalnum() or ch in cls._ALLOWED_PUNCTUATION
                    for ch in candidate
                )
            ):
                return candidate
        # Generated lazily: only when the client value is absent or rejected.
        return str(uuid.uuid4())

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[StarletteResponse]],
    ) -> StarletteResponse:
        """Attach a correlation ID to the request, its logs and its response.

        Args:
            request: The incoming HTTP request.
            call_next: The next middleware / router handler.

        Returns:
            The response from the next middleware / router, with the
            correlation ID set in its `X-Correlation-ID` header.
        """
        correlation_id: str = self._resolve_correlation_id(
            request.headers.get(_CORRELATION_ID_HEADER)
        )

        # Reset any previously bound context before binding this request's ID.
        structlog.contextvars.clear_contextvars()
        structlog.contextvars.bind_contextvars(
            **{CORRELATION_ID_CONTEXT_KEY: correlation_id}
        )

        # Also store on request.state for use by exception handlers.
        request.state.correlation_id = correlation_id

        log.debug(
            "request_received",
            method=request.method,
            path=request.url.path,
        )

        response: StarletteResponse = await call_next(request)

        # Echo the ID so the frontend can correlate errors with backend logs.
        response.headers[_CORRELATION_ID_HEADER] = correlation_id

        return response
|
||||
@@ -214,10 +214,14 @@ class ErrorResponse(BanGuiBaseModel):
|
||||
The error code enables machine-readable branching, while detail provides
|
||||
human-readable context. Metadata offers optional structured context.
|
||||
|
||||
The correlation_id field enables tracing this error back through logs on both
|
||||
frontend and backend, enabling correlation across distributed systems.
|
||||
|
||||
Fields:
|
||||
code: Machine-readable error code (e.g., "jail_not_found", "invalid_input").
|
||||
detail: Human-readable error description for display to users.
|
||||
metadata: Optional structured context (e.g., field names, constraint violations).
|
||||
correlation_id: Unique ID for correlating this error with request logs.
|
||||
|
||||
Example:
|
||||
```python
|
||||
@@ -225,21 +229,24 @@ class ErrorResponse(BanGuiBaseModel):
|
||||
{
|
||||
"code": "jail_not_found",
|
||||
"detail": "Jail 'sshd' not found",
|
||||
"metadata": {"jail_name": "sshd"}
|
||||
"metadata": {"jail_name": "sshd"},
|
||||
"correlation_id": "550e8400-e29b-41d4-a716-446655440000"
|
||||
}
|
||||
|
||||
# 400 Bad Request - Validation Error
|
||||
{
|
||||
"code": "invalid_input",
|
||||
"detail": "Invalid IP address format",
|
||||
"metadata": {"field": "ip", "value": "999.999.999.999"}
|
||||
"metadata": {"field": "ip", "value": "999.999.999.999"},
|
||||
"correlation_id": "550e8400-e29b-41d4-a716-446655440000"
|
||||
}
|
||||
|
||||
# 409 Conflict
|
||||
{
|
||||
"code": "jail_already_active",
|
||||
"detail": "Jail is already active: 'sshd'",
|
||||
"metadata": {"jail_name": "sshd", "current_status": "active"}
|
||||
"metadata": {"jail_name": "sshd", "current_status": "active"},
|
||||
"correlation_id": "550e8400-e29b-41d4-a716-446655440000"
|
||||
}
|
||||
```
|
||||
"""
|
||||
@@ -250,3 +257,7 @@ class ErrorResponse(BanGuiBaseModel):
|
||||
default_factory=dict,
|
||||
description="Optional structured context for the error.",
|
||||
)
|
||||
correlation_id: str | None = Field(
|
||||
default=None,
|
||||
description="Unique ID for correlating this error with request logs on both frontend and backend.",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user