Implement frontend and backend observability alignment

Align frontend and backend error observability with correlation IDs and
structured telemetry for distributed tracing across systems.

Backend changes:
- Add CorrelationIdMiddleware to generate/extract correlation IDs
- Include correlation_id in all ErrorResponse objects
- Store correlation ID in structlog contextvars for automatic inclusion in logs
- Add correlation ID to response headers (X-Correlation-ID)

Frontend changes:
- API client automatically generates session-scoped UUID4 and includes
  X-Correlation-ID header in all requests
- Extract correlation ID from API error responses
- Update error handlers to use telemetry with correlation IDs
- Add telemetry logging to ErrorBoundary, PageErrorBoundary, SectionErrorBoundary
- Implement redaction utilities for privacy-safe logging of sensitive data

Documentation:
- Add observability guidelines to Web-Development.md
  * Correlation ID usage patterns
  * Privacy & security best practices
  * Telemetry event structure
  * Redaction utilities for sensitive data
- Add distributed tracing architecture section to Architecture.md
  * Correlation ID flow across frontend/backend
  * Example troubleshooting scenario
  * Implementation details for future enhancements

Testing:
- Add comprehensive tests for correlation middleware
- Update error boundary tests to verify telemetry integration
- Verify TypeScript and ESLint pass with no warnings

Fixes: Issue #40 - Frontend and backend observability are not aligned

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-04-30 18:32:19 +02:00
parent 9a43123b3a
commit 3d1a6f5538
16 changed files with 916 additions and 54 deletions

View File

@@ -40,6 +40,7 @@ from app.exceptions import (
RateLimitError,
ServiceUnavailableError,
)
from app.middleware.correlation import CorrelationIdMiddleware
from app.middleware.csrf import CsrfMiddleware
from app.models.response import ErrorResponse
from app.routers import (
@@ -215,6 +216,20 @@ def _get_error_metadata(exc: Exception) -> dict[str, str | int | float | bool |
return {}
def _get_correlation_id(request: Request) -> str | None:
"""Extract correlation ID from request state if available.
The correlation ID is set by CorrelationIdMiddleware.
Args:
request: The incoming FastAPI request.
Returns:
The correlation ID string, or None if not present.
"""
return getattr(request.state, "correlation_id", None)
async def _unhandled_exception_handler(
request: Request,
exc: Exception,
@@ -241,6 +256,7 @@ async def _unhandled_exception_handler(
code="internal_error",
detail="An unexpected error occurred. Please try again later.",
metadata={},
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=500,
@@ -271,6 +287,7 @@ async def _fail2ban_connection_handler(
code="fail2ban_unreachable",
detail="Cannot reach the fail2ban service. Check the server status page.",
metadata={"socket_path": exc.socket_path},
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=502,
@@ -301,6 +318,7 @@ async def _fail2ban_protocol_handler(
code="fail2ban_protocol_error",
detail="Cannot reach the fail2ban service. Check the server status page.",
metadata={},
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=502,
@@ -331,6 +349,7 @@ async def _not_found_handler(
code=_get_error_code(exc),
detail=str(exc),
metadata=_get_error_metadata(exc),
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=status.HTTP_404_NOT_FOUND,
@@ -361,6 +380,7 @@ async def _bad_request_handler(
code=_get_error_code(exc),
detail=str(exc),
metadata=_get_error_metadata(exc),
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
@@ -383,6 +403,7 @@ async def _conflict_handler(
code=_get_error_code(exc),
detail=str(exc),
metadata=_get_error_metadata(exc),
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=status.HTTP_409_CONFLICT,
@@ -406,6 +427,7 @@ async def _domain_error_handler(
code=_get_error_code(exc),
detail=str(exc),
metadata=_get_error_metadata(exc),
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -436,6 +458,7 @@ async def _value_error_handler(
code="invalid_input",
detail=str(exc),
metadata={},
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=status.HTTP_400_BAD_REQUEST,
@@ -466,6 +489,7 @@ async def _service_unavailable_handler(
code=_get_error_code(exc),
detail=str(exc),
metadata=_get_error_metadata(exc),
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
@@ -496,6 +520,7 @@ async def _authentication_error_handler(
code=_get_error_code(exc),
detail=str(exc),
metadata=_get_error_metadata(exc),
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=status.HTTP_401_UNAUTHORIZED,
@@ -526,6 +551,7 @@ async def _rate_limit_error_handler(
code=_get_error_code(exc),
detail=str(exc),
metadata=_get_error_metadata(exc),
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
status_code=status.HTTP_429_TOO_MANY_REQUESTS,
@@ -576,6 +602,7 @@ async def _http_exception_handler(
code=error_code,
detail=exc.detail,
metadata={},
correlation_id=_get_correlation_id(request),
)
return JSONResponse(
@@ -743,6 +770,9 @@ def create_app(settings: Settings | None = None) -> FastAPI:
# Note: middleware is applied in reverse order of registration.
# The setup-redirect must run *after* CSRF, so it is added last.
# CSRF middleware protects cookie-authenticated state-mutating requests.
# CorrelationIdMiddleware must run first (added last) so correlation ID
# is available to all downstream handlers and loggers.
app.add_middleware(CorrelationIdMiddleware)
app.add_middleware(SetupRedirectMiddleware)
app.add_middleware(CsrfMiddleware)