refactor(backend): external logging metrics, required mode, health checks

- Add external_logging_init_failures counter
- Add external_log_required flag, raise if init fails and required
- Health endpoint: add external_logging status check
- Blocklist service: enrich with metadata fields, update import logic
- Health check task: add runtime_state dependency, fix return typing
- Metrics: add Histogram for request latencies
- Frontend: align BlocklistImportLogSection props
- Docs: update deployment guide, remove stale tasks

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-04 03:45:13 +02:00
parent 42e177e6ea
commit 0a3f9c6c16
15 changed files with 172 additions and 131 deletions

View File

@@ -20,8 +20,10 @@ so that task logs can be correlated across runs.
from __future__ import annotations
import asyncio
import datetime
import uuid
from contextvars import copy_context
from typing import TYPE_CHECKING
import structlog
@@ -69,20 +71,24 @@ async def _run_probe_with_resources(
token = set_correlation_id(correlation_id)
try:
await _do_probe_with_resources(settings, runtime_state)
# Use copy_context() so ContextVar values (e.g. correlation_id)
# propagate to any child asyncio tasks spawned inside the coroutine.
probe_task = asyncio.create_task(
_do_probe_with_resources(settings, runtime_state),
context=copy_context(),
)
await run_with_timeout("health_check", probe_task, HEALTH_PROBE_TIMEOUT_SECONDS)
finally:
# Reset AFTER run_with_timeout completes, so child tasks still
# have the correlation ID in their context while they log.
reset_correlation_id(token)
async def _do_probe_with_resources(settings: Settings, runtime_state: RuntimeState) -> None:
    """Probe the fail2ban socket once and record the outcome.

    Runs with the correlation context already set by the caller.

    No timeout is applied here: the caller wraps this coroutine in
    ``run_with_timeout("health_check", ...)``, so a second, nested timeout
    (and a second probe) inside this function would be redundant.

    Args:
        settings: Application settings; only ``fail2ban_socket`` is read.
        runtime_state: State object handed to
            ``process_health_probe_result`` together with the probe status.
    """
    socket_path: str = settings.fail2ban_socket
    status: ServerStatus = await health_service.probe(socket_path)
    # Exactly one probe result is processed per invocation.
    process_health_probe_result(runtime_state, status)
async def _run_probe(app: FastAPI) -> None: