- Add /api/v1/health endpoint with component-level checks - Verify DB connectivity, fail2ban socket, scheduler, session cache - Add SQLite WAL cleanup on startup (orphan crash files) - Migration 8: import_log.timestamp → INTEGER UNIX epoch - Align import_log timestamps with history_archive (already UNIX int) - Add unit tests for DB cleanup and health router Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
136 lines
4.6 KiB
Python
136 lines
4.6 KiB
Python
"""Health check router.
|
|
|
|
A lightweight ``GET /api/v1/health`` endpoint that verifies the application
|
|
is running and can serve requests. Also reports the cached fail2ban liveness
|
|
state so monitoring tools and Docker health checks can observe daemon status
|
|
without probing the socket directly.
|
|
|
|
Comprehensive checks performed:
|
|
- Database connectivity
|
|
- fail2ban socket reachability (via cached server_status)
|
|
- Background scheduler health
|
|
- Session cache initialization
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Annotated, Literal
|
|
|
|
import structlog
|
|
from fastapi import APIRouter, status
|
|
from fastapi.responses import JSONResponse
|
|
|
|
from app.dependencies import AppStateDep, ServerStatusDep
|
|
from app.models.response import ComponentHealth, HealthResponse
|
|
|
|
# Router for the health endpoint; routes are mounted under /api/v1/health and tagged "Health".
router: APIRouter = APIRouter(prefix="/api/v1/health", tags=["Health"])
|
|
|
|
# Module-level structlog logger used to record failed component checks.
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
|
|
|
|
|
@router.get("", summary="Application health check", response_model=HealthResponse)
async def health_check(
    app_state: AppStateDep,
    server_status: ServerStatusDep,
) -> JSONResponse:
    """Return application and component status.

    Performs lightweight checks on key application components (database,
    background scheduler, session cache, fail2ban) and summarises them:

    * ``"ok"`` (HTTP 200): no component reported a problem.
    * ``"degraded"`` (HTTP 200): fail2ban is online but at least one other
      component reported a problem; each problem is listed in ``components``.
    * ``"unavailable"`` (HTTP 503): fail2ban is offline, regardless of the
      state of the other components.

    Only the fail2ban state influences the HTTP status code, so a
    Docker/orchestration health check keyed on the status code reacts to
    fail2ban being unreachable but not to a degraded component.

    Args:
        app_state: Injected application state containing runtime components
            (settings, scheduler, session cache).
        server_status: Injected cached server status snapshot; its ``online``
            flag is used as the fail2ban liveness signal.

    Returns:
        A :class:`~fastapi.responses.JSONResponse` whose body is a serialised
        :class:`~app.models.response.HealthResponse`, with HTTP 200 when
        fail2ban is online and HTTP 503 when it is offline.
    """
    # Only *unhealthy* components are collected here; an empty list means
    # every check passed.
    components: list[ComponentHealth] = []

    # --- Database check ---
    db_healthy: bool = True
    try:
        # Imported inside the handler (kept from the original code); a failure
        # to import is reported the same way as a failed connection.
        from app.config import Settings
        from app.db import open_db

        # Runtime settings, when present, take precedence over the startup settings.
        effective_settings: Settings = (
            app_state.runtime_settings if app_state.runtime_settings is not None else app_state.settings
        )
        # Open and immediately close a connection purely as a connectivity probe.
        test_db = await open_db(effective_settings.database_path)
        await test_db.close()
    except Exception as exc:  # pragma: no cover - defensive, all paths logged
        log.warning("health_check_db_failed", error=str(exc))
        db_healthy = False
        components.append(
            ComponentHealth(name="database", healthy=False, message="Connection failed"),
        )

    # --- Scheduler check ---
    scheduler_state: Literal["running", "stopped", "unknown"] = "unknown"
    try:
        scheduler = app_state.scheduler
        if scheduler is None:
            components.append(
                ComponentHealth(name="scheduler", healthy=False, message="Not initialised"),
            )
        elif getattr(scheduler, "running", False):
            scheduler_state = "running"
        else:
            # A scheduler that exists but is not running is a fault too: record
            # it so the overall status cannot be "ok" while it is stopped.
            scheduler_state = "stopped"
            components.append(
                ComponentHealth(name="scheduler", healthy=False, message="Not running"),
            )
    except Exception:  # pragma: no cover - defensive
        scheduler_state = "unknown"
        components.append(
            ComponentHealth(name="scheduler", healthy=False, message="Not accessible"),
        )

    # --- Cache check ---
    # Start pessimistic so the state is only "initialised" once verified.
    cache_state: Literal["initialised", "uninitialised"] = "uninitialised"
    try:
        if app_state.session_cache is not None:
            cache_state = "initialised"
        else:
            components.append(
                ComponentHealth(name="cache", healthy=False, message="Not initialised"),
            )
    except Exception:  # pragma: no cover - defensive
        # Mirror the scheduler branch: an inaccessible cache is reported as a
        # failed component instead of being silently marked uninitialised.
        cache_state = "uninitialised"
        components.append(
            ComponentHealth(name="cache", healthy=False, message="Not accessible"),
        )

    # --- fail2ban ---
    fail2ban_online: bool = server_status.online
    if not fail2ban_online:
        components.append(
            ComponentHealth(name="fail2ban", healthy=False, message="Socket not reachable"),
        )

    # --- Overall status ---
    overall_status: Literal["ok", "degraded", "unavailable"]
    http_status: int = status.HTTP_200_OK
    if not fail2ban_online:
        # fail2ban being offline is the only condition that yields a 503.
        overall_status = "unavailable"
        http_status = status.HTTP_503_SERVICE_UNAVAILABLE
    elif components:
        overall_status = "degraded"
    else:
        overall_status = "ok"

    payload = HealthResponse(
        status=overall_status,
        fail2ban="online" if fail2ban_online else "offline",
        database="ok" if db_healthy else "error",
        scheduler=scheduler_state,
        cache=cache_state,
        components=components,
    )
    # mode="json" guarantees the dict only holds JSON-serialisable values
    # before it is handed to JSONResponse.
    return JSONResponse(status_code=http_status, content=payload.model_dump(mode="json"))
|