feat: Stage 4 — fail2ban connection and server status
This commit is contained in:
@@ -33,7 +33,8 @@ from starlette.middleware.base import BaseHTTPMiddleware
|
||||
|
||||
from app.config import Settings, get_settings
|
||||
from app.db import init_db
|
||||
from app.routers import auth, health, setup
|
||||
from app.routers import auth, dashboard, health, setup
|
||||
from app.tasks import health_check
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ensure the bundled fail2ban package is importable from fail2ban-master/
|
||||
@@ -114,6 +115,9 @@ async def _lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
||||
scheduler.start()
|
||||
app.state.scheduler = scheduler
|
||||
|
||||
# --- Health-check background probe ---
|
||||
health_check.register(app)
|
||||
|
||||
log.info("bangui_started")
|
||||
|
||||
try:
|
||||
@@ -268,5 +272,6 @@ def create_app(settings: Settings | None = None) -> FastAPI:
|
||||
app.include_router(health.router)
|
||||
app.include_router(setup.router)
|
||||
app.include_router(auth.router)
|
||||
app.include_router(dashboard.router)
|
||||
|
||||
return app
|
||||
|
||||
46
backend/app/routers/dashboard.py
Normal file
46
backend/app/routers/dashboard.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""Dashboard router.
|
||||
|
||||
Provides the ``GET /api/dashboard/status`` endpoint that returns the cached
|
||||
fail2ban server health snapshot. The snapshot is maintained by the
|
||||
background health-check task and refreshed every 30 seconds.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Request
|
||||
|
||||
from app.dependencies import AuthDep
|
||||
from app.models.server import ServerStatus, ServerStatusResponse
|
||||
|
||||
router: APIRouter = APIRouter(prefix="/api/dashboard", tags=["Dashboard"])
|
||||
|
||||
|
||||
@router.get(
|
||||
"/status",
|
||||
response_model=ServerStatusResponse,
|
||||
summary="Return the cached fail2ban server status",
|
||||
)
|
||||
async def get_server_status(
|
||||
request: Request,
|
||||
_auth: AuthDep,
|
||||
) -> ServerStatusResponse:
|
||||
"""Return the most recent fail2ban health snapshot.
|
||||
|
||||
The snapshot is populated by a background task that runs every 30 seconds.
|
||||
If the task has not yet executed a placeholder ``online=False`` status is
|
||||
returned so the response is always well-formed.
|
||||
|
||||
Args:
|
||||
request: The incoming request (used to access ``app.state``).
|
||||
_auth: Validated session — enforces authentication on this endpoint.
|
||||
|
||||
Returns:
|
||||
:class:`~app.models.server.ServerStatusResponse` containing the
|
||||
current health snapshot.
|
||||
"""
|
||||
cached: ServerStatus = getattr(
|
||||
request.app.state,
|
||||
"server_status",
|
||||
ServerStatus(online=False),
|
||||
)
|
||||
return ServerStatusResponse(status=cached)
|
||||
171
backend/app/services/health_service.py
Normal file
171
backend/app/services/health_service.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""Health service.
|
||||
|
||||
Probes the fail2ban socket to determine whether the daemon is reachable and
|
||||
collects aggregated server statistics (version, jail count, ban counts).
|
||||
|
||||
The probe is intentionally lightweight — it is meant to be called every 30
|
||||
seconds by the background health-check task, not on every HTTP request.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
|
||||
from app.models.server import ServerStatus
|
||||
from app.utils.fail2ban_client import Fail2BanClient, Fail2BanConnectionError, Fail2BanProtocolError
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_SOCKET_TIMEOUT: float = 5.0
|
||||
|
||||
|
||||
def _ok(response: Any) -> Any:
|
||||
"""Extract the payload from a fail2ban ``(return_code, data)`` response.
|
||||
|
||||
fail2ban wraps every response in a ``(0, data)`` success tuple or
|
||||
a ``(1, exception)`` error tuple. This helper returns ``data`` for
|
||||
successful responses or raises :class:`ValueError` for error responses.
|
||||
|
||||
Args:
|
||||
response: Raw value returned by :meth:`~Fail2BanClient.send`.
|
||||
|
||||
Returns:
|
||||
The payload ``data`` portion of the response.
|
||||
|
||||
Raises:
|
||||
ValueError: If the response indicates an error (return code ≠ 0).
|
||||
"""
|
||||
try:
|
||||
code, data = response
|
||||
except (TypeError, ValueError) as exc:
|
||||
raise ValueError(f"Unexpected fail2ban response shape: {response!r}") from exc
|
||||
|
||||
if code != 0:
|
||||
raise ValueError(f"fail2ban returned error code {code}: {data!r}")
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def _to_dict(pairs: Any) -> dict[str, Any]:
|
||||
"""Convert a list of ``(key, value)`` pairs to a plain dict.
|
||||
|
||||
fail2ban returns structured data as lists of 2-tuples rather than dicts.
|
||||
This helper converts them safely, ignoring non-pair items.
|
||||
|
||||
Args:
|
||||
pairs: A list of ``(key, value)`` pairs (or any iterable thereof).
|
||||
|
||||
Returns:
|
||||
A :class:`dict` with the keys and values from *pairs*.
|
||||
"""
|
||||
if not isinstance(pairs, (list, tuple)):
|
||||
return {}
|
||||
result: dict[str, Any] = {}
|
||||
for item in pairs:
|
||||
try:
|
||||
k, v = item
|
||||
result[str(k)] = v
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public interface
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def probe(socket_path: str, timeout: float = _SOCKET_TIMEOUT) -> ServerStatus:
|
||||
"""Probe the fail2ban daemon and return a :class:`~app.models.server.ServerStatus`.
|
||||
|
||||
Sends ``ping``, ``version``, ``status``, and per-jail ``status <jail>``
|
||||
commands. Any socket or protocol error is caught and results in an
|
||||
``online=False`` status so the dashboard can always return a safe default.
|
||||
|
||||
Args:
|
||||
socket_path: Path to the fail2ban Unix domain socket.
|
||||
timeout: Per-command socket timeout in seconds.
|
||||
|
||||
Returns:
|
||||
A :class:`~app.models.server.ServerStatus` snapshot. ``online`` is
|
||||
``True`` when the daemon is reachable, ``False`` otherwise.
|
||||
"""
|
||||
client = Fail2BanClient(socket_path=socket_path, timeout=timeout)
|
||||
|
||||
try:
|
||||
# ------------------------------------------------------------------ #
|
||||
# 1. Connectivity check #
|
||||
# ------------------------------------------------------------------ #
|
||||
ping_data = _ok(await client.send(["ping"]))
|
||||
if ping_data != "pong":
|
||||
log.warning("fail2ban_unexpected_ping_response", response=ping_data)
|
||||
return ServerStatus(online=False)
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 2. Version #
|
||||
# ------------------------------------------------------------------ #
|
||||
try:
|
||||
version: str | None = str(_ok(await client.send(["version"])))
|
||||
except (ValueError, TypeError):
|
||||
version = None
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 3. Global status — jail count and names #
|
||||
# ------------------------------------------------------------------ #
|
||||
status_data = _to_dict(_ok(await client.send(["status"])))
|
||||
active_jails: int = int(status_data.get("Number of jail", 0) or 0)
|
||||
jail_list_raw: str = str(status_data.get("Jail list", "") or "").strip()
|
||||
jail_names: list[str] = (
|
||||
[j.strip() for j in jail_list_raw.split(",") if j.strip()]
|
||||
if jail_list_raw
|
||||
else []
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# 4. Per-jail aggregation #
|
||||
# ------------------------------------------------------------------ #
|
||||
total_bans: int = 0
|
||||
total_failures: int = 0
|
||||
|
||||
for jail_name in jail_names:
|
||||
try:
|
||||
jail_resp = _to_dict(_ok(await client.send(["status", jail_name])))
|
||||
filter_stats = _to_dict(jail_resp.get("Filter") or [])
|
||||
action_stats = _to_dict(jail_resp.get("Actions") or [])
|
||||
total_failures += int(filter_stats.get("Currently failed", 0) or 0)
|
||||
total_bans += int(action_stats.get("Currently banned", 0) or 0)
|
||||
except (ValueError, TypeError, KeyError) as exc:
|
||||
log.warning(
|
||||
"fail2ban_jail_status_parse_error",
|
||||
jail=jail_name,
|
||||
error=str(exc),
|
||||
)
|
||||
|
||||
log.debug(
|
||||
"fail2ban_probe_ok",
|
||||
version=version,
|
||||
active_jails=active_jails,
|
||||
total_bans=total_bans,
|
||||
total_failures=total_failures,
|
||||
)
|
||||
|
||||
return ServerStatus(
|
||||
online=True,
|
||||
version=version,
|
||||
active_jails=active_jails,
|
||||
total_bans=total_bans,
|
||||
total_failures=total_failures,
|
||||
)
|
||||
|
||||
except (Fail2BanConnectionError, Fail2BanProtocolError) as exc:
|
||||
log.warning("fail2ban_probe_failed", error=str(exc))
|
||||
return ServerStatus(online=False)
|
||||
except ValueError as exc:
|
||||
log.error("fail2ban_probe_parse_error", error=str(exc))
|
||||
return ServerStatus(online=False)
|
||||
79
backend/app/tasks/health_check.py
Normal file
79
backend/app/tasks/health_check.py
Normal file
@@ -0,0 +1,79 @@
|
||||
"""Health-check background task.
|
||||
|
||||
Registers an APScheduler job that probes the fail2ban socket every 30 seconds
|
||||
and stores the result on ``app.state.server_status``. The dashboard endpoint
|
||||
reads from this cache, keeping HTTP responses fast and the daemon connection
|
||||
decoupled from user-facing requests.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import structlog
|
||||
|
||||
from app.models.server import ServerStatus
|
||||
from app.services import health_service
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
from fastapi import FastAPI
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
|
||||
#: How often the probe fires (seconds).
|
||||
HEALTH_CHECK_INTERVAL: int = 30
|
||||
|
||||
|
||||
async def _run_probe(app: Any) -> None:
|
||||
"""Probe fail2ban and cache the result on *app.state*.
|
||||
|
||||
This is the APScheduler job callback. It reads ``fail2ban_socket`` from
|
||||
``app.state.settings``, runs the health probe, and writes the result to
|
||||
``app.state.server_status``.
|
||||
|
||||
Args:
|
||||
app: The :class:`fastapi.FastAPI` application instance passed by the
|
||||
scheduler via the ``kwargs`` mechanism.
|
||||
"""
|
||||
socket_path: str = app.state.settings.fail2ban_socket
|
||||
status: ServerStatus = await health_service.probe(socket_path)
|
||||
app.state.server_status = status
|
||||
log.debug(
|
||||
"health_check_complete",
|
||||
online=status.online,
|
||||
version=status.version,
|
||||
active_jails=status.active_jails,
|
||||
)
|
||||
|
||||
|
||||
def register(app: FastAPI) -> None:
|
||||
"""Add the health-check job to the application scheduler.
|
||||
|
||||
Must be called after the scheduler has been started (i.e., inside the
|
||||
lifespan handler, after ``scheduler.start()``).
|
||||
|
||||
Args:
|
||||
app: The :class:`fastapi.FastAPI` application instance whose
|
||||
``app.state.scheduler`` will receive the job.
|
||||
"""
|
||||
# Initialise the cache with an offline placeholder so the dashboard
|
||||
# endpoint is always able to return a valid response even before the
|
||||
# first probe fires.
|
||||
app.state.server_status = ServerStatus(online=False)
|
||||
|
||||
app.state.scheduler.add_job(
|
||||
_run_probe,
|
||||
trigger="interval",
|
||||
seconds=HEALTH_CHECK_INTERVAL,
|
||||
kwargs={"app": app},
|
||||
id="health_check",
|
||||
replace_existing=True,
|
||||
# Fire immediately on startup too, so the UI isn't dark for 30 s.
|
||||
next_run_time=__import__("datetime").datetime.now(
|
||||
tz=__import__("datetime").timezone.utc
|
||||
),
|
||||
)
|
||||
log.info(
|
||||
"health_check_scheduled",
|
||||
interval_seconds=HEALTH_CHECK_INTERVAL,
|
||||
)
|
||||
Reference in New Issue
Block a user