172 lines
6.3 KiB
Python
172 lines
6.3 KiB
Python
"""Health service.
|
|
|
|
Probes the fail2ban socket to determine whether the daemon is reachable and
collects aggregated server statistics (version, jail count, ban and failure
counts).

The probe is intentionally lightweight — it is meant to be called every 30
seconds by the background health-check task, not on every HTTP request.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
import structlog
|
|
|
|
from app.models.server import ServerStatus
|
|
from app.utils.fail2ban_client import Fail2BanClient, Fail2BanConnectionError, Fail2BanProtocolError
|
|
|
|
# Module-level structured logger used for all probe diagnostics below.
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Internal helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Default per-command socket timeout in seconds; used as the default value of
# the ``timeout`` parameter of :func:`probe`.
_SOCKET_TIMEOUT: float = 5.0
|
|
|
|
|
|
def _ok(response: Any) -> Any:
|
|
"""Extract the payload from a fail2ban ``(return_code, data)`` response.
|
|
|
|
fail2ban wraps every response in a ``(0, data)`` success tuple or
|
|
a ``(1, exception)`` error tuple. This helper returns ``data`` for
|
|
successful responses or raises :class:`ValueError` for error responses.
|
|
|
|
Args:
|
|
response: Raw value returned by :meth:`~Fail2BanClient.send`.
|
|
|
|
Returns:
|
|
The payload ``data`` portion of the response.
|
|
|
|
Raises:
|
|
ValueError: If the response indicates an error (return code ≠ 0).
|
|
"""
|
|
try:
|
|
code, data = response
|
|
except (TypeError, ValueError) as exc:
|
|
raise ValueError(f"Unexpected fail2ban response shape: {response!r}") from exc
|
|
|
|
if code != 0:
|
|
raise ValueError(f"fail2ban returned error code {code}: {data!r}")
|
|
|
|
return data
|
|
|
|
|
|
def _to_dict(pairs: Any) -> dict[str, Any]:
|
|
"""Convert a list of ``(key, value)`` pairs to a plain dict.
|
|
|
|
fail2ban returns structured data as lists of 2-tuples rather than dicts.
|
|
This helper converts them safely, ignoring non-pair items.
|
|
|
|
Args:
|
|
pairs: A list of ``(key, value)`` pairs (or any iterable thereof).
|
|
|
|
Returns:
|
|
A :class:`dict` with the keys and values from *pairs*.
|
|
"""
|
|
if not isinstance(pairs, (list, tuple)):
|
|
return {}
|
|
result: dict[str, Any] = {}
|
|
for item in pairs:
|
|
try:
|
|
k, v = item
|
|
result[str(k)] = v
|
|
except (TypeError, ValueError):
|
|
pass
|
|
return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public interface
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def probe(socket_path: str, timeout: float = _SOCKET_TIMEOUT) -> ServerStatus:
    """Probe the fail2ban daemon and return a :class:`~app.models.server.ServerStatus`.

    Sends ``ping``, ``version``, ``status``, and per-jail ``status <jail>``
    commands.  Any socket, protocol, or response-parsing error is caught and
    results in an ``online=False`` status so the dashboard can always return
    a safe default.

    Args:
        socket_path: Path to the fail2ban Unix domain socket.
        timeout: Per-command socket timeout in seconds.

    Returns:
        A :class:`~app.models.server.ServerStatus` snapshot.  ``online`` is
        ``True`` when the daemon answered the ping and its global status
        could be parsed, ``False`` otherwise.  A jail whose individual
        status cannot be parsed is logged and left out of the totals
        without failing the whole probe.
    """
    client = Fail2BanClient(socket_path=socket_path, timeout=timeout)

    try:
        # ------------------------------------------------------------------ #
        # 1. Connectivity check                                                #
        # ------------------------------------------------------------------ #
        ping_data = _ok(await client.send(["ping"]))
        if ping_data != "pong":
            log.warning("fail2ban_unexpected_ping_response", response=ping_data)
            return ServerStatus(online=False)

        # ------------------------------------------------------------------ #
        # 2. Version                                                           #
        # ------------------------------------------------------------------ #
        # A missing/garbled version is not fatal: report it as unknown.
        try:
            version: str | None = str(_ok(await client.send(["version"])))
        except (ValueError, TypeError):
            version = None

        # ------------------------------------------------------------------ #
        # 3. Global status — jail count and names                              #
        # ------------------------------------------------------------------ #
        status_data = _to_dict(_ok(await client.send(["status"])))
        active_jails: int = int(status_data.get("Number of jail", 0) or 0)
        jail_list_raw: str = str(status_data.get("Jail list", "") or "")
        # Splitting an empty string yields one empty piece, which the filter
        # below drops, so no separate "empty list" branch is needed.
        jail_names: list[str] = [
            name
            for name in (piece.strip() for piece in jail_list_raw.split(","))
            if name
        ]

        # ------------------------------------------------------------------ #
        # 4. Per-jail aggregation                                              #
        # ------------------------------------------------------------------ #
        total_bans: int = 0
        total_failures: int = 0

        for jail_name in jail_names:
            try:
                jail_resp = _to_dict(_ok(await client.send(["status", jail_name])))
                filter_stats = _to_dict(jail_resp.get("Filter") or [])
                action_stats = _to_dict(jail_resp.get("Actions") or [])
                total_failures += int(filter_stats.get("Currently failed", 0) or 0)
                total_bans += int(action_stats.get("Currently banned", 0) or 0)
            except (ValueError, TypeError, KeyError) as exc:
                # Best effort: one unparsable jail must not fail the probe.
                log.warning(
                    "fail2ban_jail_status_parse_error",
                    jail=jail_name,
                    error=str(exc),
                )

        log.debug(
            "fail2ban_probe_ok",
            version=version,
            active_jails=active_jails,
            total_bans=total_bans,
            total_failures=total_failures,
        )

        return ServerStatus(
            online=True,
            version=version,
            active_jails=active_jails,
            total_bans=total_bans,
            total_failures=total_failures,
        )

    except (Fail2BanConnectionError, Fail2BanProtocolError) as exc:
        log.warning("fail2ban_probe_failed", error=str(exc))
        return ServerStatus(online=False)
    except (ValueError, TypeError) as exc:
        # ``int()`` raises TypeError (not ValueError) for payloads such as a
        # list or tuple; catch both so a malformed response cannot escape
        # the probe and break the documented safe-default behaviour.
        log.error("fail2ban_probe_parse_error", error=str(exc))
        return ServerStatus(online=False)
|