- Remove structlog dependency from backend/pyproject.toml - Add app.utils.logging_compat shim for keyword-arg logging API - Add app.utils.json_formatter for JSON log output with extra fields - Update all backend modules to use logging_compat.get_logger() - Update docstrings in log_sanitizer.py and json_formatter.py - Update test comment in test_async_utils.py - Record 406 failing tests in Docs/Tasks.md for tracking
232 lines
7.6 KiB
Python
232 lines
7.6 KiB
Python
"""Health service.
|
|
|
|
Probes the fail2ban socket to determine whether the daemon is reachable and
|
|
collects aggregated server statistics (version, jail count, ban counts).
|
|
|
|
The probe is intentionally lightweight — it is meant to be called every 30
|
|
seconds by the background health-check task, not on every HTTP request.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from collections.abc import Awaitable, Callable
|
|
from typing import TypeVar, cast
|
|
|
|
from app.utils.logging_compat import get_logger
|
|
|
|
from app import __version__
|
|
from app.models.config_domain import DomainServiceStatus
|
|
from app.models.server import ServerStatus
|
|
from app.utils.constants import FAIL2BAN_SOCKET_TIMEOUT_FAST
|
|
from app.utils.fail2ban_client import (
|
|
Fail2BanClient,
|
|
Fail2BanCommand,
|
|
Fail2BanConnectionError,
|
|
Fail2BanProtocolError,
|
|
)
|
|
from app.utils.fail2ban_response import (
|
|
ok,
|
|
to_dict,
|
|
)
|
|
|
|
# Module-level logger; call sites in this file pass structured fields as
# keyword arguments (e.g. ``log.info("event", key=value)``).
log = get_logger(__name__)

# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------

# Type variable tying the return type of ``_safe_get_typed`` to the type of
# its ``default`` argument.
T = TypeVar("T")
|
|
|
|
|
|
async def _safe_get(
    client: Fail2BanClient,
    command: Fail2BanCommand,
    default: object | None = None,
) -> object | None:
    """Run *command* against fail2ban, falling back to *default* on failure.

    Args:
        client: Client used to send the command.
        command: The fail2ban command to send.
        default: Value handed back when the command cannot be completed.

    Returns:
        The response after it has been passed through ``ok``, or *default*
        when sending or unwrapping raises a connection error, protocol
        error, ``ValueError`` or ``OSError``.
    """
    # Failures that are treated as "value unavailable" rather than fatal.
    expected_failures = (
        Fail2BanConnectionError,
        Fail2BanProtocolError,
        ValueError,
        OSError,
    )
    try:
        response = await client.send(command)
        payload = ok(response)
    except expected_failures:
        return default
    return payload
|
|
|
|
|
|
async def _safe_get_typed(
    client: Fail2BanClient,
    command: Fail2BanCommand,
    default: T,
) -> T:
    """Typed wrapper around :func:`_safe_get`.

    Args:
        client: Client used to send the command.
        command: The fail2ban command to send.
        default: Fallback value; its static type becomes the return type.

    Returns:
        Whatever :func:`_safe_get` returns, presented to the type checker as
        the type of *default*. The ``cast`` is a static hint only; the value
        is not converted or validated at runtime.
    """
    result = await _safe_get(client, command, default)
    return cast("T", result)
|
|
|
|
|
|
async def get_service_status(
    socket_path: str,
    probe_fn: Callable[[str], Awaitable[ServerStatus]] | None = None,
) -> DomainServiceStatus:
    """Return fail2ban service health status with log configuration.

    Runs the probe, then — only when the daemon is online — queries the
    daemon's ``loglevel`` and ``logtarget`` concurrently. Each of those two
    queries falls back to a default (``"INFO"`` / ``"STDOUT"``) if it fails.
    When the daemon is offline both fields are reported as ``"UNKNOWN"``.

    Args:
        socket_path: Path to the fail2ban Unix domain socket.
        probe_fn: Optional coroutine function taking the socket path and
            returning a :class:`~app.models.server.ServerStatus`. When
            omitted, this module's :func:`probe` is used.

    Returns:
        :class:`~app.models.config_domain.DomainServiceStatus` built from the
        probe result and the log settings. ``version`` is the application's
        ``app.__version__``, not the version reported by the probe.
    """
    if probe_fn is None:
        # Honour the documented default. ``probe`` is defined in this same
        # module, so falling back to it adds no cross-service dependency.
        probe_fn = probe

    server_status = await probe_fn(socket_path)

    if server_status.online:
        client = Fail2BanClient(
            socket_path=socket_path,
            timeout=FAIL2BAN_SOCKET_TIMEOUT_FAST,
        )
        # The two settings are independent, so fetch them concurrently.
        log_level_raw, log_target_raw = await asyncio.gather(
            _safe_get_typed(client, ["get", "loglevel"], "INFO"),
            _safe_get_typed(client, ["get", "logtarget"], "STDOUT"),
        )
        # ``or`` guards against a successful but empty/None response.
        log_level = str(log_level_raw or "INFO").upper()
        log_target = str(log_target_raw or "STDOUT")
    else:
        log_level = "UNKNOWN"
        log_target = "UNKNOWN"

    log.info(
        "service_status_fetched",
        online=server_status.online,
        jail_count=server_status.active_jails,
    )

    return DomainServiceStatus(
        online=server_status.online,
        version=__version__,
        jail_count=server_status.active_jails,
        total_bans=server_status.total_bans,
        total_failures=server_status.total_failures,
        log_level=log_level,
        log_target=log_target,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public interface
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
async def probe(
    socket_path: str,
    timeout: float = FAIL2BAN_SOCKET_TIMEOUT_FAST,
) -> ServerStatus:
    """Probe the fail2ban daemon and return a
    :class:`~app.models.server.ServerStatus`.

    Sends ``ping``, ``version``, ``status``, and per-jail ``status <jail>``
    commands. Any socket, protocol or response-parsing error is caught and
    results in an ``online=False`` status so the dashboard can always return
    a safe default.

    Args:
        socket_path: Path to the fail2ban Unix domain socket.
        timeout: Per-command socket timeout in seconds.

    Returns:
        A :class:`~app.models.server.ServerStatus` snapshot. ``online`` is
        ``True`` when the daemon answered ``ping`` with ``"pong"`` and the
        global status could be parsed, ``False`` otherwise. ``version`` is
        ``None`` when the version response could not be unwrapped. A jail
        whose status cannot be parsed is logged and left out of the totals.
    """
    client = Fail2BanClient(socket_path=socket_path, timeout=timeout)

    try:
        # ------------------------------------------------------------------ #
        # 1. Connectivity check                                              #
        # ------------------------------------------------------------------ #
        ping_data = ok(await client.send(["ping"]))
        if ping_data != "pong":
            log.warning(
                "fail2ban_unexpected_ping_response",
                response=ping_data,
            )
            return ServerStatus(online=False)

        # ------------------------------------------------------------------ #
        # 2. Version                                                         #
        # ------------------------------------------------------------------ #
        # A bad version response is not fatal: the daemon already answered
        # the ping, so only the version field is dropped.
        try:
            version: str | None = str(ok(await client.send(["version"])))
        except (ValueError, TypeError):
            version = None

        # ------------------------------------------------------------------ #
        # 3. Global status — jail count and names                            #
        # ------------------------------------------------------------------ #
        status_data = to_dict(ok(await client.send(["status"])))
        active_jails: int = int(str(status_data.get("Number of jail", 0) or 0))
        jail_list_raw: str = str(status_data.get("Jail list", "") or "")
        # Splitting an empty string yields [""], which the filter removes,
        # so no separate empty-list branch is needed.
        jail_names: list[str] = [
            name for part in jail_list_raw.split(",") if (name := part.strip())
        ]

        # ------------------------------------------------------------------ #
        # 4. Per-jail aggregation                                            #
        # ------------------------------------------------------------------ #
        total_bans: int = 0
        total_failures: int = 0

        for jail_name in jail_names:
            try:
                jail_resp = to_dict(
                    ok(await client.send(["status", jail_name]))
                )
                filter_stats = to_dict(jail_resp.get("Filter") or [])
                action_stats = to_dict(jail_resp.get("Actions") or [])
                jail_failures = int(
                    str(filter_stats.get("Currently failed", 0) or 0)
                )
                jail_bans = int(
                    str(action_stats.get("Currently banned", 0) or 0)
                )
            except (ValueError, TypeError, KeyError) as exc:
                # One unreadable jail must not take the whole probe down.
                log.warning(
                    "fail2ban_jail_status_parse_error",
                    jail=jail_name,
                    error=str(exc),
                )
            else:
                # Add both counters only after both parsed, so a failing
                # jail is never half-counted.
                total_failures += jail_failures
                total_bans += jail_bans

        log.debug(
            "fail2ban_probe_ok",
            version=version,
            active_jails=active_jails,
            total_bans=total_bans,
            total_failures=total_failures,
        )

        return ServerStatus(
            online=True,
            version=version,
            active_jails=active_jails,
            total_bans=total_bans,
            total_failures=total_failures,
        )

    except (Fail2BanConnectionError, Fail2BanProtocolError, OSError) as exc:
        # OSError is included for consistency with ``_safe_get`` in this
        # module, which treats it as an expected failure of ``client.send``.
        log.warning("fail2ban_probe_failed", error=str(exc))
        return ServerStatus(online=False)
    except (ValueError, TypeError) as exc:
        # TypeError is included because the version and per-jail parsing
        # above already guard against it; the global status parse uses the
        # same helpers.
        log.error("fail2ban_probe_parse_error", error=str(exc))
        return ServerStatus(online=False)
|