Files
BanGUI/backend/app/services/health_service.py
Lukas 7ec80fdeec refactor(logging): replace structlog with stdlib logging compat layer
- Remove structlog dependency from backend/pyproject.toml
- Add app.utils.logging_compat shim for keyword-arg logging API
- Add app.utils.json_formatter for JSON log output with extra fields
- Update all backend modules to use logging_compat.get_logger()
- Update docstrings in log_sanitizer.py and json_formatter.py
- Update test comment in test_async_utils.py
- Record 406 failing tests in Docs/Tasks.md for tracking
2026-05-10 13:37:54 +02:00

232 lines
7.6 KiB
Python

"""Health service.
Probes the fail2ban socket to determine whether the daemon is reachable and
collects aggregated server statistics (version, jail count, ban counts).
The probe is intentionally lightweight — it is meant to be called every 30
seconds by the background health-check task, not on every HTTP request.
"""
from __future__ import annotations
import asyncio
from collections.abc import Awaitable, Callable
from typing import TypeVar, cast
from app.utils.logging_compat import get_logger
from app import __version__
from app.models.config_domain import DomainServiceStatus
from app.models.server import ServerStatus
from app.utils.constants import FAIL2BAN_SOCKET_TIMEOUT_FAST
from app.utils.fail2ban_client import (
Fail2BanClient,
Fail2BanCommand,
Fail2BanConnectionError,
Fail2BanProtocolError,
)
from app.utils.fail2ban_response import (
ok,
to_dict,
)
# Module-level logger obtained from the project's logging compatibility
# layer; call sites in this module pass event fields as keyword arguments.
log = get_logger(__name__)
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
# Generic type variable used by ``_safe_get_typed`` to tie its declared
# return type to the type of the supplied default value.
T = TypeVar("T")
async def _safe_get(
    client: Fail2BanClient,
    command: Fail2BanCommand,
    default: object | None = None,
) -> object | None:
    """Run *command* against fail2ban, falling back to *default* on failure.

    Args:
        client: Client used to send the command.
        command: The fail2ban command to send.
        default: Value handed back when the command cannot be completed.

    Returns:
        The response payload as unwrapped by ``ok``, or *default* when
        sending or unwrapping raises one of the handled errors
        (connection, protocol, ``ValueError`` or ``OSError``).
    """
    recoverable = (
        Fail2BanConnectionError,
        Fail2BanProtocolError,
        ValueError,
        OSError,
    )
    try:
        response = await client.send(command)
        payload = ok(response)
    except recoverable:
        # Best-effort lookup: any handled failure yields the fallback value.
        return default
    return payload
async def _safe_get_typed(
    client: Fail2BanClient,
    command: Fail2BanCommand,
    default: T,
) -> T:
    """Typed variant of :func:`_safe_get`.

    The value is passed through :func:`typing.cast` only, so the type is
    asserted for the benefit of static checkers; no runtime conversion or
    validation of the response takes place here.

    Args:
        client: Client used to send the command.
        command: The fail2ban command to send.
        default: Fallback value; its type is also the declared result type.

    Returns:
        Whatever :func:`_safe_get` produced, annotated as ``T``.
    """
    result = await _safe_get(client, command, default)
    return cast("T", result)
async def get_service_status(
    socket_path: str,
    probe_fn: Callable[[str], Awaitable[ServerStatus]] | None = None,
) -> DomainServiceStatus:
    """Build the fail2ban service status, including its log configuration.

    The daemon snapshot comes from the injected *probe_fn*.  When that
    snapshot reports the daemon as online, the log level and log target are
    additionally queried over the socket; otherwise both are reported as
    ``"UNKNOWN"``.

    Args:
        socket_path: Path to the fail2ban Unix domain socket.
        probe_fn: Coroutine function that takes the socket path and returns
            a :class:`~app.models.server.ServerStatus`.  Although the
            parameter is declared with a ``None`` default, a value must be
            supplied.

    Returns:
        :class:`~app.models.config_domain.DomainServiceStatus`.  Its
        ``version`` field is populated from the application's
        ``__version__``.

    Raises:
        ValueError: If *probe_fn* is ``None``.
    """
    if probe_fn is None:
        raise ValueError(
            "probe_fn is required to avoid service-to-service coupling"
        )

    server_status = await probe_fn(socket_path)

    if not server_status.online:
        # The daemon cannot be queried, so the log settings are unknown.
        log_level = "UNKNOWN"
        log_target = "UNKNOWN"
    else:
        fast_client = Fail2BanClient(
            socket_path=socket_path,
            timeout=FAIL2BAN_SOCKET_TIMEOUT_FAST,
        )
        # Both lookups are independent, so they are awaited together.
        raw_level, raw_target = await asyncio.gather(
            _safe_get_typed(fast_client, ["get", "loglevel"], "INFO"),
            _safe_get_typed(fast_client, ["get", "logtarget"], "STDOUT"),
        )
        # Empty/falsy answers fall back to the same defaults as failures.
        log_level = str(raw_level or "INFO").upper()
        log_target = str(raw_target or "STDOUT")

    log.info(
        "service_status_fetched",
        online=server_status.online,
        jail_count=server_status.active_jails,
    )

    return DomainServiceStatus(
        online=server_status.online,
        version=__version__,
        jail_count=server_status.active_jails,
        total_bans=server_status.total_bans,
        total_failures=server_status.total_failures,
        log_level=log_level,
        log_target=log_target,
    )
# ---------------------------------------------------------------------------
# Public interface
# ---------------------------------------------------------------------------
async def probe(
    socket_path: str,
    timeout: float = FAIL2BAN_SOCKET_TIMEOUT_FAST,
) -> ServerStatus:
    """Probe the fail2ban daemon and return a status snapshot.

    Sends ``ping``, ``version``, ``status``, and per-jail ``status <jail>``
    commands.  Connection errors, protocol errors, and failures to parse the
    global status response are caught and result in an ``online=False``
    status so the dashboard can always return a safe default.

    A failing ``version`` command does not mark the daemon offline; the
    version is simply reported as ``None``.  A jail whose status response
    cannot be parsed is logged and skipped.

    Args:
        socket_path: Path to the fail2ban Unix domain socket.
        timeout: Per-command socket timeout in seconds.

    Returns:
        A :class:`~app.models.server.ServerStatus` snapshot.  ``online`` is
        ``True`` when the daemon is reachable, ``False`` otherwise.
    """
    client = Fail2BanClient(socket_path=socket_path, timeout=timeout)
    try:
        # -------------------------------------------------------------- #
        # 1. Connectivity check                                          #
        # -------------------------------------------------------------- #
        ping_data = ok(await client.send(["ping"]))
        if ping_data != "pong":
            log.warning(
                "fail2ban_unexpected_ping_response",
                response=ping_data,
            )
            return ServerStatus(online=False)

        # -------------------------------------------------------------- #
        # 2. Version                                                     #
        # -------------------------------------------------------------- #
        try:
            version: str | None = str(ok(await client.send(["version"])))
        except (ValueError, TypeError):
            # The version is informational only; keep probing without it.
            version = None

        # -------------------------------------------------------------- #
        # 3. Global status — jail count and names                        #
        # -------------------------------------------------------------- #
        status_data = to_dict(ok(await client.send(["status"])))
        active_jails: int = int(
            str(status_data.get("Number of jail", 0) or 0)
        )
        jail_list_raw: str = str(
            status_data.get("Jail list", "") or ""
        ).strip()
        # Splitting an empty string yields [""], which the filter drops, so
        # no separate empty-list branch is needed.
        jail_names: list[str] = [
            name
            for name in (part.strip() for part in jail_list_raw.split(","))
            if name
        ]

        # -------------------------------------------------------------- #
        # 4. Per-jail aggregation                                        #
        # -------------------------------------------------------------- #
        total_bans: int = 0
        total_failures: int = 0
        for jail_name in jail_names:
            try:
                jail_resp = to_dict(
                    ok(await client.send(["status", jail_name]))
                )
                filter_stats = to_dict(jail_resp.get("Filter") or [])
                action_stats = to_dict(jail_resp.get("Actions") or [])
                total_failures += int(
                    str(filter_stats.get("Currently failed", 0) or 0)
                )
                total_bans += int(
                    str(action_stats.get("Currently banned", 0) or 0)
                )
            except (ValueError, TypeError, KeyError) as exc:
                # One malformed jail must not invalidate the whole probe.
                log.warning(
                    "fail2ban_jail_status_parse_error",
                    jail=jail_name,
                    error=str(exc),
                )

        log.debug(
            "fail2ban_probe_ok",
            version=version,
            active_jails=active_jails,
            total_bans=total_bans,
            total_failures=total_failures,
        )
        return ServerStatus(
            online=True,
            version=version,
            active_jails=active_jails,
            total_bans=total_bans,
            total_failures=total_failures,
        )
    except (Fail2BanConnectionError, Fail2BanProtocolError) as exc:
        log.warning("fail2ban_probe_failed", error=str(exc))
        return ServerStatus(online=False)
    except (ValueError, TypeError) as exc:
        # TypeError is handled alongside ValueError, matching the version
        # and per-jail handlers above, so that a malformed global status
        # payload yields an offline snapshot instead of escaping the probe.
        log.error("fail2ban_probe_parse_error", error=str(exc))
        return ServerStatus(online=False)