Refactor periodic tasks to use injected scheduler resources

This commit is contained in:
2026-04-11 20:32:36 +02:00
parent 9cba5a9fcb
commit ae81a8f5be
10 changed files with 122 additions and 94 deletions

View File

@@ -25,9 +25,14 @@ import structlog
from app.models.config import PendingRecovery
from app.models.server import ServerStatus
from app.services import health_service
from app.utils.runtime_state import get_effective_settings
from app.utils.runtime_state import (
RuntimeState,
get_effective_settings,
get_runtime_state,
)
if TYPE_CHECKING: # pragma: no cover
from app.config import Settings
from fastapi import FastAPI
log: structlog.stdlib.BoundLogger = structlog.get_logger()
@@ -48,29 +53,21 @@ HEALTH_CHECK_INTERVAL: int = 30
_ACTIVATION_CRASH_WINDOW: int = 60
async def _run_probe(app: FastAPI) -> None:
"""Probe fail2ban and cache the result on *app.state*.
Detects online/offline state transitions. When fail2ban goes offline
within :data:`_ACTIVATION_CRASH_WINDOW` seconds of the last jail
activation, writes a :class:`~app.models.config.PendingRecovery` record to
``app.state.pending_recovery``.
This is the APScheduler job callback. It reads ``fail2ban_socket`` from
the effective runtime settings, runs the health probe, and writes the
result to ``app.state.server_status``.
async def _run_probe_with_resources(settings: "Settings", runtime_state: RuntimeState) -> None:
"""Probe fail2ban and cache the result on the runtime state.
Args:
app: The :class:`fastapi.FastAPI` application instance passed by the
scheduler via the ``kwargs`` mechanism.
settings: The resolved application settings used for the probe.
runtime_state: The mutable runtime state manager.
"""
settings = get_effective_settings(app)
socket_path: str = settings.fail2ban_socket
prev_status: ServerStatus = getattr(
app.state, "server_status", ServerStatus(online=False)
runtime_state,
"server_status",
ServerStatus(online=False),
)
status: ServerStatus = await health_service.probe(socket_path)
app.state.server_status = status
runtime_state.server_status = status
now = datetime.datetime.now(tz=datetime.UTC)
@@ -78,11 +75,9 @@ async def _run_probe(app: FastAPI) -> None:
if status.online and not prev_status.online:
log.info("fail2ban_came_online", version=status.version)
# Clear any pending recovery once fail2ban is back online.
existing: PendingRecovery | None = getattr(
app.state, "pending_recovery", None
)
existing: PendingRecovery | None = getattr(runtime_state, "pending_recovery", None)
if existing is not None and not existing.recovered:
app.state.pending_recovery = PendingRecovery(
runtime_state.pending_recovery = PendingRecovery(
jail_name=existing.jail_name,
activated_at=existing.activated_at,
detected_at=existing.detected_at,
@@ -96,9 +91,7 @@ async def _run_probe(app: FastAPI) -> None:
elif not status.online and prev_status.online:
log.warning("fail2ban_went_offline")
# Check whether this crash happened shortly after a jail activation.
last_activation: ActivationRecord | None = getattr(
app.state, "last_activation", None
)
last_activation: ActivationRecord | None = getattr(runtime_state, "last_activation", None)
if last_activation is not None:
activated_at: datetime.datetime = last_activation["at"]
seconds_since = (now - activated_at).total_seconds()
@@ -106,11 +99,9 @@ async def _run_probe(app: FastAPI) -> None:
jail_name: str = last_activation["jail_name"]
# Only create a new record when there is not already an
# unresolved one for the same jail.
current: PendingRecovery | None = getattr(
app.state, "pending_recovery", None
)
current: PendingRecovery | None = getattr(runtime_state, "pending_recovery", None)
if current is None or current.recovered:
app.state.pending_recovery = PendingRecovery(
runtime_state.pending_recovery = PendingRecovery(
jail_name=jail_name,
activated_at=activated_at,
detected_at=now,
@@ -129,6 +120,13 @@ async def _run_probe(app: FastAPI) -> None:
)
async def _run_probe(app: FastAPI) -> None:
    """Resolve probe resources from *app* and run a single health probe.

    Looks up the effective settings and the runtime state for *app* and
    hands both to :func:`_run_probe_with_resources`.

    Args:
        app: The application instance the settings and runtime state are
            looked up from.
    """
    # Resolve in the same order as the call arguments: settings first,
    # then runtime state.
    effective_settings = get_effective_settings(app)
    state = get_runtime_state(app)
    await _run_probe_with_resources(effective_settings, state)
async def run_probe(app: FastAPI) -> None:
    """Run one health probe on demand, outside the scheduled job context.

    Public entry point that delegates to :func:`_run_probe`.

    Args:
        app: The application instance to probe for.
    """
    await _run_probe(app)
@@ -147,17 +145,20 @@ def register(app: FastAPI) -> None:
# Initialise the cache with an offline placeholder so the dashboard
# endpoint is always able to return a valid response even before the
# first probe fires.
app.state.server_status = ServerStatus(online=False)
settings = get_effective_settings(app)
runtime_state = get_runtime_state(app)
runtime_state.server_status = ServerStatus(online=False)
# Initialise activation tracking state.
app.state.last_activation = None
app.state.pending_recovery = None
runtime_state.last_activation = None
runtime_state.pending_recovery = None
app.state.scheduler.add_job(
_run_probe,
_run_probe_with_resources,
trigger="interval",
seconds=HEALTH_CHECK_INTERVAL,
kwargs={"app": app},
kwargs={"settings": settings, "runtime_state": runtime_state},
id="health_check",
replace_existing=True,
# Fire immediately on startup too, so the UI isn't dark for 30 s.