Refactor periodic tasks to use injected scheduler resources
This commit is contained in:
@@ -25,9 +25,14 @@ import structlog
|
||||
from app.models.config import PendingRecovery
|
||||
from app.models.server import ServerStatus
|
||||
from app.services import health_service
|
||||
from app.utils.runtime_state import get_effective_settings
|
||||
from app.utils.runtime_state import (
|
||||
RuntimeState,
|
||||
get_effective_settings,
|
||||
get_runtime_state,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING: # pragma: no cover
|
||||
from app.config import Settings
|
||||
from fastapi import FastAPI
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
@@ -48,29 +53,21 @@ HEALTH_CHECK_INTERVAL: int = 30
|
||||
_ACTIVATION_CRASH_WINDOW: int = 60
|
||||
|
||||
|
||||
async def _run_probe(app: FastAPI) -> None:
|
||||
"""Probe fail2ban and cache the result on *app.state*.
|
||||
|
||||
Detects online/offline state transitions. When fail2ban goes offline
|
||||
within :data:`_ACTIVATION_CRASH_WINDOW` seconds of the last jail
|
||||
activation, writes a :class:`~app.models.config.PendingRecovery` record to
|
||||
``app.state.pending_recovery``.
|
||||
|
||||
This is the APScheduler job callback. It reads ``fail2ban_socket`` from
|
||||
the effective runtime settings, runs the health probe, and writes the
|
||||
result to ``app.state.server_status``.
|
||||
async def _run_probe_with_resources(settings: "Settings", runtime_state: RuntimeState) -> None:
|
||||
"""Probe fail2ban and cache the result on the runtime state.
|
||||
|
||||
Args:
|
||||
app: The :class:`fastapi.FastAPI` application instance passed by the
|
||||
scheduler via the ``kwargs`` mechanism.
|
||||
settings: The resolved application settings used for the probe.
|
||||
runtime_state: The mutable runtime state manager.
|
||||
"""
|
||||
settings = get_effective_settings(app)
|
||||
socket_path: str = settings.fail2ban_socket
|
||||
prev_status: ServerStatus = getattr(
|
||||
app.state, "server_status", ServerStatus(online=False)
|
||||
runtime_state,
|
||||
"server_status",
|
||||
ServerStatus(online=False),
|
||||
)
|
||||
status: ServerStatus = await health_service.probe(socket_path)
|
||||
app.state.server_status = status
|
||||
runtime_state.server_status = status
|
||||
|
||||
now = datetime.datetime.now(tz=datetime.UTC)
|
||||
|
||||
@@ -78,11 +75,9 @@ async def _run_probe(app: FastAPI) -> None:
|
||||
if status.online and not prev_status.online:
|
||||
log.info("fail2ban_came_online", version=status.version)
|
||||
# Clear any pending recovery once fail2ban is back online.
|
||||
existing: PendingRecovery | None = getattr(
|
||||
app.state, "pending_recovery", None
|
||||
)
|
||||
existing: PendingRecovery | None = getattr(runtime_state, "pending_recovery", None)
|
||||
if existing is not None and not existing.recovered:
|
||||
app.state.pending_recovery = PendingRecovery(
|
||||
runtime_state.pending_recovery = PendingRecovery(
|
||||
jail_name=existing.jail_name,
|
||||
activated_at=existing.activated_at,
|
||||
detected_at=existing.detected_at,
|
||||
@@ -96,9 +91,7 @@ async def _run_probe(app: FastAPI) -> None:
|
||||
elif not status.online and prev_status.online:
|
||||
log.warning("fail2ban_went_offline")
|
||||
# Check whether this crash happened shortly after a jail activation.
|
||||
last_activation: ActivationRecord | None = getattr(
|
||||
app.state, "last_activation", None
|
||||
)
|
||||
last_activation: ActivationRecord | None = getattr(runtime_state, "last_activation", None)
|
||||
if last_activation is not None:
|
||||
activated_at: datetime.datetime = last_activation["at"]
|
||||
seconds_since = (now - activated_at).total_seconds()
|
||||
@@ -106,11 +99,9 @@ async def _run_probe(app: FastAPI) -> None:
|
||||
jail_name: str = last_activation["jail_name"]
|
||||
# Only create a new record when there is not already an
|
||||
# unresolved one for the same jail.
|
||||
current: PendingRecovery | None = getattr(
|
||||
app.state, "pending_recovery", None
|
||||
)
|
||||
current: PendingRecovery | None = getattr(runtime_state, "pending_recovery", None)
|
||||
if current is None or current.recovered:
|
||||
app.state.pending_recovery = PendingRecovery(
|
||||
runtime_state.pending_recovery = PendingRecovery(
|
||||
jail_name=jail_name,
|
||||
activated_at=activated_at,
|
||||
detected_at=now,
|
||||
@@ -129,6 +120,13 @@ async def _run_probe(app: FastAPI) -> None:
|
||||
)
|
||||
|
||||
|
||||
async def _run_probe(app: FastAPI) -> None:
    """Resolve the probe resources from *app* and run one health probe.

    Looks up the effective settings and the runtime state for *app* and
    hands both to :func:`_run_probe_with_resources`.

    Args:
        app: The :class:`fastapi.FastAPI` application instance.
    """
    # Resolve settings first, then runtime state, matching the original
    # argument evaluation order.
    resolved_settings = get_effective_settings(app)
    state = get_runtime_state(app)
    await _run_probe_with_resources(resolved_settings, state)
|
||||
|
||||
|
||||
async def run_probe(app: FastAPI) -> None:
|
||||
"""Run a single health probe outside the scheduled job context."""
|
||||
await _run_probe(app)
|
||||
@@ -147,17 +145,20 @@ def register(app: FastAPI) -> None:
|
||||
# Initialise the cache with an offline placeholder so the dashboard
|
||||
# endpoint is always able to return a valid response even before the
|
||||
# first probe fires.
|
||||
app.state.server_status = ServerStatus(online=False)
|
||||
settings = get_effective_settings(app)
|
||||
runtime_state = get_runtime_state(app)
|
||||
|
||||
runtime_state.server_status = ServerStatus(online=False)
|
||||
|
||||
# Initialise activation tracking state.
|
||||
app.state.last_activation = None
|
||||
app.state.pending_recovery = None
|
||||
runtime_state.last_activation = None
|
||||
runtime_state.pending_recovery = None
|
||||
|
||||
app.state.scheduler.add_job(
|
||||
_run_probe,
|
||||
_run_probe_with_resources,
|
||||
trigger="interval",
|
||||
seconds=HEALTH_CHECK_INTERVAL,
|
||||
kwargs={"app": app},
|
||||
kwargs={"settings": settings, "runtime_state": runtime_state},
|
||||
id="health_check",
|
||||
replace_existing=True,
|
||||
# Fire immediately on startup too, so the UI isn't dark for 30 s.
|
||||
|
||||
Reference in New Issue
Block a user