feat: Task 3 — invalid jail config recovery (pre-validation, crash detection, rollback)

- Backend: extend activate_jail() with pre-validation and 4-attempt post-reload
  health probe; add validate_jail_config() and rollback_jail() service functions
- Backend: new endpoints POST /api/config/jails/{name}/validate,
  GET /api/config/pending-recovery, POST /api/config/jails/{name}/rollback
- Backend: extend JailActivationResponse with fail2ban_running + validation_warnings;
  add JailValidationIssue, JailValidationResult, PendingRecovery, RollbackResponse models
- Backend: health_check task tracks last_activation and creates PendingRecovery
  record when fail2ban goes offline within 60 s of an activation
- Backend: add fail2ban_start_command setting (configurable start cmd for rollback)
- Frontend: ActivateJailDialog — pre-validation on open, crash-detected callback,
  extended spinner text during activation+verify
- Frontend: JailsTab — Validate Config button for inactive jails, validation
  result panels (blocking errors + advisory warnings)
- Frontend: RecoveryBanner component — polls pending-recovery, shows full-width
  alert with Disable & Restart / View Logs buttons
- Frontend: MainLayout — mount RecoveryBanner at layout level
- Tests: 19 new backend service tests (validate, rollback, filter/action parsing)
  + 6 health_check crash-detection tests + 11 router tests; 5 RecoveryBanner
  frontend tests; fix mock setup in existing activate_jail tests
This commit is contained in:
2026-03-14 14:13:07 +01:00
parent ab11ece001
commit 0966f347c4
17 changed files with 1862 additions and 26 deletions

View File

@@ -9,6 +9,9 @@ global settings, test regex patterns, add log paths, and preview log files.
* ``GET /api/config/jails/inactive`` — list all inactive jails
* ``POST /api/config/jails/{name}/activate`` — activate an inactive jail
* ``POST /api/config/jails/{name}/deactivate`` — deactivate an active jail
* ``POST /api/config/jails/{name}/validate`` — validate jail config pre-activation (Task 3)
* ``POST /api/config/jails/{name}/rollback`` — disable bad jail and restart fail2ban (Task 3)
* ``GET /api/config/pending-recovery`` — active crash-recovery record (Task 3)
* ``POST /api/config/jails/{name}/filter`` — assign a filter to a jail
* ``POST /api/config/jails/{name}/action`` — add an action to a jail
* ``DELETE /api/config/jails/{name}/action/{action_name}`` — remove an action from a jail
@@ -34,6 +37,7 @@ global settings, test regex patterns, add log paths, and preview log files.
from __future__ import annotations
import datetime
from typing import Annotated
from fastapi import APIRouter, HTTPException, Path, Query, Request, status
@@ -60,12 +64,15 @@ from app.models.config import (
JailConfigListResponse,
JailConfigResponse,
JailConfigUpdate,
JailValidationResult,
LogPreviewRequest,
LogPreviewResponse,
MapColorThresholdsResponse,
MapColorThresholdsUpdate,
PendingRecovery,
RegexTestRequest,
RegexTestResponse,
RollbackResponse,
ServiceStatusResponse,
)
from app.services import config_file_service, config_service, jail_service
@@ -611,7 +618,7 @@ async def activate_jail(
req = body if body is not None else ActivateJailRequest()
try:
return await config_file_service.activate_jail(
result = await config_file_service.activate_jail(
config_dir, socket_path, name, req
)
except JailNameError as exc:
@@ -631,6 +638,24 @@ async def activate_jail(
except Fail2BanConnectionError as exc:
raise _bad_gateway(exc) from exc
# Record this activation so the health-check task can attribute a
# subsequent fail2ban crash to it.
request.app.state.last_activation = {
"jail_name": name,
"at": datetime.datetime.now(tz=datetime.UTC),
}
# If fail2ban stopped responding after the reload, create a pending-recovery
# record immediately (before the background health task notices).
if not result.fail2ban_running:
request.app.state.pending_recovery = PendingRecovery(
jail_name=name,
activated_at=request.app.state.last_activation["at"],
detected_at=datetime.datetime.now(tz=datetime.UTC),
)
return result
@router.post(
"/jails/{name}/deactivate",
@@ -684,6 +709,125 @@ async def deactivate_jail(
raise _bad_gateway(exc) from exc
# ---------------------------------------------------------------------------
# Jail validation & rollback endpoints (Task 3)
# ---------------------------------------------------------------------------
@router.post(
"/jails/{name}/validate",
response_model=JailValidationResult,
summary="Validate jail configuration before activation",
)
async def validate_jail(
request: Request,
_auth: AuthDep,
name: _NamePath,
) -> JailValidationResult:
"""Run pre-activation validation checks on a jail configuration.
Validates filter and action file existence, regex pattern compilation, and
log path existence without modifying any files or reloading fail2ban.
Args:
request: FastAPI request object.
_auth: Validated session.
name: Jail name to validate.
Returns:
:class:`~app.models.config.JailValidationResult` with any issues found.
Raises:
HTTPException: 400 if *name* contains invalid characters.
HTTPException: 404 if *name* is not found in any config file.
"""
config_dir: str = request.app.state.settings.fail2ban_config_dir
try:
return await config_file_service.validate_jail_config(config_dir, name)
except JailNameError as exc:
raise _bad_request(str(exc)) from exc
@router.get(
"/pending-recovery",
response_model=PendingRecovery | None,
summary="Return active crash-recovery record if one exists",
)
async def get_pending_recovery(
request: Request,
_auth: AuthDep,
) -> PendingRecovery | None:
"""Return the current :class:`~app.models.config.PendingRecovery` record.
A non-null response means fail2ban crashed shortly after a jail activation
and the user should be offered a rollback option. Returns ``null`` (HTTP
200 with ``null`` body) when no recovery is pending.
Args:
request: FastAPI request object.
_auth: Validated session.
Returns:
:class:`~app.models.config.PendingRecovery` or ``None``.
"""
return getattr(request.app.state, "pending_recovery", None)
@router.post(
"/jails/{name}/rollback",
response_model=RollbackResponse,
summary="Disable a bad jail config and restart fail2ban",
)
async def rollback_jail(
request: Request,
_auth: AuthDep,
name: _NamePath,
) -> RollbackResponse:
"""Disable the specified jail and attempt to restart fail2ban.
Writes ``enabled = false`` to ``jail.d/{name}.local`` (works even when
fail2ban is down — no socket is needed), then runs the configured start
command and waits up to ten seconds for the daemon to come back online.
On success, clears the :class:`~app.models.config.PendingRecovery` record.
Args:
request: FastAPI request object.
_auth: Validated session.
name: Jail name to disable and roll back.
Returns:
:class:`~app.models.config.RollbackResponse`.
Raises:
HTTPException: 400 if *name* contains invalid characters.
HTTPException: 500 if writing the .local override file fails.
"""
config_dir: str = request.app.state.settings.fail2ban_config_dir
socket_path: str = request.app.state.settings.fail2ban_socket
start_cmd: str = request.app.state.settings.fail2ban_start_command
start_cmd_parts: list[str] = start_cmd.split()
try:
result = await config_file_service.rollback_jail(
config_dir, socket_path, name, start_cmd_parts
)
except JailNameError as exc:
raise _bad_request(str(exc)) from exc
except ConfigWriteError as exc:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to write config override: {exc}",
) from exc
# Clear pending recovery if fail2ban came back online.
if result.fail2ban_running:
request.app.state.pending_recovery = None
request.app.state.last_activation = None
return result
# ---------------------------------------------------------------------------
# Filter discovery endpoints (Task 2.1)
# ---------------------------------------------------------------------------