feat: Task 3 — invalid jail config recovery (pre-validation, crash detection, rollback)

- Backend: extend activate_jail() with pre-validation and 4-attempt post-reload
  health probe; add validate_jail_config() and rollback_jail() service functions
- Backend: new endpoints POST /api/config/jails/{name}/validate,
  GET /api/config/pending-recovery, POST /api/config/jails/{name}/rollback
- Backend: extend JailActivationResponse with fail2ban_running + validation_warnings;
  add JailValidationIssue, JailValidationResult, PendingRecovery, RollbackResponse models
- Backend: health_check task tracks last_activation and creates PendingRecovery
  record when fail2ban goes offline within 60 s of an activation
- Backend: add fail2ban_start_command setting (configurable start cmd for rollback)
- Frontend: ActivateJailDialog — pre-validation on open, crash-detected callback,
  extended spinner text during activation+verify
- Frontend: JailsTab — Validate Config button for inactive jails, validation
  result panels (blocking errors + advisory warnings)
- Frontend: RecoveryBanner component — polls pending-recovery, shows full-width
  alert with Disable & Restart / View Logs buttons
- Frontend: MainLayout — mount RecoveryBanner at layout level
- Tests: 19 new backend service tests (validate, rollback, filter/action parsing)
  + 6 health_check crash-detection tests + 11 router tests; 5 RecoveryBanner
  frontend tests; fix mock setup in existing activate_jail tests
This commit is contained in:
2026-03-14 14:13:07 +01:00
parent ab11ece001
commit 0966f347c4
17 changed files with 1862 additions and 26 deletions

View File

@@ -8,10 +8,12 @@ the scheduler and primes the initial status.
from __future__ import annotations
import datetime
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from app.models.config import PendingRecovery
from app.models.server import ServerStatus
from app.tasks.health_check import HEALTH_CHECK_INTERVAL, _run_probe, register
@@ -33,6 +35,8 @@ def _make_app(prev_online: bool = False) -> MagicMock:
app.state.settings.fail2ban_socket = "/var/run/fail2ban/fail2ban.sock"
app.state.server_status = ServerStatus(online=prev_online)
app.state.scheduler = MagicMock()
app.state.last_activation = None
app.state.pending_recovery = None
return app
@@ -236,3 +240,111 @@ class TestRegister:
_, kwargs = app.state.scheduler.add_job.call_args
assert kwargs["kwargs"] == {"app": app}
def test_register_initialises_last_activation_none(self) -> None:
    """``register`` must set ``app.state.last_activation = None``.

    The attribute is seeded with a non-None placeholder before calling
    ``register`` so that the final assertion can only pass when
    ``register`` itself performs the reset.
    """
    app = _make_app()
    # _make_app() already leaves last_activation as None; without a stale
    # value here the assertion below would pass even if register() never
    # touched the attribute.
    app.state.last_activation = object()

    register(app)

    assert app.state.last_activation is None
def test_register_initialises_pending_recovery_none(self) -> None:
    """``register`` must set ``app.state.pending_recovery = None``.

    The attribute is seeded with a non-None placeholder before calling
    ``register`` so that the final assertion can only pass when
    ``register`` itself performs the reset.
    """
    app = _make_app()
    # _make_app() already leaves pending_recovery as None; without a stale
    # value here the assertion below would pass even if register() never
    # touched the attribute.
    app.state.pending_recovery = object()

    register(app)

    assert app.state.pending_recovery is None
# ---------------------------------------------------------------------------
# Crash detection (Task 3)
# ---------------------------------------------------------------------------
class TestCrashDetection:
    """Checks how ``_run_probe`` handles ``pending_recovery`` around an activation."""

    @staticmethod
    def _utc_now() -> datetime.datetime:
        """Return the current timezone-aware UTC timestamp."""
        return datetime.datetime.now(tz=datetime.timezone.utc)

    @staticmethod
    def _probe_returning(status):
        """Build a patcher that makes ``health_service.probe`` yield *status*."""
        return patch(
            "app.tasks.health_check.health_service.probe",
            new_callable=AsyncMock,
            return_value=status,
        )

    @pytest.mark.asyncio
    async def test_crash_within_window_creates_pending_recovery(self) -> None:
        """Going offline 10 s after an activation must record a pending recovery."""
        app = _make_app(prev_online=True)
        reference = self._utc_now()
        app.state.last_activation = {
            "jail_name": "sshd",
            "at": reference - datetime.timedelta(seconds=10),
        }
        app.state.pending_recovery = None
        went_offline = ServerStatus(online=False)

        with self._probe_returning(went_offline):
            await _run_probe(app)

        recovery = app.state.pending_recovery
        assert recovery is not None
        assert isinstance(recovery, PendingRecovery)
        assert recovery.jail_name == "sshd"
        assert recovery.recovered is False

    @pytest.mark.asyncio
    async def test_crash_outside_window_does_not_create_pending_recovery(self) -> None:
        """Going offline 120 s after an activation must leave pending_recovery unset."""
        app = _make_app(prev_online=True)
        stale_activation_time = self._utc_now() - datetime.timedelta(seconds=120)
        app.state.last_activation = {
            "jail_name": "sshd",
            "at": stale_activation_time,
        }
        app.state.pending_recovery = None

        with self._probe_returning(ServerStatus(online=False)):
            await _run_probe(app)

        assert app.state.pending_recovery is None

    @pytest.mark.asyncio
    async def test_came_online_marks_pending_recovery_resolved(self) -> None:
        """Coming back online must flip an existing pending_recovery to recovered."""
        app = _make_app(prev_online=False)
        # Two separate clock reads, in the same order as before: activation
        # first (shifted 30 s into the past), then detection.
        activation_time = self._utc_now() - datetime.timedelta(seconds=30)
        detection_time = self._utc_now()
        app.state.pending_recovery = PendingRecovery(
            jail_name="sshd",
            activated_at=activation_time,
            detected_at=detection_time,
            recovered=False,
        )

        with self._probe_returning(ServerStatus(online=True)):
            await _run_probe(app)

        assert app.state.pending_recovery.recovered is True

    @pytest.mark.asyncio
    async def test_crash_without_recent_activation_does_nothing(self) -> None:
        """Going offline with no recorded activation must leave pending_recovery unset."""
        app = _make_app(prev_online=True)
        app.state.last_activation = None
        app.state.pending_recovery = None

        with self._probe_returning(ServerStatus(online=False)):
            await _run_probe(app)

        assert app.state.pending_recovery is None