Implement tasks 1-3: sidebar order, jail activation rollback, pie chart colors

Task 1: Move Configuration to last position in sidebar NAV_ITEMS Task 2: Add automatic rollback when jail activation fails - Back up .local override file before writing - Restore original file (or delete) on reload failure, health-check failure, or jail not appearing post-reload - Return recovered=True/False in JailActivationResponse - Show warning/critical banner in ActivateJailDialog based on recovery - Add _restore_local_file_sync and _rollback_activation_async helpers - Add 3 new tests: rollback on reload failure, health-check failure, and double failure (recovered=False) Task 3: Color pie chart legend labels to match their slice color - legendFormatter now returns ReactNode with span style={{ color }} - Import LegendPayload from recharts/types/component/DefaultLegendContent
2026-03-14 21:16:58 +01:00
parent 6bb38dbd8c
commit 4be2469f92
8 changed files with 449 additions and 581 deletions
--- a/backend/app/models/config.py
+++ b/backend/app/models/config.py
@@ -873,6 +873,16 @@ class JailActivationResponse(BaseModel):
        default_factory=list,
        description="Non-fatal warnings from the pre-activation validation step.",
    )
+    recovered: bool | None = Field(
+        default=None,
+        description=(
+            "Set when activation failed after writing the config file.  "
+            "``True`` means the system automatically rolled back the change and "
+            "restarted fail2ban.  ``False`` means the rollback itself also "
+            "failed and manual intervention is required.  ``None`` when "
+            "activation succeeded or failed before the file was written."
+        ),
+    )


 # ---------------------------------------------------------------------------
--- a/backend/app/services/config_file_service.py
+++ b/backend/app/services/config_file_service.py
@@ -887,6 +887,50 @@ def _write_local_override_sync(
    )


+def _restore_local_file_sync(local_path: Path, original_content: bytes | None) -> None:
+    """Restore a ``.local`` file to its pre-activation state.
+
+    If *original_content* is ``None``, the file is deleted (it did not exist
+    before the activation).  Otherwise the original bytes are written back
+    atomically via a temp-file rename.
+
+    Args:
+        local_path: Absolute path to the ``.local`` file to restore.
+        original_content: Original raw bytes to write back, or ``None`` to
+            delete the file.
+
+    Raises:
+        ConfigWriteError: If the write or delete operation fails.
+    """
+    if original_content is None:
+        try:
+            local_path.unlink(missing_ok=True)
+        except OSError as exc:
+            raise ConfigWriteError(
+                f"Failed to delete {local_path} during rollback: {exc}"
+            ) from exc
+        return
+
+    tmp_name: str | None = None
+    try:
+        with tempfile.NamedTemporaryFile(
+            mode="wb",
+            dir=local_path.parent,
+            delete=False,
+            suffix=".tmp",
+        ) as tmp:
+            tmp.write(original_content)
+            tmp_name = tmp.name
+        os.replace(tmp_name, local_path)
+    except OSError as exc:
+        with contextlib.suppress(OSError):
+            if tmp_name is not None:
+                os.unlink(tmp_name)
+        raise ConfigWriteError(
+            f"Failed to restore {local_path} during rollback: {exc}"
+        ) from exc
+
+
 def _validate_regex_patterns(patterns: list[str]) -> None:
    """Validate each pattern in *patterns* using Python's ``re`` module.

@@ -1163,6 +1207,16 @@ async def activate_jail(
        "logpath": req.logpath,
    }

+    # ---------------------------------------------------------------------- #
+    # Backup the existing .local file (if any) before overwriting it so that  #
+    # we can restore it if activation fails.                                   #
+    # ---------------------------------------------------------------------- #
+    local_path = Path(config_dir) / "jail.d" / f"{name}.local"
+    original_content: bytes | None = await loop.run_in_executor(
+        None,
+        lambda: local_path.read_bytes() if local_path.exists() else None,
+    )
+
    await loop.run_in_executor(
        None,
        _write_local_override_sync,
@@ -1172,10 +1226,28 @@ async def activate_jail(
        overrides,
    )

+    # ---------------------------------------------------------------------- #
+    # Activation reload — if it fails, roll back immediately                  #
+    # ---------------------------------------------------------------------- #
    try:
        await jail_service.reload_all(socket_path, include_jails=[name])
    except Exception as exc:  # noqa: BLE001
        log.warning("reload_after_activate_failed", jail=name, error=str(exc))
+        recovered = await _rollback_activation_async(
+            config_dir, name, socket_path, original_content
+        )
+        return JailActivationResponse(
+            name=name,
+            active=False,
+            fail2ban_running=False,
+            recovered=recovered,
+            validation_warnings=warnings,
+            message=(
+                f"Jail {name!r} activation failed during reload and the "
+                "configuration was "
+                + ("automatically recovered." if recovered else "not recovered — manual intervention is required.")
+            ),
+        )

    # ---------------------------------------------------------------------- #
    # Post-reload health probe with retries                                   #
@@ -1192,16 +1264,21 @@ async def activate_jail(
        log.warning(
            "fail2ban_down_after_activate",
            jail=name,
-            message="fail2ban socket unreachable after reload — daemon may have crashed.",
+            message="fail2ban socket unreachable after reload — initiating rollback.",
+        )
+        recovered = await _rollback_activation_async(
+            config_dir, name, socket_path, original_content
        )
        return JailActivationResponse(
            name=name,
            active=False,
            fail2ban_running=False,
+            recovered=recovered,
            validation_warnings=warnings,
            message=(
-                f"Jail {name!r} was written to config but fail2ban stopped "
-                "responding after reload.  The jail configuration may be invalid."
+                f"Jail {name!r} activation failed: fail2ban stopped responding "
+                "after reload. The configuration was "
+                + ("automatically recovered." if recovered else "not recovered — manual intervention is required.")
            ),
        )

@@ -1212,16 +1289,21 @@ async def activate_jail(
        log.warning(
            "jail_activation_unverified",
            jail=name,
-            message="Jail did not appear in running jails after reload.",
+            message="Jail did not appear in running jails — initiating rollback.",
+        )
+        recovered = await _rollback_activation_async(
+            config_dir, name, socket_path, original_content
        )
        return JailActivationResponse(
            name=name,
            active=False,
            fail2ban_running=True,
+            recovered=recovered,
            validation_warnings=warnings,
            message=(
                f"Jail {name!r} was written to config but did not start after "
-                "reload — check the jail configuration (filters, log paths, regex)."
+                "reload. The configuration was "
+                + ("automatically recovered." if recovered else "not recovered — manual intervention is required.")
            ),
        )

@@ -1235,6 +1317,70 @@ async def activate_jail(
    )


+async def _rollback_activation_async(
+    config_dir: str,
+    name: str,
+    socket_path: str,
+    original_content: bytes | None,
+) -> bool:
+    """Restore the pre-activation ``.local`` file and reload fail2ban.
+
+    Called internally by :func:`activate_jail` when the activation fails after
+    the config file was already written.  Tries to:
+
+    1. Restore the original file content (or delete the file if it was newly
+       created by the activation attempt).
+    2. Reload fail2ban so the daemon runs with the restored configuration.
+    3. Probe fail2ban to confirm it came back up.
+
+    Args:
+        config_dir: Absolute path to the fail2ban configuration directory.
+        name: Name of the jail whose ``.local`` file should be restored.
+        socket_path: Path to the fail2ban Unix domain socket.
+        original_content: Raw bytes of the original ``.local`` file, or
+            ``None`` if the file did not exist before the activation.
+
+    Returns:
+        ``True`` if fail2ban is responsive again after the rollback, ``False``
+        if recovery also failed.
+    """
+    loop = asyncio.get_event_loop()
+    local_path = Path(config_dir) / "jail.d" / f"{name}.local"
+
+    # Step 1 — restore original file (or delete it).
+    try:
+        await loop.run_in_executor(
+            None, _restore_local_file_sync, local_path, original_content
+        )
+        log.info("jail_activation_rollback_file_restored", jail=name)
+    except ConfigWriteError as exc:
+        log.error(
+            "jail_activation_rollback_restore_failed", jail=name, error=str(exc)
+        )
+        return False
+
+    # Step 2 — reload fail2ban with the restored config.
+    try:
+        await jail_service.reload_all(socket_path)
+        log.info("jail_activation_rollback_reload_ok", jail=name)
+    except Exception as exc:  # noqa: BLE001
+        log.warning(
+            "jail_activation_rollback_reload_failed", jail=name, error=str(exc)
+        )
+        return False
+
+    # Step 3 — wait for fail2ban to come back.
+    for attempt in range(_POST_RELOAD_MAX_ATTEMPTS):
+        if attempt > 0:
+            await asyncio.sleep(_POST_RELOAD_PROBE_INTERVAL)
+        if await _probe_fail2ban_running(socket_path):
+            log.info("jail_activation_rollback_recovered", jail=name)
+            return True
+
+    log.warning("jail_activation_rollback_still_down", jail=name)
+    return False
+
+
 async def deactivate_jail(
    config_dir: str,
    socket_path: str,
--- a/backend/tests/test_services/test_config_file_service.py
+++ b/backend/tests/test_services/test_config_file_service.py
@@ -502,7 +502,8 @@ class TestActivateJail:
        with (
            patch(
                "app.services.config_file_service._get_active_jail_names",
-                new=AsyncMock(side_effect=[set(), set()]),
+                # First call: pre-activation (not active); second: post-reload (started).
+                new=AsyncMock(side_effect=[set(), {"apache-auth"}]),
            ),
            patch("app.services.config_file_service.jail_service") as mock_js,
            patch(
@@ -2947,3 +2948,166 @@ class TestActivateJailBlocking:
        assert result.active is True
        mock_js.reload_all.assert_awaited_once()

+
+# ---------------------------------------------------------------------------
+# activate_jail — rollback on failure (Task 2)
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+class TestActivateJailRollback:
+    """Rollback logic in activate_jail restores the .local file and recovers."""
+
+    async def test_activate_jail_rollback_on_reload_failure(
+        self, tmp_path: Path
+    ) -> None:
+        """Rollback when reload_all raises on the activation reload.
+
+        Expects:
+        - The .local file is restored to its original content.
+        - The response indicates recovered=True.
+        """
+        from app.models.config import ActivateJailRequest, JailValidationResult
+
+        _write(tmp_path / "jail.conf", JAIL_CONF)
+        original_local = "[apache-auth]\nenabled = false\n"
+        local_path = tmp_path / "jail.d" / "apache-auth.local"
+        local_path.parent.mkdir(parents=True, exist_ok=True)
+        local_path.write_text(original_local)
+
+        req = ActivateJailRequest()
+        reload_call_count = 0
+
+        async def reload_side_effect(socket_path: str, **kwargs: object) -> None:
+            nonlocal reload_call_count
+            reload_call_count += 1
+            if reload_call_count == 1:
+                raise RuntimeError("fail2ban crashed")
+            # Recovery reload succeeds.
+
+        with (
+            patch(
+                "app.services.config_file_service._get_active_jail_names",
+                new=AsyncMock(return_value=set()),
+            ),
+            patch("app.services.config_file_service.jail_service") as mock_js,
+            patch(
+                "app.services.config_file_service._probe_fail2ban_running",
+                new=AsyncMock(return_value=True),
+            ),
+            patch(
+                "app.services.config_file_service._validate_jail_config_sync",
+                return_value=JailValidationResult(
+                    jail_name="apache-auth", valid=True
+                ),
+            ),
+        ):
+            mock_js.reload_all = AsyncMock(side_effect=reload_side_effect)
+            result = await activate_jail(
+                str(tmp_path), "/fake.sock", "apache-auth", req
+            )
+
+        assert result.active is False
+        assert result.recovered is True
+        assert local_path.read_text() == original_local
+
+    async def test_activate_jail_rollback_on_health_check_failure(
+        self, tmp_path: Path
+    ) -> None:
+        """Rollback when fail2ban is unreachable after the activation reload.
+
+        Expects:
+        - The .local file is restored to its original content.
+        - The response indicates recovered=True.
+        """
+        from app.models.config import ActivateJailRequest, JailValidationResult
+
+        _write(tmp_path / "jail.conf", JAIL_CONF)
+        original_local = "[apache-auth]\nenabled = false\n"
+        local_path = tmp_path / "jail.d" / "apache-auth.local"
+        local_path.parent.mkdir(parents=True, exist_ok=True)
+        local_path.write_text(original_local)
+
+        req = ActivateJailRequest()
+        probe_call_count = 0
+
+        async def probe_side_effect(socket_path: str) -> bool:
+            nonlocal probe_call_count
+            probe_call_count += 1
+            # First _POST_RELOAD_MAX_ATTEMPTS probes (health-check after
+            # activation) all fail; subsequent probes (recovery) succeed.
+            from app.services.config_file_service import _POST_RELOAD_MAX_ATTEMPTS
+
+            return probe_call_count > _POST_RELOAD_MAX_ATTEMPTS
+
+        with (
+            patch(
+                "app.services.config_file_service._get_active_jail_names",
+                new=AsyncMock(return_value=set()),
+            ),
+            patch("app.services.config_file_service.jail_service") as mock_js,
+            patch(
+                "app.services.config_file_service._probe_fail2ban_running",
+                new=AsyncMock(side_effect=probe_side_effect),
+            ),
+            patch(
+                "app.services.config_file_service._validate_jail_config_sync",
+                return_value=JailValidationResult(
+                    jail_name="apache-auth", valid=True
+                ),
+            ),
+        ):
+            mock_js.reload_all = AsyncMock()
+            result = await activate_jail(
+                str(tmp_path), "/fake.sock", "apache-auth", req
+            )
+
+        assert result.active is False
+        assert result.recovered is True
+        assert local_path.read_text() == original_local
+
+    async def test_activate_jail_rollback_failure(self, tmp_path: Path) -> None:
+        """recovered=False when both the activation and recovery reloads fail.
+
+        Expects:
+        - The response indicates recovered=False.
+        """
+        from app.models.config import ActivateJailRequest, JailValidationResult
+
+        _write(tmp_path / "jail.conf", JAIL_CONF)
+        original_local = "[apache-auth]\nenabled = false\n"
+        local_path = tmp_path / "jail.d" / "apache-auth.local"
+        local_path.parent.mkdir(parents=True, exist_ok=True)
+        local_path.write_text(original_local)
+
+        req = ActivateJailRequest()
+
+        with (
+            patch(
+                "app.services.config_file_service._get_active_jail_names",
+                new=AsyncMock(return_value=set()),
+            ),
+            patch("app.services.config_file_service.jail_service") as mock_js,
+            patch(
+                "app.services.config_file_service._probe_fail2ban_running",
+                new=AsyncMock(return_value=True),
+            ),
+            patch(
+                "app.services.config_file_service._validate_jail_config_sync",
+                return_value=JailValidationResult(
+                    jail_name="apache-auth", valid=True
+                ),
+            ),
+        ):
+            # Both the activation reload and the recovery reload fail.
+            mock_js.reload_all = AsyncMock(
+                side_effect=RuntimeError("fail2ban unavailable")
+            )
+            result = await activate_jail(
+                str(tmp_path), "/fake.sock", "apache-auth", req
+            )
+
+        assert result.active is False
+        assert result.recovered is False
+
+