Fix fail2ban runtime errors: jail not found, action locks, log noise

This commit implements fixes for three independent bugs in the fail2ban configuration and integration layer:

1. Task 1: Detect UnknownJailException and prevent silent failures
   - Added JailNotFoundError detection in jail_service.reload_all()
   - Enhanced error handling in config_file_service to catch JailNotFoundError
   - Added specific error message with logpath validation hints
   - Added rollback test for this scenario

2. Task 2: Fix iptables-allports exit code 4 (xtables lock contention)
   - Added global banaction setting in jail.conf with -w 5 lockingopt
   - Removed redundant per-jail banaction overrides from bangui-sim and blocklist-import
   - Added production compose documentation note

3. Task 3: Suppress log noise from unsupported backend/idle commands
   - Implemented capability detection to cache command support status
   - Used double-checked locking to minimize lock contention
   - Avoids sending unsupported get <jail> backend/idle commands
   - Returns default values without socket calls when unsupported

All changes include comprehensive tests and maintain backward compatibility.
2026-03-15 10:57:00 +01:00
parent 1e33220f59
commit f62785aaf2
8 changed files with 446 additions and 145 deletions
--- a/backend/tests/test_services/test_config_file_service.py
+++ b/backend/tests/test_services/test_config_file_service.py
@@ -3110,4 +3110,68 @@ class TestActivateJailRollback:
        assert result.active is False
        assert result.recovered is False

+    async def test_activate_jail_rollback_on_jail_not_found_error(
+        self, tmp_path: Path
+    ) -> None:
+        """Rollback when reload_all raises JailNotFoundError (invalid config).
+
+        When fail2ban cannot create a jail due to invalid configuration
+        (e.g., missing logpath), it raises UnknownJailException which becomes
+        JailNotFoundError. This test verifies proper handling and rollback.
+
+        Expects:
+        - The .local file is restored to its original content.
+        - The response indicates recovered=True.
+        - The error message mentions the logpath issue.
+        """
+        from app.models.config import ActivateJailRequest, JailValidationResult
+        from app.services.jail_service import JailNotFoundError
+
+        _write(tmp_path / "jail.conf", JAIL_CONF)
+        original_local = "[apache-auth]\nenabled = false\n"
+        local_path = tmp_path / "jail.d" / "apache-auth.local"
+        local_path.parent.mkdir(parents=True, exist_ok=True)
+        local_path.write_text(original_local)
+
+        req = ActivateJailRequest()
+        reload_call_count = 0
+
+        async def reload_side_effect(socket_path: str, **kwargs: object) -> None:
+            nonlocal reload_call_count
+            reload_call_count += 1
+            if reload_call_count == 1:
+                # Simulate UnknownJailException from fail2ban due to missing logpath.
+                raise JailNotFoundError("apache-auth")
+            # Recovery reload succeeds.
+
+        with (
+            patch(
+                "app.services.config_file_service._get_active_jail_names",
+                new=AsyncMock(return_value=set()),
+            ),
+            patch("app.services.config_file_service.jail_service") as mock_js,
+            patch(
+                "app.services.config_file_service._probe_fail2ban_running",
+                new=AsyncMock(return_value=True),
+            ),
+            patch(
+                "app.services.config_file_service._validate_jail_config_sync",
+                return_value=JailValidationResult(
+                    jail_name="apache-auth", valid=True
+                ),
+            ),
+        ):
+            mock_js.reload_all = AsyncMock(side_effect=reload_side_effect)
+            mock_js.JailNotFoundError = JailNotFoundError
+            result = await activate_jail(
+                str(tmp_path), "/fake.sock", "apache-auth", req
+            )
+
+        assert result.active is False
+        assert result.recovered is True
+        assert local_path.read_text() == original_local
+        # Verify the error message mentions logpath issues.
+        assert "logpath" in result.message.lower() or "check that all logpath" in result.message.lower()
+
+

--- a/backend/tests/test_services/test_jail_service.py
+++ b/backend/tests/test_services/test_jail_service.py
@@ -184,10 +184,90 @@ class TestListJails:
        with patch("app.services.jail_service.Fail2BanClient", _FailClient), pytest.raises(Fail2BanConnectionError):
            await jail_service.list_jails(_SOCKET)

+    async def test_backend_idle_commands_unsupported(self) -> None:
+        """list_jails handles unsupported backend and idle commands gracefully.

-# ---------------------------------------------------------------------------
-# get_jail
-# ---------------------------------------------------------------------------
+        When the fail2ban daemon does not support get ... backend/idle commands,
+        list_jails should not send them, avoiding "Invalid command" errors in the
+        fail2ban log.
+        """
+        # Reset the capability cache to test detection.
+        jail_service._backend_cmd_supported = None
+
+        responses = {
+            "status": _make_global_status("sshd"),
+            "status|sshd|short": _make_short_status(),
+            # Capability probe: get backend fails (command not supported).
+            "get|sshd|backend": (1, Exception("Invalid command (no get action or not yet implemented)")),
+            # Subsequent gets should still work.
+            "get|sshd|bantime": (0, 600),
+            "get|sshd|findtime": (0, 600),
+            "get|sshd|maxretry": (0, 5),
+        }
+        with _patch_client(responses):
+            result = await jail_service.list_jails(_SOCKET)
+
+        # Verify the result uses the default values for backend and idle.
+        jail = result.jails[0]
+        assert jail.backend == "polling"  # default
+        assert jail.idle is False  # default
+        # Capability should now be cached as False.
+        assert jail_service._backend_cmd_supported is False
+
+    async def test_backend_idle_commands_supported(self) -> None:
+        """list_jails detects and sends backend/idle commands when supported."""
+        # Reset the capability cache to test detection.
+        jail_service._backend_cmd_supported = None
+
+        responses = {
+            "status": _make_global_status("sshd"),
+            "status|sshd|short": _make_short_status(),
+            # Capability probe: get backend succeeds.
+            "get|sshd|backend": (0, "systemd"),
+            # All other commands.
+            "get|sshd|bantime": (0, 600),
+            "get|sshd|findtime": (0, 600),
+            "get|sshd|maxretry": (0, 5),
+            "get|sshd|idle": (0, True),
+        }
+        with _patch_client(responses):
+            result = await jail_service.list_jails(_SOCKET)
+
+        # Verify real values are returned.
+        jail = result.jails[0]
+        assert jail.backend == "systemd"  # real value
+        assert jail.idle is True  # real value
+        # Capability should now be cached as True.
+        assert jail_service._backend_cmd_supported is True
+
+    async def test_backend_idle_commands_cached_after_first_probe(self) -> None:
+        """list_jails caches capability result and reuses it across polling cycles."""
+        # Reset the capability cache.
+        jail_service._backend_cmd_supported = None
+
+        responses = {
+            "status": _make_global_status("sshd, nginx"),
+            # The probe happens once (for the first jail listed); its result is then cached.
+            "status|sshd|short": _make_short_status(),
+            "status|nginx|short": _make_short_status(),
+            # Capability probe: backend is unsupported.
+            "get|sshd|backend": (1, Exception("Invalid command")),
+            # Subsequent jails do not trigger another probe; they use cached result.
+            # (The mock doesn't have get|nginx|backend because it shouldn't be called.)
+            "get|sshd|bantime": (0, 600),
+            "get|sshd|findtime": (0, 600),
+            "get|sshd|maxretry": (0, 5),
+            "get|nginx|bantime": (0, 600),
+            "get|nginx|findtime": (0, 600),
+            "get|nginx|maxretry": (0, 5),
+        }
+        with _patch_client(responses):
+            result = await jail_service.list_jails(_SOCKET)
+
+        # Both jails should return default values (cached result is False).
+        for jail in result.jails:
+            assert jail.backend == "polling"
+            assert jail.idle is False


 class TestGetJail:
@@ -339,6 +419,28 @@ class TestJailControls:
                _SOCKET, include_jails=["new"], exclude_jails=["old"]
            )

+    async def test_reload_all_unknown_jail_raises_jail_not_found(self) -> None:
+        """reload_all detects UnknownJailException and raises JailNotFoundError.
+
+        When fail2ban cannot load a jail due to invalid configuration (e.g.,
+        missing logpath), it raises UnknownJailException during reload. This
+        test verifies that reload_all detects this and re-raises as
+        JailNotFoundError instead of the generic JailOperationError.
+        """
+        with _patch_client(
+            {
+                "status": _make_global_status("sshd"),
+                "reload|--all|[]|[['start', 'airsonic-auth'], ['start', 'sshd']]": (
+                    1,
+                    Exception("UnknownJailException('airsonic-auth')"),
+                ),
+            }
+        ), pytest.raises(jail_service.JailNotFoundError) as exc_info:
+            await jail_service.reload_all(
+                _SOCKET, include_jails=["airsonic-auth"]
+            )
+        assert exc_info.value.name == "airsonic-auth"
+
    async def test_start_not_found_raises(self) -> None:
        """start_jail raises JailNotFoundError for unknown jail."""
        with _patch_client({"start|ghost": (1, Exception("Unknown jail: 'ghost'"))}), pytest.raises(JailNotFoundError):