Fix fail2ban runtime errors: jail not found, action locks, log noise

This commit implements fixes for three independent bugs in the fail2ban configuration and integration layer:

1. Task 1: Detect UnknownJailException and prevent silent failures
   - Added JailNotFoundError detection in jail_service.reload_all()
   - Enhanced error handling in config_file_service to catch JailNotFoundError
   - Added specific error message with logpath validation hints
   - Added rollback test for this scenario

2. Task 2: Fix iptables-allports exit code 4 (xtables lock contention)
   - Added global banaction setting in jail.conf with -w 5 lockingopt
   - Removed redundant per-jail banaction overrides from bangui-sim and blocklist-import
   - Added production compose documentation note

3. Task 3: Suppress log noise from unsupported backend/idle commands
   - Implemented capability detection to cache command support status
   - Uses double-checked locking to minimize lock contention
   - Avoids sending unsupported get <jail> backend/idle commands
   - Returns default values without socket calls when unsupported

All changes include comprehensive tests and maintain backward compatibility.
2026-03-15 10:57:00 +01:00
parent 1e33220f59
commit f62785aaf2
8 changed files with 446 additions and 145 deletions
--- a/backend/app/services/config_file_service.py
+++ b/backend/app/services/config_file_service.py
@@ -1231,6 +1231,30 @@ async def activate_jail(
    # ---------------------------------------------------------------------- #
    try:
        await jail_service.reload_all(socket_path, include_jails=[name])
+    except jail_service.JailNotFoundError as exc:
+        # Jail configuration is invalid (e.g. missing logpath that prevents
+        # fail2ban from loading the jail). Roll back and provide a specific error.
+        log.warning(
+            "reload_after_activate_failed_jail_not_found",
+            jail=name,
+            error=str(exc),
+        )
+        recovered = await _rollback_activation_async(
+            config_dir, name, socket_path, original_content
+        )
+        return JailActivationResponse(
+            name=name,
+            active=False,
+            fail2ban_running=False,
+            recovered=recovered,
+            validation_warnings=warnings,
+            message=(
+                f"Jail {name!r} activation failed: {str(exc)}. "
+                "Check that all logpath files exist and are readable. "
+                "The configuration was "
+                + ("automatically recovered." if recovered else "not recovered — manual intervention is required.")
+            ),
+        )
    except Exception as exc:  # noqa: BLE001
        log.warning("reload_after_activate_failed", jail=name, error=str(exc))
        recovered = await _rollback_activation_async(
--- a/backend/app/services/jail_service.py
+++ b/backend/app/services/jail_service.py
@@ -43,6 +43,13 @@ _SOCKET_TIMEOUT: float = 10.0
 # ensures only one reload stream is in-flight at a time.
 _reload_all_lock: asyncio.Lock = asyncio.Lock()

+# Capability cache for the optional transmitter commands ``get <jail> backend``
+# and ``get <jail> idle``, which not every fail2ban version supports. ``None``
+# means "not probed yet"; ``True``/``False`` is the cached probe result, reused
+# so unsupported commands do not spam the fail2ban log with "Invalid command".
+_backend_cmd_supported: bool | None = None
+_backend_cmd_lock: asyncio.Lock = asyncio.Lock()  # held while the probe runs
+
 # ---------------------------------------------------------------------------
 # Custom exceptions
 # ---------------------------------------------------------------------------
@@ -185,6 +192,51 @@ async def _safe_get(
        return default


+async def _check_backend_cmd_supported(
+    client: Fail2BanClient,
+    jail_name: str,
+) -> bool:
+    """Detect whether the daemon supports the optional ``get <jail> backend`` command.
+
+    Some fail2ban builds (e.g. the LinuxServer.io container) do not implement the
+    optional ``get <jail> backend`` and ``get <jail> idle`` transmitter
+    sub-commands. The daemon is probed once and the verdict is cached for all
+    jails, so the fail2ban log is not spammed with "Invalid command" errors on
+    every polling cycle.
+
+    Uses double-checked locking so that concurrent polls trigger at most one
+    probe at a time.
+
+    A probe that fails because the daemon could not be reached (connection
+    error, OS-level socket error, timeout) says nothing about command support.
+    In that case ``False`` is returned for this call only and the cache is left
+    unset, so a later call probes again instead of disabling the commands for
+    the rest of the process lifetime.
+
+    Args:
+        client: The :class:`~app.utils.fail2ban_client.Fail2BanClient` to use.
+        jail_name: Name of any jail to use for the probe command.
+
+    Returns:
+        ``True`` if the command is supported, ``False`` otherwise (including
+        when the probe could not be carried out).
+    """
+    global _backend_cmd_supported
+
+    # Local import keeps this helper self-contained; the class lives next to
+    # Fail2BanClient in the same utility module.
+    from app.utils.fail2ban_client import Fail2BanConnectionError
+
+    # Fast path: verdict already cached, no lock needed.
+    if _backend_cmd_supported is not None:
+        return _backend_cmd_supported
+
+    # Slow path: acquire the lock so only one coroutine sends the probe.
+    async with _backend_cmd_lock:
+        # Re-check: another coroutine may have probed while we waited.
+        if _backend_cmd_supported is not None:
+            return _backend_cmd_supported
+
+        try:
+            _ok(await client.send(["get", jail_name, "backend"]))
+        except (Fail2BanConnectionError, OSError, asyncio.TimeoutError) as exc:
+            # Transport failure: do not cache, so the next call re-probes.
+            log.debug("backend_cmd_probe_failed", error=str(exc))
+            return False
+        except Exception:  # noqa: BLE001
+            # Any other failure is treated as "command not supported".
+            _backend_cmd_supported = False
+            log.debug("backend_cmd_unsupported_detected")
+        else:
+            _backend_cmd_supported = True
+            log.debug("backend_cmd_supported_detected")
+
+        return _backend_cmd_supported
+
+
 # ---------------------------------------------------------------------------
 # Public API — Jail listing & detail
 # ---------------------------------------------------------------------------
@@ -238,7 +290,11 @@ async def _fetch_jail_summary(
    """Fetch and build a :class:`~app.models.jail.JailSummary` for one jail.

    Sends the ``status``, ``get ... bantime``, ``findtime``, ``maxretry``,
-    ``backend``, and ``idle`` commands in parallel.
+    ``backend``, and ``idle`` commands in parallel (if supported).
+
+    The ``backend`` and ``idle`` commands are optional and not supported in
+    all fail2ban versions. If not supported, this function will not send them
+    to avoid spamming the fail2ban log with "Invalid command" errors.

    Args:
        client: Shared :class:`~app.utils.fail2ban_client.Fail2BanClient`.
@@ -247,15 +303,38 @@ async def _fetch_jail_summary(
    Returns:
        A :class:`~app.models.jail.JailSummary` populated from the responses.
    """
-    _r = await asyncio.gather(
+    # Check whether optional backend/idle commands are supported.
+    # This probe happens once per session and is cached to avoid repeated
+    # "Invalid command" errors in the fail2ban log.
+    backend_cmd_is_supported = await _check_backend_cmd_supported(client, name)
+
+    # Build the gather list based on command support.
+    gather_list: list[Any] = [
        client.send(["status", name, "short"]),
        client.send(["get", name, "bantime"]),
        client.send(["get", name, "findtime"]),
        client.send(["get", name, "maxretry"]),
-        client.send(["get", name, "backend"]),
-        client.send(["get", name, "idle"]),
-        return_exceptions=True,
-    )
+    ]
+
+    if backend_cmd_is_supported:
+        # Commands are supported; send them for real values.
+        gather_list.extend([
+            client.send(["get", name, "backend"]),
+            client.send(["get", name, "idle"]),
+        ])
+        uses_backend_backend_commands = True
+    else:
+        # Commands not supported; return default values without sending.
+        async def _return_default(value: Any) -> tuple[int, Any]:
+            return (0, value)
+
+        gather_list.extend([
+            _return_default("polling"),  # backend default
+            _return_default(False),      # idle default
+        ])
+        uses_backend_backend_commands = False
+
+    _r = await asyncio.gather(*gather_list, return_exceptions=True)
    status_raw: Any = _r[0]
    bantime_raw: Any = _r[1]
    findtime_raw: Any = _r[2]
@@ -569,7 +648,10 @@ async def reload_all(
        exclude_jails: Jail names to remove from the start stream.

    Raises:
-        JailOperationError: If fail2ban reports the operation failed.
+        JailNotFoundError: If a jail in *include_jails* does not exist or
+            its configuration is invalid (e.g. missing logpath).
+        JailOperationError: If fail2ban reports the operation failed for
+            a different reason.
        ~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
            cannot be reached.
    """
@@ -593,6 +675,12 @@ async def reload_all(
            _ok(await client.send(["reload", "--all", [], stream]))
            log.info("all_jails_reloaded")
        except ValueError as exc:
+            # Detect UnknownJailException (missing or invalid jail configuration)
+            # and re-raise as JailNotFoundError for better error specificity.
+            if _is_not_found_error(exc):
+                # Extract the jail name from include_jails if available.
+                jail_name = include_jails[0] if include_jails else "unknown"
+                raise JailNotFoundError(jail_name) from exc
            raise JailOperationError(str(exc)) from exc