Fix fail2ban runtime errors: jail not found, action locks, log noise
This commit implements fixes for three independent bugs in the fail2ban configuration and integration layer: 1. Task 1: Detect UnknownJailException and prevent silent failures - Added JailNotFoundError detection in jail_service.reload_all() - Enhanced error handling in config_file_service to catch JailNotFoundError - Added specific error message with logpath validation hints - Added rollback test for this scenario 2. Task 2: Fix iptables-allports exit code 4 (xtables lock contention) - Added global banaction setting in jail.conf with -w 5 lockingopt - Removed redundant per-jail banaction overrides from bangui-sim and blocklist-import - Added production compose documentation note 3. Task 3: Suppress log noise from unsupported backend/idle commands - Implemented capability detection to cache command support status - Double-check locking to minimize lock contention - Avoids sending unsupported get <jail> backend/idle commands - Returns default values without socket calls when unsupported All changes include comprehensive tests and maintain backward compatibility.
This commit is contained in:
@@ -1231,6 +1231,30 @@ async def activate_jail(
|
||||
# ---------------------------------------------------------------------- #
|
||||
try:
|
||||
await jail_service.reload_all(socket_path, include_jails=[name])
|
||||
except jail_service.JailNotFoundError as exc:
|
||||
# Jail configuration is invalid (e.g. missing logpath that prevents
|
||||
# fail2ban from loading the jail). Roll back and provide a specific error.
|
||||
log.warning(
|
||||
"reload_after_activate_failed_jail_not_found",
|
||||
jail=name,
|
||||
error=str(exc),
|
||||
)
|
||||
recovered = await _rollback_activation_async(
|
||||
config_dir, name, socket_path, original_content
|
||||
)
|
||||
return JailActivationResponse(
|
||||
name=name,
|
||||
active=False,
|
||||
fail2ban_running=False,
|
||||
recovered=recovered,
|
||||
validation_warnings=warnings,
|
||||
message=(
|
||||
f"Jail {name!r} activation failed: {str(exc)}. "
|
||||
"Check that all logpath files exist and are readable. "
|
||||
"The configuration was "
|
||||
+ ("automatically recovered." if recovered else "not recovered — manual intervention is required.")
|
||||
),
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
log.warning("reload_after_activate_failed", jail=name, error=str(exc))
|
||||
recovered = await _rollback_activation_async(
|
||||
|
||||
@@ -43,6 +43,13 @@ _SOCKET_TIMEOUT: float = 10.0
|
||||
# ensures only one reload stream is in-flight at a time.
|
||||
_reload_all_lock: asyncio.Lock = asyncio.Lock()
|
||||
|
||||
# Capability detection for optional fail2ban transmitter commands (backend, idle).
|
||||
# These commands are not supported in all fail2ban versions. Caching the result
|
||||
# avoids sending unsupported commands every polling cycle and spamming the
|
||||
# fail2ban log with "Invalid command" errors.
|
||||
_backend_cmd_supported: bool | None = None
|
||||
_backend_cmd_lock: asyncio.Lock = asyncio.Lock()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Custom exceptions
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -185,6 +192,51 @@ async def _safe_get(
|
||||
return default
|
||||
|
||||
|
||||
async def _check_backend_cmd_supported(
|
||||
client: Fail2BanClient,
|
||||
jail_name: str,
|
||||
) -> bool:
|
||||
"""Detect whether the fail2ban daemon supports optional ``get ... backend`` command.
|
||||
|
||||
Some fail2ban versions (e.g. LinuxServer.io container) do not implement the
|
||||
optional ``get <jail> backend`` and ``get <jail> idle`` transmitter sub-commands.
|
||||
This helper probes the daemon once and caches the result to avoid repeated
|
||||
"Invalid command" errors in the fail2ban log.
|
||||
|
||||
Uses double-check locking to minimize lock contention in concurrent polls.
|
||||
|
||||
Args:
|
||||
client: The :class:`~app.utils.fail2ban_client.Fail2BanClient` to use.
|
||||
jail_name: Name of any jail to use for the probe command.
|
||||
|
||||
Returns:
|
||||
``True`` if the command is supported, ``False`` otherwise.
|
||||
Once determined, the result is cached and reused for all jails.
|
||||
"""
|
||||
global _backend_cmd_supported
|
||||
|
||||
# Fast path: return cached result if already determined.
|
||||
if _backend_cmd_supported is not None:
|
||||
return _backend_cmd_supported
|
||||
|
||||
# Slow path: acquire lock and probe the command once.
|
||||
async with _backend_cmd_lock:
|
||||
# Double-check idiom: another coroutine may have probed while we waited.
|
||||
if _backend_cmd_supported is not None:
|
||||
return _backend_cmd_supported
|
||||
|
||||
# Probe: send the command and catch any exception.
|
||||
try:
|
||||
_ok(await client.send(["get", jail_name, "backend"]))
|
||||
_backend_cmd_supported = True
|
||||
log.debug("backend_cmd_supported_detected")
|
||||
except Exception:
|
||||
_backend_cmd_supported = False
|
||||
log.debug("backend_cmd_unsupported_detected")
|
||||
|
||||
return _backend_cmd_supported
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API — Jail listing & detail
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -238,7 +290,11 @@ async def _fetch_jail_summary(
|
||||
"""Fetch and build a :class:`~app.models.jail.JailSummary` for one jail.
|
||||
|
||||
Sends the ``status``, ``get ... bantime``, ``findtime``, ``maxretry``,
|
||||
``backend``, and ``idle`` commands in parallel.
|
||||
``backend``, and ``idle`` commands in parallel (if supported).
|
||||
|
||||
The ``backend`` and ``idle`` commands are optional and not supported in
|
||||
all fail2ban versions. If not supported, this function will not send them
|
||||
to avoid spamming the fail2ban log with "Invalid command" errors.
|
||||
|
||||
Args:
|
||||
client: Shared :class:`~app.utils.fail2ban_client.Fail2BanClient`.
|
||||
@@ -247,15 +303,38 @@ async def _fetch_jail_summary(
|
||||
Returns:
|
||||
A :class:`~app.models.jail.JailSummary` populated from the responses.
|
||||
"""
|
||||
_r = await asyncio.gather(
|
||||
# Check whether optional backend/idle commands are supported.
|
||||
# This probe happens once per session and is cached to avoid repeated
|
||||
# "Invalid command" errors in the fail2ban log.
|
||||
backend_cmd_is_supported = await _check_backend_cmd_supported(client, name)
|
||||
|
||||
# Build the gather list based on command support.
|
||||
gather_list: list[Any] = [
|
||||
client.send(["status", name, "short"]),
|
||||
client.send(["get", name, "bantime"]),
|
||||
client.send(["get", name, "findtime"]),
|
||||
client.send(["get", name, "maxretry"]),
|
||||
client.send(["get", name, "backend"]),
|
||||
client.send(["get", name, "idle"]),
|
||||
return_exceptions=True,
|
||||
)
|
||||
]
|
||||
|
||||
if backend_cmd_is_supported:
|
||||
# Commands are supported; send them for real values.
|
||||
gather_list.extend([
|
||||
client.send(["get", name, "backend"]),
|
||||
client.send(["get", name, "idle"]),
|
||||
])
|
||||
uses_backend_backend_commands = True
|
||||
else:
|
||||
# Commands not supported; return default values without sending.
|
||||
async def _return_default(value: Any) -> tuple[int, Any]:
|
||||
return (0, value)
|
||||
|
||||
gather_list.extend([
|
||||
_return_default("polling"), # backend default
|
||||
_return_default(False), # idle default
|
||||
])
|
||||
uses_backend_backend_commands = False
|
||||
|
||||
_r = await asyncio.gather(*gather_list, return_exceptions=True)
|
||||
status_raw: Any = _r[0]
|
||||
bantime_raw: Any = _r[1]
|
||||
findtime_raw: Any = _r[2]
|
||||
@@ -569,7 +648,10 @@ async def reload_all(
|
||||
exclude_jails: Jail names to remove from the start stream.
|
||||
|
||||
Raises:
|
||||
JailOperationError: If fail2ban reports the operation failed.
|
||||
JailNotFoundError: If a jail in *include_jails* does not exist or
|
||||
its configuration is invalid (e.g. missing logpath).
|
||||
JailOperationError: If fail2ban reports the operation failed for
|
||||
a different reason.
|
||||
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
|
||||
cannot be reached.
|
||||
"""
|
||||
@@ -593,6 +675,12 @@ async def reload_all(
|
||||
_ok(await client.send(["reload", "--all", [], stream]))
|
||||
log.info("all_jails_reloaded")
|
||||
except ValueError as exc:
|
||||
# Detect UnknownJailException (missing or invalid jail configuration)
|
||||
# and re-raise as JailNotFoundError for better error specificity.
|
||||
if _is_not_found_error(exc):
|
||||
# Extract the jail name from include_jails if available.
|
||||
jail_name = include_jails[0] if include_jails else "unknown"
|
||||
raise JailNotFoundError(jail_name) from exc
|
||||
raise JailOperationError(str(exc)) from exc
|
||||
|
||||
|
||||
|
||||
@@ -3110,4 +3110,68 @@ class TestActivateJailRollback:
|
||||
assert result.active is False
|
||||
assert result.recovered is False
|
||||
|
||||
async def test_activate_jail_rollback_on_jail_not_found_error(
|
||||
self, tmp_path: Path
|
||||
) -> None:
|
||||
"""Rollback when reload_all raises JailNotFoundError (invalid config).
|
||||
|
||||
When fail2ban cannot create a jail due to invalid configuration
|
||||
(e.g., missing logpath), it raises UnknownJailException which becomes
|
||||
JailNotFoundError. This test verifies proper handling and rollback.
|
||||
|
||||
Expects:
|
||||
- The .local file is restored to its original content.
|
||||
- The response indicates recovered=True.
|
||||
- The error message mentions the logpath issue.
|
||||
"""
|
||||
from app.models.config import ActivateJailRequest, JailValidationResult
|
||||
from app.services.jail_service import JailNotFoundError
|
||||
|
||||
_write(tmp_path / "jail.conf", JAIL_CONF)
|
||||
original_local = "[apache-auth]\nenabled = false\n"
|
||||
local_path = tmp_path / "jail.d" / "apache-auth.local"
|
||||
local_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
local_path.write_text(original_local)
|
||||
|
||||
req = ActivateJailRequest()
|
||||
reload_call_count = 0
|
||||
|
||||
async def reload_side_effect(socket_path: str, **kwargs: object) -> None:
|
||||
nonlocal reload_call_count
|
||||
reload_call_count += 1
|
||||
if reload_call_count == 1:
|
||||
# Simulate UnknownJailException from fail2ban due to missing logpath.
|
||||
raise JailNotFoundError("apache-auth")
|
||||
# Recovery reload succeeds.
|
||||
|
||||
with (
|
||||
patch(
|
||||
"app.services.config_file_service._get_active_jail_names",
|
||||
new=AsyncMock(return_value=set()),
|
||||
),
|
||||
patch("app.services.config_file_service.jail_service") as mock_js,
|
||||
patch(
|
||||
"app.services.config_file_service._probe_fail2ban_running",
|
||||
new=AsyncMock(return_value=True),
|
||||
),
|
||||
patch(
|
||||
"app.services.config_file_service._validate_jail_config_sync",
|
||||
return_value=JailValidationResult(
|
||||
jail_name="apache-auth", valid=True
|
||||
),
|
||||
),
|
||||
):
|
||||
mock_js.reload_all = AsyncMock(side_effect=reload_side_effect)
|
||||
mock_js.JailNotFoundError = JailNotFoundError
|
||||
result = await activate_jail(
|
||||
str(tmp_path), "/fake.sock", "apache-auth", req
|
||||
)
|
||||
|
||||
assert result.active is False
|
||||
assert result.recovered is True
|
||||
assert local_path.read_text() == original_local
|
||||
# Verify the error message mentions logpath issues.
|
||||
assert "logpath" in result.message.lower() or "check that all logpath" in result.message.lower()
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -184,10 +184,90 @@ class TestListJails:
|
||||
with patch("app.services.jail_service.Fail2BanClient", _FailClient), pytest.raises(Fail2BanConnectionError):
|
||||
await jail_service.list_jails(_SOCKET)
|
||||
|
||||
async def test_backend_idle_commands_unsupported(self) -> None:
|
||||
"""list_jails handles unsupported backend and idle commands gracefully.
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# get_jail
|
||||
# ---------------------------------------------------------------------------
|
||||
When the fail2ban daemon does not support get ... backend/idle commands,
|
||||
list_jails should not send them, avoiding "Invalid command" errors in the
|
||||
fail2ban log.
|
||||
"""
|
||||
# Reset the capability cache to test detection.
|
||||
jail_service._backend_cmd_supported = None
|
||||
|
||||
responses = {
|
||||
"status": _make_global_status("sshd"),
|
||||
"status|sshd|short": _make_short_status(),
|
||||
# Capability probe: get backend fails (command not supported).
|
||||
"get|sshd|backend": (1, Exception("Invalid command (no get action or not yet implemented)")),
|
||||
# Subsequent gets should still work.
|
||||
"get|sshd|bantime": (0, 600),
|
||||
"get|sshd|findtime": (0, 600),
|
||||
"get|sshd|maxretry": (0, 5),
|
||||
}
|
||||
with _patch_client(responses):
|
||||
result = await jail_service.list_jails(_SOCKET)
|
||||
|
||||
# Verify the result uses the default values for backend and idle.
|
||||
jail = result.jails[0]
|
||||
assert jail.backend == "polling" # default
|
||||
assert jail.idle is False # default
|
||||
# Capability should now be cached as False.
|
||||
assert jail_service._backend_cmd_supported is False
|
||||
|
||||
async def test_backend_idle_commands_supported(self) -> None:
|
||||
"""list_jails detects and sends backend/idle commands when supported."""
|
||||
# Reset the capability cache to test detection.
|
||||
jail_service._backend_cmd_supported = None
|
||||
|
||||
responses = {
|
||||
"status": _make_global_status("sshd"),
|
||||
"status|sshd|short": _make_short_status(),
|
||||
# Capability probe: get backend succeeds.
|
||||
"get|sshd|backend": (0, "systemd"),
|
||||
# All other commands.
|
||||
"get|sshd|bantime": (0, 600),
|
||||
"get|sshd|findtime": (0, 600),
|
||||
"get|sshd|maxretry": (0, 5),
|
||||
"get|sshd|idle": (0, True),
|
||||
}
|
||||
with _patch_client(responses):
|
||||
result = await jail_service.list_jails(_SOCKET)
|
||||
|
||||
# Verify real values are returned.
|
||||
jail = result.jails[0]
|
||||
assert jail.backend == "systemd" # real value
|
||||
assert jail.idle is True # real value
|
||||
# Capability should now be cached as True.
|
||||
assert jail_service._backend_cmd_supported is True
|
||||
|
||||
async def test_backend_idle_commands_cached_after_first_probe(self) -> None:
|
||||
"""list_jails caches capability result and reuses it across polling cycles."""
|
||||
# Reset the capability cache.
|
||||
jail_service._backend_cmd_supported = None
|
||||
|
||||
responses = {
|
||||
"status": _make_global_status("sshd, nginx"),
|
||||
# Probes happen once per polling cycle (for the first jail listed).
|
||||
"status|sshd|short": _make_short_status(),
|
||||
"status|nginx|short": _make_short_status(),
|
||||
# Capability probe: backend is unsupported.
|
||||
"get|sshd|backend": (1, Exception("Invalid command")),
|
||||
# Subsequent jails do not trigger another probe; they use cached result.
|
||||
# (The mock doesn't have get|nginx|backend because it shouldn't be called.)
|
||||
"get|sshd|bantime": (0, 600),
|
||||
"get|sshd|findtime": (0, 600),
|
||||
"get|sshd|maxretry": (0, 5),
|
||||
"get|nginx|bantime": (0, 600),
|
||||
"get|nginx|findtime": (0, 600),
|
||||
"get|nginx|maxretry": (0, 5),
|
||||
}
|
||||
with _patch_client(responses):
|
||||
result = await jail_service.list_jails(_SOCKET)
|
||||
|
||||
# Both jails should return default values (cached result is False).
|
||||
for jail in result.jails:
|
||||
assert jail.backend == "polling"
|
||||
assert jail.idle is False
|
||||
|
||||
|
||||
class TestGetJail:
|
||||
@@ -339,6 +419,28 @@ class TestJailControls:
|
||||
_SOCKET, include_jails=["new"], exclude_jails=["old"]
|
||||
)
|
||||
|
||||
async def test_reload_all_unknown_jail_raises_jail_not_found(self) -> None:
|
||||
"""reload_all detects UnknownJailException and raises JailNotFoundError.
|
||||
|
||||
When fail2ban cannot load a jail due to invalid configuration (e.g.,
|
||||
missing logpath), it raises UnknownJailException during reload. This
|
||||
test verifies that reload_all detects this and re-raises as
|
||||
JailNotFoundError instead of the generic JailOperationError.
|
||||
"""
|
||||
with _patch_client(
|
||||
{
|
||||
"status": _make_global_status("sshd"),
|
||||
"reload|--all|[]|[['start', 'airsonic-auth'], ['start', 'sshd']]": (
|
||||
1,
|
||||
Exception("UnknownJailException('airsonic-auth')"),
|
||||
),
|
||||
}
|
||||
), pytest.raises(jail_service.JailNotFoundError) as exc_info:
|
||||
await jail_service.reload_all(
|
||||
_SOCKET, include_jails=["airsonic-auth"]
|
||||
)
|
||||
assert exc_info.value.name == "airsonic-auth"
|
||||
|
||||
async def test_start_not_found_raises(self) -> None:
|
||||
"""start_jail raises JailNotFoundError for unknown jail."""
|
||||
with _patch_client({"start|ghost": (1, Exception("Unknown jail: 'ghost'"))}), pytest.raises(JailNotFoundError):
|
||||
|
||||
Reference in New Issue
Block a user