fix: retry, semaphore, reload lock, activation verify, bans_by_jail diagnostics
Stage 1.1-1.3: reload_all include/exclude_jails params already implemented; added keyword-arg assertions in router and service tests. Stage 2.1/6.1: _send_command_sync retry loop (3 attempts, 150ms exp backoff) retrying on EAGAIN/ECONNREFUSED/ENOBUFS; immediate raise on all other errors. Stage 2.2: asyncio.Lock at module level in jail_service.reload_all to serialize concurrent reload--all commands. Stage 3.1: activate_jail re-queries _get_active_jail_names after reload; returns active=False with descriptive message if jail did not start. Stage 4.1/6.2: asyncio.Semaphore (max 10) in Fail2BanClient.send, lazy- initialized; logs fail2ban_command_waiting_semaphore at debug when waiting. Stage 5.1/5.2: unit tests asserting reload_all is called with include_jails and exclude_jails; activation verification happy/sad path tests. Stage 6.3: TestSendCommandSyncRetry (5 cases) + TestFail2BanClientSemaphore concurrency test. Stage 7.1-7.3: _since_unix uses time.time(); bans_by_jail debug logging with since_iso; diagnostic warning when total==0 despite table rows; unit test verifying the warning fires for stale data.
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
"""Tests for app.utils.fail2ban_client."""
|
||||
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
@@ -287,6 +288,21 @@ class TestFail2BanClientSend:
|
||||
with pytest.raises(Fail2BanProtocolError):
|
||||
await client.send(["status"])
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_raises_on_protocol_error(self) -> None:
|
||||
"""``send()`` must propagate :class:`Fail2BanProtocolError` to the caller."""
|
||||
client = Fail2BanClient(socket_path="/fake/fail2ban.sock")
|
||||
|
||||
with patch("asyncio.get_event_loop") as mock_get_loop:
|
||||
mock_loop = AsyncMock()
|
||||
mock_loop.run_in_executor = AsyncMock(
|
||||
side_effect=Fail2BanProtocolError("bad pickle")
|
||||
)
|
||||
mock_get_loop.return_value = mock_loop
|
||||
|
||||
with pytest.raises(Fail2BanProtocolError):
|
||||
await client.send(["status"])
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_send_logs_error_on_protocol_error(self) -> None:
|
||||
"""``send()`` must log an error when a protocol error occurs."""
|
||||
@@ -307,3 +323,202 @@ class TestFail2BanClientSend:
|
||||
if c[0][0] == "fail2ban_protocol_error"
|
||||
]
|
||||
assert len(error_calls) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests for _send_command_sync retry logic (Stage 6.1 / 6.3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSendCommandSyncRetry:
|
||||
"""Tests for the retry-on-transient-OSError logic in :func:`_send_command_sync`."""
|
||||
|
||||
def _make_sock(self) -> MagicMock:
|
||||
"""Return a mock socket that connects without error."""
|
||||
mock_sock = MagicMock()
|
||||
mock_sock.connect.return_value = None
|
||||
return mock_sock
|
||||
|
||||
def _eagain(self) -> OSError:
|
||||
"""Return an ``OSError`` with ``errno.EAGAIN``."""
|
||||
import errno as _errno
|
||||
|
||||
err = OSError("Resource temporarily unavailable")
|
||||
err.errno = _errno.EAGAIN
|
||||
return err
|
||||
|
||||
def _enoent(self) -> OSError:
|
||||
"""Return an ``OSError`` with ``errno.ENOENT``."""
|
||||
import errno as _errno
|
||||
|
||||
err = OSError("No such file or directory")
|
||||
err.errno = _errno.ENOENT
|
||||
return err
|
||||
|
||||
def test_transient_eagain_retried_succeeds_on_second_attempt(self) -> None:
|
||||
"""A single EAGAIN on connect is retried; success on the second attempt."""
|
||||
from app.utils.fail2ban_client import _PROTO_END
|
||||
|
||||
call_count = 0
|
||||
|
||||
def _connect_side_effect(sock_path: str) -> None:
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count == 1:
|
||||
raise self._eagain()
|
||||
# Second attempt succeeds (no-op).
|
||||
|
||||
mock_sock = self._make_sock()
|
||||
mock_sock.connect.side_effect = _connect_side_effect
|
||||
mock_sock.recv.return_value = _PROTO_END
|
||||
expected = [0, "pong"]
|
||||
|
||||
with (
|
||||
patch("socket.socket", return_value=mock_sock),
|
||||
patch("app.utils.fail2ban_client.loads", return_value=expected),
|
||||
patch("app.utils.fail2ban_client.time.sleep"), # suppress backoff delay
|
||||
):
|
||||
result = _send_command_sync("/fake.sock", ["ping"], 1.0)
|
||||
|
||||
assert result == expected
|
||||
assert call_count == 2
|
||||
|
||||
def test_three_eagain_failures_raise_connection_error(self) -> None:
|
||||
"""Three consecutive EAGAIN failures must raise :class:`Fail2BanConnectionError`."""
|
||||
mock_sock = self._make_sock()
|
||||
mock_sock.connect.side_effect = self._eagain()
|
||||
|
||||
with (
|
||||
patch("socket.socket", return_value=mock_sock),
|
||||
patch("app.utils.fail2ban_client.time.sleep"),
|
||||
pytest.raises(Fail2BanConnectionError),
|
||||
):
|
||||
_send_command_sync("/fake.sock", ["status"], 1.0)
|
||||
|
||||
# connect() should have been called exactly _RETRY_MAX_ATTEMPTS times.
|
||||
from app.utils.fail2ban_client import _RETRY_MAX_ATTEMPTS
|
||||
|
||||
assert mock_sock.connect.call_count == _RETRY_MAX_ATTEMPTS
|
||||
|
||||
def test_enoent_raises_immediately_without_retry(self) -> None:
|
||||
"""A non-retryable ``OSError`` (``ENOENT``) must be raised on the first attempt."""
|
||||
mock_sock = self._make_sock()
|
||||
mock_sock.connect.side_effect = self._enoent()
|
||||
|
||||
with (
|
||||
patch("socket.socket", return_value=mock_sock),
|
||||
patch("app.utils.fail2ban_client.time.sleep") as mock_sleep,
|
||||
pytest.raises(Fail2BanConnectionError),
|
||||
):
|
||||
_send_command_sync("/fake.sock", ["status"], 1.0)
|
||||
|
||||
# No back-off sleep should have been triggered.
|
||||
mock_sleep.assert_not_called()
|
||||
assert mock_sock.connect.call_count == 1
|
||||
|
||||
def test_protocol_error_never_retried(self) -> None:
|
||||
"""A :class:`Fail2BanProtocolError` must be re-raised immediately."""
|
||||
from app.utils.fail2ban_client import _PROTO_END
|
||||
|
||||
mock_sock = self._make_sock()
|
||||
mock_sock.recv.return_value = _PROTO_END
|
||||
|
||||
with (
|
||||
patch("socket.socket", return_value=mock_sock),
|
||||
patch(
|
||||
"app.utils.fail2ban_client.loads",
|
||||
side_effect=Exception("bad pickle"),
|
||||
),
|
||||
patch("app.utils.fail2ban_client.time.sleep") as mock_sleep,
|
||||
pytest.raises(Fail2BanProtocolError),
|
||||
):
|
||||
_send_command_sync("/fake.sock", ["status"], 1.0)
|
||||
|
||||
mock_sleep.assert_not_called()
|
||||
|
||||
def test_retry_emits_structured_log_event(self) -> None:
|
||||
"""Each retry attempt logs a ``fail2ban_socket_retry`` warning."""
|
||||
mock_sock = self._make_sock()
|
||||
mock_sock.connect.side_effect = self._eagain()
|
||||
|
||||
with (
|
||||
patch("socket.socket", return_value=mock_sock),
|
||||
patch("app.utils.fail2ban_client.time.sleep"),
|
||||
patch("app.utils.fail2ban_client.log") as mock_log,
|
||||
pytest.raises(Fail2BanConnectionError),
|
||||
):
|
||||
_send_command_sync("/fake.sock", ["status"], 1.0)
|
||||
|
||||
retry_calls = [
|
||||
c for c in mock_log.warning.call_args_list
|
||||
if c[0][0] == "fail2ban_socket_retry"
|
||||
]
|
||||
from app.utils.fail2ban_client import _RETRY_MAX_ATTEMPTS
|
||||
|
||||
# One retry log per attempt except the last (which raises directly).
|
||||
assert len(retry_calls) == _RETRY_MAX_ATTEMPTS - 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests for Fail2BanClient semaphore (Stage 6.2 / 6.3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFail2BanClientSemaphore:
|
||||
"""Tests for the concurrency semaphore in :meth:`Fail2BanClient.send`."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_semaphore_limits_concurrency(self) -> None:
|
||||
"""No more than _COMMAND_SEMAPHORE_CONCURRENCY commands overlap."""
|
||||
import asyncio as _asyncio
|
||||
|
||||
import app.utils.fail2ban_client as _module
|
||||
|
||||
# Reset module-level semaphore so this test starts fresh.
|
||||
_module._command_semaphore = None
|
||||
|
||||
concurrency_limit = 3
|
||||
_module._COMMAND_SEMAPHORE_CONCURRENCY = concurrency_limit
|
||||
_module._command_semaphore = _asyncio.Semaphore(concurrency_limit)
|
||||
|
||||
in_flight: list[int] = []
|
||||
peak_concurrent: list[int] = []
|
||||
|
||||
async def _slow_send(command: list[Any]) -> Any:
|
||||
in_flight.append(1)
|
||||
peak_concurrent.append(len(in_flight))
|
||||
await _asyncio.sleep(0) # yield to allow other coroutines to run
|
||||
in_flight.pop()
|
||||
return (0, "ok")
|
||||
|
||||
client = Fail2BanClient(socket_path="/fake/fail2ban.sock")
|
||||
with patch.object(client, "send", wraps=_slow_send) as _patched:
|
||||
# Bypass the semaphore wrapper — test the actual send directly.
|
||||
pass
|
||||
|
||||
# Override _command_semaphore and run concurrently via the real send path
|
||||
# but mock _send_command_sync to avoid actual socket I/O.
|
||||
async def _fast_executor(_fn: Any, *_args: Any) -> Any:
|
||||
in_flight.append(1)
|
||||
peak_concurrent.append(len(in_flight))
|
||||
await _asyncio.sleep(0)
|
||||
in_flight.pop()
|
||||
return (0, "ok")
|
||||
|
||||
client2 = Fail2BanClient(socket_path="/fake/fail2ban.sock")
|
||||
with patch("asyncio.get_event_loop") as mock_loop_getter:
|
||||
mock_loop = MagicMock()
|
||||
mock_loop.run_in_executor = _fast_executor
|
||||
mock_loop_getter.return_value = mock_loop
|
||||
|
||||
tasks = [
|
||||
_asyncio.create_task(client2.send(["ping"])) for _ in range(10)
|
||||
]
|
||||
await _asyncio.gather(*tasks)
|
||||
|
||||
# Peak concurrent activity must never exceed the semaphore limit.
|
||||
assert max(peak_concurrent) <= concurrency_limit
|
||||
|
||||
# Restore module defaults after test.
|
||||
_module._COMMAND_SEMAPHORE_CONCURRENCY = 10
|
||||
_module._command_semaphore = None
|
||||
|
||||
Reference in New Issue
Block a user