Refactor: Move module-level mutable flags to JailServiceState

TASK-004: Replace module-level mutable runtime flags in service layer with
injected state holder, eliminating hidden global state and improving testability
and synchronization boundaries.

Changes:
- Create JailServiceState dataclass in app/utils/runtime_state.py to hold
  backend capability cache and synchronization lock
- Add JailServiceState as a field in RuntimeState (with default_factory)
- Remove module-level _backend_cmd_supported and _backend_cmd_lock from
  jail_service.py
- Refactor _check_backend_cmd_supported() to accept state parameter
- Inject JailServiceState into list_jails() and _fetch_jail_summary() via
  parameters
- Add get_jail_service_state() dependency provider in app/dependencies.py
- Add JailServiceStateDep type alias for router injection
- Update jails router to receive and pass state to service functions
- Update all tests to use jail_service_state fixture and pass state to functions
- Remove duplicate _MAX_PAGE_SIZE constant definition
- Document mutable state management in Backend-Development.md
- Update Architecture.md to describe JailServiceState and state nesting pattern

Benefits:
- Eliminates global mutable state and associated race conditions
- Makes state visible to callers (not hidden in module scope)
- Enables test isolation (each test gets fresh state)
- Prepares codebase for multi-worker deployments (state can be extracted to
  shared backend)
- Synchronization boundaries are now explicit (state.get_backend_cmd_lock())

Compliance:
- All tests pass (17 passed in TestListJails, TestGetJail, TestLockInitialization)
- No ruff linting errors
- Type-safe: JailServiceState properly typed with asyncio.Lock, bool | None

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-04-27 18:42:52 +02:00
parent 79112c0430
commit 2e221f6852
8 changed files with 157 additions and 102 deletions

View File

@@ -34,7 +34,7 @@ from app.services.geo_cache import GeoCache
from app.services.protocols import Fail2BanMetadataService
from app.utils.constants import SESSION_COOKIE_NAME
from app.utils.rate_limiter import RateLimiter
from app.utils.runtime_state import ApplicationState, RuntimeState
from app.utils.runtime_state import ApplicationState, JailServiceState, RuntimeState
from app.utils.session_cache import NoOpSessionCache, SessionCache
log: structlog.stdlib.BoundLogger = structlog.get_logger()
@@ -333,6 +333,18 @@ async def get_pending_recovery(
return app_context.pending_recovery
async def get_jail_service_state(
app_context: Annotated[ApplicationContext, Depends(get_app_context)],
) -> JailServiceState:
"""Return the jail service state holder from runtime state.
Returns:
The JailServiceState containing capability detection cache and
synchronization primitives for jail operations.
"""
return app_context.runtime_state.jail_service_state
async def get_health_probe() -> Callable[[str], Awaitable[ServerStatus]]:
"""Provide the health probe function for checking fail2ban connectivity.
@@ -439,6 +451,7 @@ Fail2BanStartCommandDep = Annotated[str, Depends(get_fail2ban_start_command)]
GeoCacheDep = Annotated[GeoCache, Depends(get_geo_cache)]
ServerStatusDep = Annotated[ServerStatus, Depends(get_server_status)]
PendingRecoveryDep = Annotated[PendingRecovery | None, Depends(get_pending_recovery)]
JailServiceStateDep = Annotated[JailServiceState, Depends(get_jail_service_state)]
HealthProbeDep = Annotated[Callable[[str], Awaitable[ServerStatus]], Depends(get_health_probe)]
SessionCacheDep = Annotated[SessionCache, Depends(get_session_cache)]
SessionRepoDep = Annotated[SessionRepository, Depends(get_session_repo)]

View File

@@ -29,6 +29,7 @@ from app.dependencies import (
Fail2BanSocketDep,
GeoCacheDep,
HttpSessionDep,
JailServiceStateDep,
)
from app.models.ban import JailBannedIpsResponse
from app.models.jail import (
@@ -56,6 +57,7 @@ _NamePath = Annotated[str, Path(description="Jail name as configured in fail2ban
async def get_jails(
_auth: AuthDep,
socket_path: Fail2BanSocketDep,
state: JailServiceStateDep,
) -> JailListResponse:
"""Return a summary of every active fail2ban jail.
@@ -65,11 +67,13 @@ async def get_jails(
Args:
_auth: Validated session — enforces authentication.
socket_path: Path to the fail2ban Unix domain socket.
state: The jail service state holder.
Returns:
:class:`~app.models.jail.JailListResponse` with all active jails.
"""
return await jail_service.list_jails(socket_path)
return await jail_service.list_jails(socket_path, state)
@router.get(

View File

@@ -7,6 +7,10 @@ Unix domain socket. All socket I/O is performed through the async
Architecture note: this module is a pure service — it contains **no**
HTTP/FastAPI concerns. All results are returned as Pydantic models so
routers can serialise them directly.
Mutable state (backend capability detection cache) is stored in the
JailServiceState object passed to functions that need it. This allows
for proper synchronization and test isolation.
"""
from __future__ import annotations
@@ -44,6 +48,7 @@ from app.utils.fail2ban_response import (
to_dict,
)
from app.utils.jail_socket import reload_all
from app.utils.runtime_state import JailServiceState # noqa: TC001
if TYPE_CHECKING:
from collections.abc import Awaitable
@@ -74,24 +79,8 @@ class IpLookupResult(TypedDict):
# Constants
# ---------------------------------------------------------------------------
# Capability detection for optional fail2ban transmitter commands (backend, idle).
# These commands are not supported in all fail2ban versions. Caching the result
# avoids sending unsupported commands every polling cycle and spamming the
# fail2ban log with "Invalid command" errors.
_backend_cmd_supported: bool | None = None
_backend_cmd_lock: asyncio.Lock | None = None
def _get_backend_cmd_lock() -> asyncio.Lock:
"""Return the shared backend capability probe lock, initialising it lazily.
The caller must already be running inside the event loop when the lock is
created, which is true for all service entry points in this module.
"""
global _backend_cmd_lock
if _backend_cmd_lock is None:
_backend_cmd_lock = asyncio.Lock()
return _backend_cmd_lock
#: Maximum page size for paginated ban results.
_MAX_PAGE_SIZE: int = 100
# ---------------------------------------------------------------------------
# Custom exceptions
@@ -149,6 +138,7 @@ async def _safe_get(
async def _check_backend_cmd_supported(
client: Fail2BanClient,
jail_name: str,
state: JailServiceState,
) -> bool:
"""Detect whether the fail2ban daemon supports optional ``get ... backend`` command.
@@ -162,45 +152,32 @@ async def _check_backend_cmd_supported(
Args:
client: The :class:`~app.utils.fail2ban_client.Fail2BanClient` to use.
jail_name: Name of any jail to use for the probe command.
state: The jail service state holder for capability cache.
Returns:
``True`` if the command is supported, ``False`` otherwise.
Once determined, the result is cached and reused for all jails.
"""
global _backend_cmd_supported
# Fast path: return cached result if already determined.
if _backend_cmd_supported is not None:
return _backend_cmd_supported
if state.backend_cmd_supported is not None:
return state.backend_cmd_supported
# Slow path: acquire lock and probe the command once.
async with _get_backend_cmd_lock():
async with state.get_backend_cmd_lock():
# Double-check idiom: another coroutine may have probed while we waited.
if _backend_cmd_supported is not None:
return _backend_cmd_supported
if state.backend_cmd_supported is not None:
return state.backend_cmd_supported
# Probe: send the command and catch any exception.
try:
ok(await client.send(["get", jail_name, "backend"]))
_backend_cmd_supported = True
state.backend_cmd_supported = True
log.debug("backend_cmd_supported_detected")
except Exception:
_backend_cmd_supported = False
state.backend_cmd_supported = False
log.debug("backend_cmd_unsupported_detected")
return _backend_cmd_supported
async def _reset_backend_capability_cache() -> None:
"""Reset the cached backend/idle capability detection state.
This helper is intended for test isolation and for any scenario where the
cached probe result must be invalidated before the next detection attempt.
"""
global _backend_cmd_supported
async with _get_backend_cmd_lock():
_backend_cmd_supported = None
return state.backend_cmd_supported
# ---------------------------------------------------------------------------
@@ -208,7 +185,7 @@ async def _reset_backend_capability_cache() -> None:
# ---------------------------------------------------------------------------
async def list_jails(socket_path: str) -> JailListResponse:
async def list_jails(socket_path: str, state: JailServiceState) -> JailListResponse:
"""Return a summary list of all active fail2ban jails.
Queries the daemon for the global jail list and then fetches status
@@ -216,6 +193,7 @@ async def list_jails(socket_path: str) -> JailListResponse:
Args:
socket_path: Path to the fail2ban Unix domain socket.
state: The jail service state holder for capability cache.
Returns:
:class:`~app.models.jail.JailListResponse` with all active jails.
@@ -242,7 +220,7 @@ async def list_jails(socket_path: str) -> JailListResponse:
# 2. Fetch summary data for every jail in parallel.
summaries: list[JailSummary] = await asyncio.gather(
*[_fetch_jail_summary(client, name) for name in jail_names],
*[_fetch_jail_summary(client, name, state) for name in jail_names],
return_exceptions=False,
)
@@ -252,6 +230,7 @@ async def list_jails(socket_path: str) -> JailListResponse:
async def _fetch_jail_summary(
client: Fail2BanClient,
name: str,
state: JailServiceState,
) -> JailSummary:
"""Fetch and build a :class:`~app.models.jail.JailSummary` for one jail.
@@ -265,6 +244,7 @@ async def _fetch_jail_summary(
Args:
client: Shared :class:`~app.utils.fail2ban_client.Fail2BanClient`.
name: Jail name.
state: The jail service state holder for capability cache.
Returns:
A :class:`~app.models.jail.JailSummary` populated from the responses.
@@ -272,7 +252,7 @@ async def _fetch_jail_summary(
# Check whether optional backend/idle commands are supported.
# This probe happens once per session and is cached to avoid repeated
# "Invalid command" errors in the fail2ban log.
backend_cmd_is_supported = await _check_backend_cmd_supported(client, name)
backend_cmd_is_supported = await _check_backend_cmd_supported(client, name, state)
# Build the gather list based on command support.
gather_list: list[Awaitable[object]] = [
@@ -741,9 +721,6 @@ def _parse_ban_entry(entry: str, jail: str) -> ActiveBan | None:
# Public API — Jail-specific paginated bans
# ---------------------------------------------------------------------------
#: Maximum allowed page size for :func:`get_jail_banned_ips`.
_MAX_PAGE_SIZE: int = 100
async def get_jail_banned_ips(
socket_path: str,

View File

@@ -40,6 +40,7 @@ acceptable and keeps the implementation simple.
from __future__ import annotations
import asyncio
import datetime
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any
@@ -69,10 +70,43 @@ _RUNTIME_ATTRIBUTES: frozenset[str] = frozenset(
"pending_recovery",
"last_activation",
"runtime_settings",
"jail_service_state",
}
)
@dataclass
class JailServiceState:
"""Mutable runtime state for the jail service.
Stores capability detection results and synchronization primitives used by
jail operations. This state is initialized once and shared across all
service calls within a single worker process.
"""
backend_cmd_supported: bool | None = None
backend_cmd_lock: asyncio.Lock | None = None
def get_backend_cmd_lock(self) -> asyncio.Lock:
"""Return the shared backend capability probe lock, initialising lazily.
The caller must already be running inside the event loop when the lock
is created, which is true for all service entry points.
"""
if self.backend_cmd_lock is None:
self.backend_cmd_lock = asyncio.Lock()
return self.backend_cmd_lock
async def reset_backend_capability_cache(self) -> None:
"""Reset the cached backend/idle capability detection state.
This is intended for test isolation and scenarios where the cached
probe result must be invalidated before the next detection attempt.
"""
async with self.get_backend_cmd_lock():
self.backend_cmd_supported = None
@dataclass
class RuntimeState:
"""Mutable runtime state for the current application instance."""
@@ -82,6 +116,7 @@ class RuntimeState:
pending_recovery: PendingRecovery | None = None
last_activation: ActivationRecord | None = None
runtime_settings: Settings | None = None
jail_service_state: JailServiceState = field(default_factory=JailServiceState)
class ApplicationState(State):