Fix restart/reload endpoint correctness and safety
- jail_service.restart(): replace the invalid ["restart"] socket command with ["stop"], matching the fail2ban transmitter protocol. The daemon is now stopped via the socket; the caller starts it via subprocess.
- config_file_service: expose _start_daemon and _wait_for_fail2ban as public start_daemon / wait_for_fail2ban functions.
- restart_fail2ban router: orchestrate stop (socket) → start (subprocess) → probe (socket). Returns 204 on success, 503 when fail2ban does not come back within 10 s. Catches JailOperationError → 409.
- reload_fail2ban router: add JailOperationError catch → 409 Conflict, consistent with other jail control endpoints.
- Tests: add TestJailControls.test_restart_* (3 cases), TestReloadFail2ban 502/409 cases, TestRestartFail2ban (5 cases), TestRollbackJail (6 integration tests verifying file-write, subprocess invocation, socket-probe truthiness, active_jails count, and offline-at-call-time).
This commit is contained in:
@@ -40,9 +40,12 @@ from __future__ import annotations
|
||||
import datetime
|
||||
from typing import Annotated
|
||||
|
||||
import structlog
|
||||
from fastapi import APIRouter, HTTPException, Path, Query, Request, status
|
||||
|
||||
from app.dependencies import AuthDep
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
from app.models.config import (
|
||||
ActionConfig,
|
||||
ActionCreateRequest,
|
||||
@@ -97,6 +100,7 @@ from app.services.config_service import (
|
||||
ConfigValidationError,
|
||||
JailNotFoundError,
|
||||
)
|
||||
from app.services.jail_service import JailOperationError
|
||||
from app.tasks.health_check import _run_probe
|
||||
from app.utils.fail2ban_client import Fail2BanConnectionError
|
||||
|
||||
@@ -357,11 +361,17 @@ async def reload_fail2ban(
|
||||
_auth: Validated session.
|
||||
|
||||
Raises:
|
||||
HTTPException: 409 when fail2ban reports the reload failed.
|
||||
HTTPException: 502 when fail2ban is unreachable.
|
||||
"""
|
||||
socket_path: str = request.app.state.settings.fail2ban_socket
|
||||
try:
|
||||
await jail_service.reload_all(socket_path)
|
||||
except JailOperationError as exc:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail=f"fail2ban reload failed: {exc}",
|
||||
) from exc
|
||||
except Fail2BanConnectionError as exc:
|
||||
raise _bad_gateway(exc) from exc
|
||||
|
||||
@@ -381,24 +391,57 @@ async def restart_fail2ban(
|
||||
) -> None:
|
||||
"""Trigger a full fail2ban service restart.
|
||||
|
||||
The fail2ban daemon is completely stopped and then started again,
|
||||
re-reading all configuration files in the process.
|
||||
Stops the fail2ban daemon via the Unix domain socket, then starts it
|
||||
again using the configured ``fail2ban_start_command``. After starting,
|
||||
probes the socket for up to 10 seconds to confirm the daemon came back
|
||||
online.
|
||||
|
||||
Args:
|
||||
request: Incoming request.
|
||||
_auth: Validated session.
|
||||
|
||||
Raises:
|
||||
HTTPException: 502 when fail2ban is unreachable.
|
||||
HTTPException: 409 when fail2ban reports the stop command failed.
|
||||
HTTPException: 502 when fail2ban is unreachable for the stop command.
|
||||
HTTPException: 503 when fail2ban does not come back online within
|
||||
10 seconds after being started. Check the fail2ban log for
|
||||
initialisation errors. Use
|
||||
``POST /api/config/jails/{name}/rollback`` if a specific jail
|
||||
is suspect.
|
||||
"""
|
||||
socket_path: str = request.app.state.settings.fail2ban_socket
|
||||
start_cmd: str = request.app.state.settings.fail2ban_start_command
|
||||
start_cmd_parts: list[str] = start_cmd.split()
|
||||
|
||||
# Step 1: stop the daemon via socket.
|
||||
try:
|
||||
# Perform restart by sending the restart command via the fail2ban socket.
|
||||
# If fail2ban is not running, this will raise an exception, and we return 502.
|
||||
await jail_service.restart(socket_path)
|
||||
except JailOperationError as exc:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail=f"fail2ban stop command failed: {exc}",
|
||||
) from exc
|
||||
except Fail2BanConnectionError as exc:
|
||||
raise _bad_gateway(exc) from exc
|
||||
|
||||
# Step 2: start the daemon via subprocess.
|
||||
await config_file_service.start_daemon(start_cmd_parts)
|
||||
|
||||
# Step 3: probe the socket until fail2ban is responsive or the budget expires.
|
||||
fail2ban_running: bool = await config_file_service.wait_for_fail2ban(
|
||||
socket_path, max_wait_seconds=10.0
|
||||
)
|
||||
if not fail2ban_running:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
detail=(
|
||||
"fail2ban was stopped but did not come back online within 10 seconds. "
|
||||
"Check the fail2ban log for initialisation errors. "
|
||||
"Use POST /api/config/jails/{name}/rollback if a specific jail is suspect."
|
||||
),
|
||||
)
|
||||
log.info("fail2ban_restarted")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Regex tester (stateless)
|
||||
|
||||
Reference in New Issue
Block a user