Files
BanGUI/backend/app/services/jail_config_service.py
Lukas 420ea18fd9 Refactor backend services and utilities
- Update service layer implementations
- Improve configuration handling utilities
- Update documentation tasks

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-25 18:39:30 +02:00

804 lines
28 KiB
Python

"""Jail configuration management for BanGUI.
Handles parsing, validation, and lifecycle operations (activate/deactivate)
for fail2ban jail configurations. Provides functions to discover inactive
jails, validate their configurations before activation, and manage jail
overrides in jail.d/*.local files.
"""
from __future__ import annotations
import asyncio
import contextlib
import os
import re
import tempfile
from pathlib import Path
from typing import cast
import structlog
from app.exceptions import (
ConfigWriteError,
JailAlreadyActiveError,
JailAlreadyInactiveError,
JailNotFoundError,
JailNotFoundInConfigError,
)
from app.models.config import (
ActivateJailRequest,
InactiveJail,
InactiveJailListResponse,
JailActivationResponse,
JailValidationResult,
RollbackResponse,
)
from app.services import health_service
from app.utils.async_utils import run_blocking
from app.utils.config_file_utils import (
_build_inactive_jail,
_probe_fail2ban_running,
_safe_jail_name,
start_daemon,
wait_for_fail2ban,
)
from app.utils.config_file_utils import (
_get_active_jail_names as _config_file_get_active_jail_names,
)
from app.utils.config_file_utils import (
_parse_jails_sync as _config_file_parse_jails_sync,
)
from app.utils.config_file_utils import (
_validate_jail_config_sync as _config_file_validate_jail_config_sync,
)
from app.utils.jail_socket import reload_all
log: structlog.stdlib.BoundLogger = structlog.get_logger()
def _parse_jails_sync(config_dir: Path) -> tuple[dict[str, dict[str, str]], dict[str, str]]:
"""Parse jail config files using the shared config helper."""
return _config_file_parse_jails_sync(config_dir)
async def _get_active_jail_names(socket_path: str) -> set[str]:
"""Return the currently active jail names from fail2ban."""
return await _config_file_get_active_jail_names(socket_path)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Sections that are not jail definitions.
_META_SECTIONS: frozenset[str] = frozenset({"INCLUDES", "DEFAULT"})
# Seconds to wait between fail2ban liveness probes after a reload.
_POST_RELOAD_PROBE_INTERVAL: float = 2.0
async def run_probe(socket_path: str) -> "ServerStatus":
"""Run a health probe against the fail2ban socket."""
return await health_service.probe(socket_path)
# Maximum number of post-reload probe attempts (initial attempt + retries).
_POST_RELOAD_MAX_ATTEMPTS: int = 4
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _write_local_override_sync(
config_dir: Path,
jail_name: str,
enabled: bool,
overrides: dict[str, object],
) -> None:
"""Write a ``jail.d/{name}.local`` file atomically.
Always writes to ``jail.d/{jail_name}.local``. If the file already
exists it is replaced entirely. The write is atomic: content is
written to a temp file first, then renamed into place.
Args:
config_dir: The fail2ban configuration root directory.
jail_name: Validated jail name (used as filename stem).
enabled: Value to write for ``enabled =``.
overrides: Optional setting overrides (bantime, findtime, maxretry,
port, logpath).
Raises:
ConfigWriteError: If writing fails.
"""
jail_d = config_dir / "jail.d"
try:
jail_d.mkdir(parents=True, exist_ok=True)
except OSError as exc:
raise ConfigWriteError(f"Cannot create jail.d directory: {exc}") from exc
local_path = jail_d / f"{jail_name}.local"
lines: list[str] = [
"# Managed by BanGUI — do not edit manually",
"",
f"[{jail_name}]",
"",
f"enabled = {'true' if enabled else 'false'}",
# Provide explicit banaction defaults so fail2ban can resolve the
# %(banaction)s interpolation used in the built-in action_ chain.
"banaction = iptables-multiport",
"banaction_allports = iptables-allports",
]
if overrides.get("bantime") is not None:
lines.append(f"bantime = {overrides['bantime']}")
if overrides.get("findtime") is not None:
lines.append(f"findtime = {overrides['findtime']}")
if overrides.get("maxretry") is not None:
lines.append(f"maxretry = {overrides['maxretry']}")
if overrides.get("port") is not None:
lines.append(f"port = {overrides['port']}")
if overrides.get("logpath"):
paths: list[str] = cast("list[str]", overrides["logpath"])
if paths:
lines.append(f"logpath = {paths[0]}")
for p in paths[1:]:
lines.append(f" {p}")
content = "\n".join(lines) + "\n"
try:
with tempfile.NamedTemporaryFile(
mode="w",
encoding="utf-8",
dir=jail_d,
delete=False,
suffix=".tmp",
) as tmp:
tmp.write(content)
tmp_name = tmp.name
os.replace(tmp_name, local_path)
except OSError as exc:
# Clean up temp file if rename failed.
with contextlib.suppress(OSError):
os.unlink(tmp_name) # noqa: F821 — only reachable when tmp_name is set
raise ConfigWriteError(f"Failed to write {local_path}: {exc}") from exc
log.info(
"jail_local_written",
jail=jail_name,
path=str(local_path),
enabled=enabled,
)
def _restore_local_file_sync(local_path: Path, original_content: bytes | None) -> None:
"""Restore a ``.local`` file to its pre-activation state.
If *original_content* is ``None``, the file is deleted (it did not exist
before the activation). Otherwise the original bytes are written back
atomically via a temp-file rename.
Args:
local_path: Absolute path to the ``.local`` file to restore.
original_content: Original raw bytes to write back, or ``None`` to
delete the file.
Raises:
ConfigWriteError: If the write or delete operation fails.
"""
if original_content is None:
try:
local_path.unlink(missing_ok=True)
except OSError as exc:
raise ConfigWriteError(f"Failed to delete {local_path} during rollback: {exc}") from exc
return
tmp_name: str | None = None
try:
with tempfile.NamedTemporaryFile(
mode="wb",
dir=local_path.parent,
delete=False,
suffix=".tmp",
) as tmp:
tmp.write(original_content)
tmp_name = tmp.name
os.replace(tmp_name, local_path)
except OSError as exc:
with contextlib.suppress(OSError):
if tmp_name is not None:
os.unlink(tmp_name)
raise ConfigWriteError(f"Failed to restore {local_path} during rollback: {exc}") from exc
def _validate_regex_patterns(patterns: list[str]) -> None:
"""Validate each pattern in *patterns* using Python's ``re`` module.
Args:
patterns: List of regex strings to validate.
Raises:
FilterInvalidRegexError: If any pattern fails to compile.
"""
for pattern in patterns:
try:
re.compile(pattern)
except re.error as exc:
# Import here to avoid circular dependency
from app.exceptions import FilterInvalidRegexError
raise FilterInvalidRegexError(pattern, str(exc)) from exc
# Shared functions from the legacy config helper are imported directly from
# the canonical shared helper module.
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
async def list_inactive_jails(
config_dir: str,
socket_path: str,
) -> InactiveJailListResponse:
"""Return all jails defined in config files that are not currently active.
Parses ``jail.conf``, ``jail.local``, and ``jail.d/`` following the
fail2ban merge order. A jail is considered inactive when:
- Its merged ``enabled`` value is ``false`` (or absent, which defaults to
``false`` in fail2ban), **or**
- Its ``enabled`` value is ``true`` in config but fail2ban does not report
it as running.
Args:
config_dir: Absolute path to the fail2ban configuration directory.
socket_path: Path to the fail2ban Unix domain socket.
Returns:
:class:`~app.models.config.InactiveJailListResponse` with all
inactive jails.
"""
parsed_result: tuple[dict[str, dict[str, str]], dict[str, str]] = await run_blocking(
_parse_jails_sync,
Path(config_dir),
)
all_jails, source_files = parsed_result
active_names: set[str] = await _get_active_jail_names(socket_path)
inactive: list[InactiveJail] = []
for jail_name, settings in sorted(all_jails.items()):
if jail_name in active_names:
# fail2ban reports this jail as running — skip it.
continue
source = source_files.get(jail_name, config_dir)
inactive.append(_build_inactive_jail(jail_name, settings, source, Path(config_dir)))
log.info(
"inactive_jails_listed",
total_defined=len(all_jails),
active=len(active_names),
inactive=len(inactive),
)
return InactiveJailListResponse(jails=inactive, total=len(inactive))
async def activate_jail(
config_dir: str,
socket_path: str,
name: str,
req: ActivateJailRequest,
) -> JailActivationResponse:
"""Activate a jail and update the health-check cache.
This wrapper delegates the file-based activation workflow to the
lower-level implementation and runs an immediate probe so the UI
reflects the current fail2ban state.
"""
result = await _activate_jail(config_dir, socket_path, name, req)
await run_probe(socket_path)
return result
async def _activate_jail(
config_dir: str,
socket_path: str,
name: str,
req: ActivateJailRequest,
) -> JailActivationResponse:
"""Enable an inactive jail and reload fail2ban.
Performs pre-activation validation, writes ``enabled = true`` (plus any
override values from *req*) to ``jail.d/{name}.local``, and triggers a
full fail2ban reload. After the reload a multi-attempt health probe
determines whether fail2ban (and the specific jail) are still running.
Args:
config_dir: Absolute path to the fail2ban configuration directory.
socket_path: Path to the fail2ban Unix domain socket.
name: Name of the jail to activate. Must exist in the parsed config.
req: Optional override values to write alongside ``enabled = true``.
Returns:
:class:`~app.models.config.JailActivationResponse` including
``fail2ban_running`` and ``validation_warnings`` fields.
Raises:
JailNameError: If *name* contains invalid characters.
JailNotFoundInConfigError: If *name* is not defined in any config file.
JailAlreadyActiveError: If fail2ban already reports *name* as running.
ConfigWriteError: If writing the ``.local`` file fails.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the fail2ban
socket is unreachable during reload.
"""
_safe_jail_name(name)
all_jails, _source_files = await run_blocking(_parse_jails_sync, Path(config_dir))
if name not in all_jails:
raise JailNotFoundInConfigError(name)
active_names = await _get_active_jail_names(socket_path)
if name in active_names:
raise JailAlreadyActiveError(name)
# ---------------------------------------------------------------------- #
# Pre-activation validation — collect warnings but do not block #
# ---------------------------------------------------------------------- #
validation_result: JailValidationResult = await run_blocking(
_config_file_validate_jail_config_sync,
Path(config_dir),
name,
)
warnings: list[str] = [f"{i.field}: {i.message}" for i in validation_result.issues]
if warnings:
log.warning(
"jail_activation_validation_warnings",
jail=name,
warnings=warnings,
)
# Block activation on critical validation failures (missing filter or logpath).
blocking = [i for i in validation_result.issues if i.field in ("filter", "logpath")]
if blocking:
log.warning(
"jail_activation_blocked",
jail=name,
issues=[f"{i.field}: {i.message}" for i in blocking],
)
return JailActivationResponse(
name=name,
active=False,
fail2ban_running=True,
validation_warnings=warnings,
message=(f"Jail {name!r} cannot be activated: " + "; ".join(i.message for i in blocking)),
)
overrides: dict[str, object] = {
"bantime": req.bantime,
"findtime": req.findtime,
"maxretry": req.maxretry,
"port": req.port,
"logpath": req.logpath,
}
# ---------------------------------------------------------------------- #
# Backup the existing .local file (if any) before overwriting it so that #
# we can restore it if activation fails. #
# ---------------------------------------------------------------------- #
local_path = Path(config_dir) / "jail.d" / f"{name}.local"
original_content: bytes | None = await run_blocking(
lambda: local_path.read_bytes() if local_path.exists() else None,
)
await run_blocking(_write_local_override_sync,
Path(config_dir),
name,
True,
overrides,
)
# ---------------------------------------------------------------------- #
# Activation reload — if it fails, roll back immediately #
# ---------------------------------------------------------------------- #
try:
await reload_all(socket_path, include_jails=[name])
except JailNotFoundError as exc:
# Jail configuration is invalid (e.g. missing logpath that prevents
# fail2ban from loading the jail). Roll back and provide a specific error.
log.warning(
"reload_after_activate_failed_jail_not_found",
jail=name,
error=str(exc),
)
recovered = await _rollback_activation_async(config_dir, name, socket_path, original_content)
return JailActivationResponse(
name=name,
active=False,
fail2ban_running=False,
recovered=recovered,
validation_warnings=warnings,
message=(
f"Jail {name!r} activation failed: {str(exc)}. "
"Check that all logpath files exist and are readable. "
"The configuration was "
+ ("automatically recovered." if recovered else "not recovered — manual intervention is required.")
),
)
except Exception as exc: # noqa: BLE001
log.warning("reload_after_activate_failed", jail=name, error=str(exc))
recovered = await _rollback_activation_async(config_dir, name, socket_path, original_content)
return JailActivationResponse(
name=name,
active=False,
fail2ban_running=False,
recovered=recovered,
validation_warnings=warnings,
message=(
f"Jail {name!r} activation failed during reload and the "
"configuration was "
+ ("automatically recovered." if recovered else "not recovered — manual intervention is required.")
),
)
# ---------------------------------------------------------------------- #
# Post-reload health probe with retries #
# ---------------------------------------------------------------------- #
fail2ban_running = False
for attempt in range(_POST_RELOAD_MAX_ATTEMPTS):
if attempt > 0:
await asyncio.sleep(_POST_RELOAD_PROBE_INTERVAL)
if await _probe_fail2ban_running(socket_path):
fail2ban_running = True
break
if not fail2ban_running:
log.warning(
"fail2ban_down_after_activate",
jail=name,
message="fail2ban socket unreachable after reload — initiating rollback.",
)
recovered = await _rollback_activation_async(config_dir, name, socket_path, original_content)
return JailActivationResponse(
name=name,
active=False,
fail2ban_running=False,
recovered=recovered,
validation_warnings=warnings,
message=(
f"Jail {name!r} activation failed: fail2ban stopped responding "
"after reload. The configuration was "
+ ("automatically recovered." if recovered else "not recovered — manual intervention is required.")
),
)
# Verify the jail actually started (config error may prevent it silently).
post_reload_names = await _get_active_jail_names(socket_path)
actually_running = name in post_reload_names
if not actually_running:
log.warning(
"jail_activation_unverified",
jail=name,
message="Jail did not appear in running jails — initiating rollback.",
)
recovered = await _rollback_activation_async(config_dir, name, socket_path, original_content)
return JailActivationResponse(
name=name,
active=False,
fail2ban_running=True,
recovered=recovered,
validation_warnings=warnings,
message=(
f"Jail {name!r} was written to config but did not start after "
"reload. The configuration was "
+ ("automatically recovered." if recovered else "not recovered — manual intervention is required.")
),
)
log.info("jail_activated", jail=name)
return JailActivationResponse(
name=name,
active=True,
fail2ban_running=True,
validation_warnings=warnings,
message=f"Jail {name!r} activated successfully.",
)
async def _rollback_activation_async(
config_dir: str,
name: str,
socket_path: str,
original_content: bytes | None,
) -> bool:
"""Restore the pre-activation ``.local`` file and reload fail2ban.
Called internally by :func:`activate_jail` when the activation fails after
the config file was already written. Tries to:
1. Restore the original file content (or delete the file if it was newly
created by the activation attempt).
2. Reload fail2ban so the daemon runs with the restored configuration.
3. Probe fail2ban to confirm it came back up.
Args:
config_dir: Absolute path to the fail2ban configuration directory.
name: Name of the jail whose ``.local`` file should be restored.
socket_path: Path to the fail2ban Unix domain socket.
original_content: Raw bytes of the original ``.local`` file, or
``None`` if the file did not exist before the activation.
Returns:
``True`` if fail2ban is responsive again after the rollback, ``False``
if recovery also failed.
"""
local_path = Path(config_dir) / "jail.d" / f"{name}.local"
# Step 1 — restore original file (or delete it).
try:
await run_blocking( _restore_local_file_sync, local_path, original_content)
log.info("jail_activation_rollback_file_restored", jail=name)
except ConfigWriteError as exc:
log.error("jail_activation_rollback_restore_failed", jail=name, error=str(exc))
return False
# Step 2 — reload fail2ban with the restored config.
try:
await reload_all(socket_path)
log.info("jail_activation_rollback_reload_ok", jail=name)
except Exception as exc: # noqa: BLE001
log.warning("jail_activation_rollback_reload_failed", jail=name, error=str(exc))
return False
# Step 3 — wait for fail2ban to come back.
for attempt in range(_POST_RELOAD_MAX_ATTEMPTS):
if attempt > 0:
await asyncio.sleep(_POST_RELOAD_PROBE_INTERVAL)
if await _probe_fail2ban_running(socket_path):
log.info("jail_activation_rollback_recovered", jail=name)
return True
log.warning("jail_activation_rollback_still_down", jail=name)
return False
async def deactivate_jail(
config_dir: str,
socket_path: str,
name: str,
) -> JailActivationResponse:
"""Deactivate a jail and update the health-check cache.
This wrapper disables the jail in the config, reloads fail2ban, and then
forces an immediate health probe so any cached dashboard status reflects
the current daemon state.
"""
result = await _deactivate_jail(config_dir, socket_path, name)
await run_probe(socket_path)
return result
async def _deactivate_jail(
config_dir: str,
socket_path: str,
name: str,
) -> JailActivationResponse:
"""Disable an active jail and reload fail2ban.
Writes ``enabled = false`` to ``jail.d/{name}.local`` and triggers a
full fail2ban reload so the jail stops immediately.
Args:
config_dir: Absolute path to the fail2ban configuration directory.
socket_path: Path to the fail2ban Unix domain socket.
name: Name of the jail to deactivate. Must exist in the parsed config.
Returns:
:class:`~app.models.config.JailActivationResponse`.
Raises:
JailNameError: If *name* contains invalid characters.
JailNotFoundInConfigError: If *name* is not defined in any config file.
JailAlreadyInactiveError: If fail2ban already reports *name* as not
running.
ConfigWriteError: If writing the ``.local`` file fails.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the fail2ban
socket is unreachable during reload.
"""
_safe_jail_name(name)
all_jails, _source_files = await run_blocking(_config_file_parse_jails_sync, Path(config_dir))
if name not in all_jails:
raise JailNotFoundInConfigError(name)
active_names = await _get_active_jail_names(socket_path)
if name not in active_names:
raise JailAlreadyInactiveError(name)
await run_blocking(_write_local_override_sync,
Path(config_dir),
name,
False,
{},
)
try:
await reload_all(socket_path, exclude_jails=[name])
except Exception as exc: # noqa: BLE001
log.warning("reload_after_deactivate_failed", jail=name, error=str(exc))
log.info("jail_deactivated", jail=name)
return JailActivationResponse(
name=name,
active=False,
message=f"Jail {name!r} deactivated successfully.",
)
async def delete_jail_local_override(
config_dir: str,
socket_path: str,
name: str,
) -> None:
"""Delete the ``jail.d/{name}.local`` override file for an inactive jail.
This is the clean-up action shown in the config UI when an inactive jail
still has a ``.local`` override file (e.g. ``enabled = false``). The
file is deleted outright; no fail2ban reload is required because the jail
is already inactive.
Args:
config_dir: Absolute path to the fail2ban configuration directory.
socket_path: Path to the fail2ban Unix domain socket.
name: Name of the jail whose ``.local`` file should be removed.
Raises:
JailNameError: If *name* contains invalid characters.
JailNotFoundInConfigError: If *name* is not defined in any config file.
JailAlreadyActiveError: If the jail is currently active (refusing to
delete the live config file).
ConfigWriteError: If the file cannot be deleted.
"""
_safe_jail_name(name)
all_jails, _source_files = await run_blocking(_config_file_parse_jails_sync, Path(config_dir))
if name not in all_jails:
raise JailNotFoundInConfigError(name)
active_names = await _get_active_jail_names(socket_path)
if name in active_names:
raise JailAlreadyActiveError(name)
local_path = Path(config_dir) / "jail.d" / f"{name}.local"
try:
await run_blocking( lambda: local_path.unlink(missing_ok=True))
except OSError as exc:
raise ConfigWriteError(f"Failed to delete {local_path}: {exc}") from exc
log.info("jail_local_override_deleted", jail=name, path=str(local_path))
async def validate_jail_config(
config_dir: str,
name: str,
) -> JailValidationResult:
"""Run pre-activation validation checks on a jail configuration.
Validates that referenced filter and action files exist in ``filter.d/``
and ``action.d/``, that all regex patterns compile, and that declared log
paths exist on disk.
Args:
config_dir: Absolute path to the fail2ban configuration directory.
name: Name of the jail to validate.
Returns:
:class:`~app.models.config.JailValidationResult` with any issues found.
Raises:
JailNameError: If *name* contains invalid characters.
"""
_safe_jail_name(name)
return await run_blocking(
_config_file_validate_jail_config_sync,
Path(config_dir),
name,
)
async def rollback_jail(
config_dir: str,
socket_path: str,
name: str,
start_cmd_parts: list[str],
) -> RollbackResponse:
"""Rollback a jail and return the result.
The caller is responsible for clearing pending recovery state when the
operation succeeds.
"""
return await _rollback_jail(config_dir, socket_path, name, start_cmd_parts)
async def _rollback_jail(
config_dir: str,
socket_path: str,
name: str,
start_cmd_parts: list[str],
) -> RollbackResponse:
"""Disable a bad jail config and restart the fail2ban daemon.
Writes ``enabled = false`` to ``jail.d/{name}.local`` (works even when
fail2ban is down — only a file write), then attempts to start the daemon
with *start_cmd_parts*. Waits up to 10 seconds for the socket to respond.
Args:
config_dir: Absolute path to the fail2ban configuration directory.
socket_path: Path to the fail2ban Unix domain socket.
name: Name of the jail to disable.
start_cmd_parts: Argument list for the daemon start command, e.g.
``["fail2ban-client", "start"]``.
Returns:
:class:`~app.models.config.RollbackResponse`.
Raises:
JailNameError: If *name* contains invalid characters.
ConfigWriteError: If writing the ``.local`` file fails.
"""
_safe_jail_name(name)
# Write enabled=false — this must succeed even when fail2ban is down.
await run_blocking(_write_local_override_sync,
Path(config_dir),
name,
False,
{},
)
log.info("jail_rolled_back_disabled", jail=name)
# Attempt to start the daemon.
started = await start_daemon(start_cmd_parts)
log.info("jail_rollback_start_attempted", jail=name, start_ok=started)
# Wait for the socket to come back.
fail2ban_running = await wait_for_fail2ban(
socket_path,
max_wait_seconds=10.0,
poll_interval=2.0,
)
active_jails = 0
if fail2ban_running:
names = await _get_active_jail_names(socket_path)
active_jails = len(names)
if fail2ban_running:
log.info("jail_rollback_success", jail=name, active_jails=active_jails)
return RollbackResponse(
jail_name=name,
disabled=True,
fail2ban_running=True,
active_jails=active_jails,
message=(f"Jail {name!r} disabled and fail2ban restarted successfully with {active_jails} active jail(s)."),
)
log.warning("jail_rollback_fail2ban_still_down", jail=name)
return RollbackResponse(
jail_name=name,
disabled=True,
fail2ban_running=False,
active_jails=0,
message=(
f"Jail {name!r} was disabled but fail2ban did not come back online. "
"Check the fail2ban log for additional errors."
),
)