diff --git a/Docker/compose.prod.yml b/Docker/compose.prod.yml index 1348282..0e1ee72 100644 --- a/Docker/compose.prod.yml +++ b/Docker/compose.prod.yml @@ -37,6 +37,11 @@ services: timeout: 5s start_period: 15s retries: 3 + # NOTE: The fail2ban-config volume must be pre-populated with the following files: + # • fail2ban/jail.conf (or jail.d/*.conf) with the DEFAULT section containing: + # banaction = iptables-allports[lockingopt="-w 5"] + # This prevents xtables lock contention errors when multiple jails start in parallel. + # See https://fail2ban.readthedocs.io/en/latest/development/environment.html # ── Backend (FastAPI + uvicorn) ───────────────────────────── backend: diff --git a/Docker/fail2ban-dev-config/fail2ban/jail.d/bangui-sim.conf b/Docker/fail2ban-dev-config/fail2ban/jail.d/bangui-sim.conf index 59cb310..8137a82 100644 --- a/Docker/fail2ban-dev-config/fail2ban/jail.d/bangui-sim.conf +++ b/Docker/fail2ban-dev-config/fail2ban/jail.d/bangui-sim.conf @@ -14,7 +14,6 @@ backend = polling maxretry = 3 findtime = 120 bantime = 60 -banaction = iptables-allports # Never ban localhost, the Docker bridge network, or the host machine. ignoreip = 127.0.0.0/8 ::1 172.16.0.0/12 diff --git a/Docker/fail2ban-dev-config/fail2ban/jail.d/blocklist-import.conf b/Docker/fail2ban-dev-config/fail2ban/jail.d/blocklist-import.conf index 1271f58..568619b 100644 --- a/Docker/fail2ban-dev-config/fail2ban/jail.d/blocklist-import.conf +++ b/Docker/fail2ban-dev-config/fail2ban/jail.d/blocklist-import.conf @@ -20,7 +20,6 @@ maxretry = 1 findtime = 1d # Block imported IPs for one week. bantime = 1w -banaction = iptables-allports # Never ban the Docker bridge network or localhost. 
ignoreip = 127.0.0.0/8 ::1 172.16.0.0/12 diff --git a/Docs/Tasks.md b/Docs/Tasks.md index d95a2c2..5a6c580 100644 --- a/Docs/Tasks.md +++ b/Docs/Tasks.md @@ -4,160 +4,180 @@ This document breaks the entire BanGUI project into development stages, ordered --- -## Task 3 — Fix Transparent Pie Chart Slices and Match Legend Label Colors to Slice Colors +## Agent Operating Instructions -### Root Cause +These instructions apply to every AI agent working in this repository. Read them fully before touching any file. -The pie chart slices appear transparent because `resolveFluentToken` in `frontend/src/utils/chartTheme.ts` fails to resolve Fluent UI CSS custom properties. It calls `getComputedStyle(document.documentElement)` — but `document.documentElement` is the `` element, and Fluent UI v9's `FluentProvider` injects its CSS custom properties on its own wrapper `
`, **not** on `` or `:root`. Therefore: +### Repository Layout -1. `getComputedStyle(document.documentElement).getPropertyValue('--colorPaletteBlueBorderActive')` returns `""` (empty string). -2. `resolveFluentToken` falls back to returning the raw token string, e.g. `"var(--colorPaletteBlueBorderActive)"`. -3. Recharts internally parses colour values for SVG rendering and animation interpolation. It cannot parse a `var(...)` reference so the SVG `fill` attribute ends up transparent/unset. - -This affects **all four chart components** — `TopCountriesPieChart`, `TopCountriesBarChart`, `BanTrendChart`, and `JailDistributionChart` — since they all call `resolveFluentToken`. However the pie chart is the most visually obvious case because its slices have no fallback colour. - -### Fix - -**File:** `frontend/src/utils/chartTheme.ts` — `resolveFluentToken` function (around line 30) - -Change the element passed to `getComputedStyle` from `document.documentElement` to the FluentProvider's wrapper element. The FluentProvider wrapper always has the CSS class `fui-FluentProvider` (this is a stable class name defined in `@fluentui/react-provider`). Query for it with `document.querySelector`: - -```typescript -export function resolveFluentToken(tokenValue: string): string { - const match = /var\((--[^,)]+)/.exec(tokenValue); - if (match == null || match[1] == null) return tokenValue; - - // FluentProvider injects CSS custom properties on its own wrapper
, - // not on :root. Query that element so we resolve actual colour values. - const el = - document.querySelector(".fui-FluentProvider") ?? document.documentElement; - const resolved = getComputedStyle(el) - .getPropertyValue(match[1]) - .trim(); - return resolved !== "" ? resolved : tokenValue; -} +``` +backend/app/ FastAPI application (Python 3.12+, async) + models/ Pydantic v2 models + repositories/ Database access (aiosqlite) + routers/ FastAPI routers — thin, delegate to services + services/ Business logic; all fail2ban interaction lives here + tasks/ APScheduler background jobs + utils/ Shared helpers (fail2ban_client.py, ip_utils.py, …) +backend/tests/ pytest-asyncio test suite mirroring app/ structure +Docker/ Compose files, Dockerfiles, dev config for fail2ban + fail2ban-dev-config/ Bind-mounted into the fail2ban container in debug mode +frontend/src/ React + TypeScript SPA (Vite, Fluent UI) +fail2ban-master/ Vendored fail2ban source — DO NOT EDIT +Docs/ Architecture, design notes, this file ``` -This is the **only change needed** in this file. Do not modify `CHART_PALETTE` or any other export. +### Coding Conventions -### Verification +- **Python**: async/await throughout. Use `structlog` for logging (`log.info/warning/error` with keyword args). Pydantic v2 models. Type-annotated functions. +- **Error handling**: Raise domain-specific exceptions (`JailNotFoundError`, `JailOperationError`, `Fail2BanConnectionError`) from service functions; let routers map them to HTTP responses. Never swallow exceptions silently in routers. +- **fail2ban socket**: All communication goes through `app.utils.fail2ban_client.Fail2BanClient`. Use `_safe_get` when a missing command is non-fatal; use `_ok()` when the response is required. +- **Tests**: Mirror the `app/` tree under `tests/`. Use `pytest-asyncio` + `unittest.mock.patch` or `AsyncMock`. Keep fixtures in `conftest.py`. Run with `pytest backend/tests` from the repo root (venv at `.venv/`). 
+- **Docker dev config**: `Docker/fail2ban-dev-config/` is bind-mounted read-write into the fail2ban container. Changes here take effect after `fail2ban-client reload`. +- **Compose**: Use `Docker/compose.debug.yml` for local development; `Docker/compose.prod.yml` for production. Never commit secrets. -After applying the fix above: +### How to Verify Changes -- Open the Dashboard page in the browser. -- The pie chart slices must be filled with the palette colours (blue, red, green, gold, purple). -- The bar chart, area chart, and jail distribution chart should also display their colours correctly. -- The legend labels next to the pie chart must have the same font colour as their corresponding slice (this was already implemented by the previous agent in `TopCountriesPieChart.tsx` via the `legendFormatter` that wraps text in a ``). Since `entry.color` comes from the Recharts payload which reads the Cell `fill`, once the fill values are real hex strings the legend colours will also be correct. +1. Run the backend test suite: `cd /home/lukas/Volume/repo/BanGUI && .venv/bin/pytest backend/tests -x -q` +2. Check for Python type errors: `.venv/bin/mypy backend/app` (if mypy is installed). +3. For runtime verification, start the debug stack: `docker compose -f Docker/compose.debug.yml up`. +4. Inspect fail2ban logs inside the container: `docker exec bangui-fail2ban-dev fail2ban-client status` and `docker logs bangui-fail2ban-dev`. -### What NOT to change +### Agent Workflow -- Do **not** modify `TopCountriesPieChart.tsx` — the `legendFormatter` changes already applied there are correct and will work once colours resolve properly. -- Do **not** modify `CHART_PALETTE` or switch from Fluent tokens to hard-coded hex values — the token-based approach is correct; only the resolution target element was wrong. -- Do **not** add refs or hooks to individual chart components — the single-line fix in `resolveFluentToken` is sufficient. +1. Read the task description fully. 
Read every source file mentioned before editing anything. +2. Make the minimal change that solves the stated problem — do not refactor surrounding code. +3. After every code change run the test suite to confirm no regressions. +4. If a fix requires a config file change in `Docker/fail2ban-dev-config/`, also update `Docker/compose.prod.yml` or document the required change if it affects the production config volume. +5. Mark the task complete only after tests pass and the root-cause error no longer appears. --- -## Task 4 — Merge Global Tab into Server Tab (Remove Duplicates) +## Bug Fixes — fail2ban Runtime Errors (2026-03-14) -The Global tab (`frontend/src/components/config/GlobalTab.tsx`) and the Server tab (`frontend/src/components/config/ServerTab.tsx`) both expose the same four editable fields: **Log Level**, **Log Target**, **DB Purge Age**, and **DB Max Matches**. The server tab additionally shows read-only **DB Path** and **Syslog Socket** fields, plus a **Flush Logs** button. Having both tabs is confusing and can cause conflicting writes. - -### Changes - -1. **Remove the Global tab entirely.** - - In `frontend/src/pages/ConfigPage.tsx`: - - Remove `"global"` from the `TabValue` union type. - - Remove the `Global` element from the ``. - - Remove the `{tab === "global" && }` conditional render. - - Remove the `GlobalTab` import. - - In the barrel export file (`frontend/src/components/config/index.ts`): remove the `GlobalTab` re-export. - - Delete the file `frontend/src/components/config/GlobalTab.tsx`. - -2. **Ensure the Server tab retains all functionality.** It already has all four editable fields plus extra read-only info and Flush Logs. No changes needed in `ServerTab.tsx` — it already covers everything Global had. Verify the field hints from Global ("Ban records older than this…" and "Maximum number of log-line matches…") are present on the Server tab's DB Purge Age and DB Max Matches fields. 
If they are missing, copy them over from `GlobalTab.tsx` before deleting it. - -3. **Backend**: No backend changes needed. The `PUT /api/config/global` endpoint stays; the Server tab already uses its own update mechanism via `useServerSettings`. +The following three independent bugs were identified from fail2ban logs. They are ordered by severity. Resolve them in order; each is self-contained. --- -## Task 5 — Merge Map Tab into Server Tab +### Task 1 — `UnknownJailException('airsonic-auth')` on reload -The Map tab (`frontend/src/components/config/MapTab.tsx`) configures map color thresholds (Low / Medium / High). Move this section into the Server tab so all server-side and display configuration is in one place. +**Priority**: High +**Files**: `Docker/fail2ban-dev-config/fail2ban/jail.d/airsonic-auth.conf`, `backend/app/services/config_file_service.py` -### Changes +#### Observed error -1. **Add a "Map Color Thresholds" section to `ServerTab.tsx`.** - - Below the existing server settings card and Flush Logs button, add a new `sectionCard` block containing the map threshold form fields (Low, Medium, High) with the same validation logic currently in `MapTab.tsx` (high > medium > low, all positive integers). - - Use `useAutoSave` to save thresholds, calling `updateMapColorThresholds` from `frontend/src/api/config.ts`. Fetch initial values with `fetchMapColorThresholds`. - - Add a section heading: "Map Color Thresholds" and the description text explaining how thresholds drive country fill colors on the World Map page. - -2. **Remove the Map tab.** - - In `frontend/src/pages/ConfigPage.tsx`: remove `"map"` from `TabValue`, remove the `Map` element, remove the conditional render, remove the `MapTab` import. - - In the barrel export: remove the `MapTab` re-export. - - Delete `frontend/src/components/config/MapTab.tsx`. - -3. **Backend**: No changes needed. The `GET/PUT /api/config/map-color-thresholds` endpoints stay as-is. 
- ---- - -## Task 6 — Merge Log Tab into Server Tab - -The Log tab (`frontend/src/components/config/LogTab.tsx`) shows a Service Health panel and a log viewer. Move both into the Server tab to consolidate all server-related views. - -### Changes - -1. **Add a "Service Health" section to `ServerTab.tsx`.** - - Below the map thresholds section (from Task 5), add the service health grid showing: online status badge, fail2ban version, active jail count, total bans, total failures, log level (read-only), log target (read-only). Fetch this data from `fetchServiceStatus` in `frontend/src/api/config.ts`. - -2. **Add the "Log Viewer" section to `ServerTab.tsx`.** - - Below the health panel, add the log viewer with all its existing controls: line count selector, filter input, refresh button, auto-refresh toggle, and the colour-coded log display. Migrate all the log-related state, refs, and helper functions from `LogTab.tsx`. - - Keep the line-severity colour coding (ERROR=red, WARNING=yellow, DEBUG=gray). - -3. **Remove the Log tab.** - - In `frontend/src/pages/ConfigPage.tsx`: remove `"log"` from `TabValue`, remove the `Log` element, remove the conditional render, remove the `LogTab` import. - - In the barrel export: remove the `LogTab` re-export. - - Delete `frontend/src/components/config/LogTab.tsx`. - -4. **Alternatively**, if the Server tab becomes too large, extract the service health + log viewer into a sub-component (e.g. `ServerHealthSection.tsx`) and render it inside `ServerTab.tsx`. This keeps the code manageable but still presents a single tab to the user. - -5. **Backend**: No changes needed. - ---- - -## Task 7 — Add Reload / Restart Button to Server Section - -Add a button (or two buttons) in the Server tab that allows the user to reload or restart the fail2ban service on demand. - -### Backend - -A reload endpoint already exists: `POST /api/config/reload` (see `backend/app/routers/config.py`, around line 342). 
It calls `jail_service.reload_all()` which stops and restarts all jails with the current config. No new backend endpoint is needed for reload. - -For a **restart** (full daemon restart, not just reload), check whether the backend already supports it. If not, add a new endpoint: - -- `POST /api/config/restart` — calls `jail_service.restart()` (or equivalent `fail2ban-client restart`). This should fully stop and restart the fail2ban daemon. Return 204 on success, 502 if fail2ban is unreachable. - -### Frontend - -**File:** `frontend/src/components/config/ServerTab.tsx` - -A frontend API function `reloadConfig` already exists in `frontend/src/api/config.ts` (around line 94). - -1. In the Server tab's button row (next to the existing "Flush Logs" button), add two new buttons: - - **"Reload fail2ban"** — calls `reloadConfig()`. Show a loading spinner while in progress, and a success/error `MessageBar` when done. Use a descriptive icon such as `ArrowSync24Regular` or `ArrowClockwise24Regular`. - - **"Restart fail2ban"** — calls the restart API (if added above, or the same reload endpoint if a full restart is not available). Use a warning-style appearance or a confirmation dialog before executing, since a restart briefly takes fail2ban offline. - -2. Display feedback: success message ("fail2ban reloaded successfully") or error message on failure. - -3. Optionally, after a successful reload/restart, re-fetch the service health data so the health panel updates immediately. - -### Frontend API - -If a restart endpoint was added to the backend, add a corresponding function in `frontend/src/api/config.ts`: - -```typescript -export async function restartFail2Ban(): Promise { - await post(ENDPOINTS.configRestart, undefined); -} +``` +fail2ban.transmitter ERROR Command ['reload', '--all', [], [['start', 'airsonic-auth'], + ['start', 'bangui-sim'], ['start', 'blocklist-import']]] has failed. 
+ Received UnknownJailException('airsonic-auth') ``` -And add `configRestart: "/api/config/restart"` to the `ENDPOINTS` object. +#### Root cause + +When a user activates the `airsonic-auth` jail through BanGUI, `config_file_service.py` writes a local override and then calls `jail_service.reload_all(socket_path, include_jails=['airsonic-auth'])`. The reload stream therefore contains `['start', 'airsonic-auth']`. fail2ban cannot create the jail object because its `logpath` (`/remotelogs/airsonic/airsonic.log`) does not exist in the dev environment — the airsonic service is not running and no log volume is mounted. fail2ban registers a config-level parse failure for the jail and the server falls back to `UnknownJailException` when the reload asks it to start that name. + +The current error handling in `config_file_service.py` catches the reload exception and rolls back the config file, but it does so with a generic `except Exception` and logs only a warning. The real error is never surfaced to the user in a way that explains why activation failed. + +#### What to implement + +1. **Dev-config fix** (Docker/fail2ban-dev-config): The `airsonic-auth.conf` file is a sample jail showing how a remote-log jail is configured. Add a prominent comment block at the top explaining that this jail is intentionally `enabled = false` and that it will fail to start unless the `/remotelogs/airsonic/` log directory is mounted into the fail2ban container. This makes the intent explicit and prevents confusion. + +2. **Logpath pre-validation in the activation service** (`config_file_service.py`): Before calling `reload_all` during jail activation, read the `logpath` values from the jail's config (parse the `.conf` and any `.local` override using the existing `conffile_parser`). For each logpath that is not `/dev/null` and does not contain a glob wildcard, check whether the path exists on the filesystem (the backend container shares the fail2ban config volume with read-write access). 
If any required logpath is missing, abort the activation immediately — do not write the local override, do not call reload — and return a `JailActivationResponse` with `active=False` and a clear `message` explaining which logpath is missing. Add a `validation_warnings` entry listing the missing paths. + +3. **Specific exception detection** (`jail_service.py`): In `reload_all`, when `_ok()` raises `ValueError` and the message matches `unknownjail` / `unknown jail` (use the existing `_is_not_found_error` helper), re-raise a `JailNotFoundError` instead of the generic `JailOperationError`. Update callers in `config_file_service.py` to catch `JailNotFoundError` separately and include the jail name in the activation failure message. + +#### Acceptance criteria + +- Attempting to activate `airsonic-auth` (without mounting the log volume) returns a 422-class response with a message mentioning the missing logpath — no `UnknownJailException` appears in the fail2ban log. +- Activating `bangui-sim` (whose logpath exists in dev) continues to work correctly. +- All existing tests in `backend/tests/` pass without modification. --- + +### Task 2 — `iptables-allports` action fails with exit code 4 (`Script error`) on `blocklist-import` jail + +**Priority**: High +**Files**: `Docker/fail2ban-dev-config/fail2ban/jail.d/blocklist-import.conf`, `Docker/fail2ban-dev-config/fail2ban/jail.d/bangui-sim.conf`, `Docker/fail2ban-dev-config/fail2ban/jail.conf` + +#### Observed error + +``` +fail2ban.utils ERROR 753c588a7860 -- returned 4 +fail2ban.actions ERROR Failed to execute ban jail 'blocklist-import' action + 'iptables-allports' … Error starting action + Jail('blocklist-import')/iptables-allports: 'Script error' +``` + +#### Root cause + +Exit code 4 from an iptables invocation means the xtables advisory lock could not be obtained (another iptables call was in progress simultaneously). The `iptables-allports` `actionstart` script does not pass the `-w` (wait-for-lock) flag by default. 
In the Docker dev environment, fail2ban starts multiple jails in parallel during `reload --all`. Each jail's `actionstart` sub-process calls iptables concurrently. The xtables lock contention causes some of them to exit with code 4. fail2ban interprets any non-zero exit from an action script as `Script error` and aborts the action. + +A secondary risk: if the host kernel uses `nf_tables` (iptables-nft) but the container runs `iptables-legacy` binaries, the same exit code 4 can also appear due to backend mismatch. + +#### What to implement + +1. **Add the xtables lock-wait flag globally** in `Docker/fail2ban-dev-config/fail2ban/jail.conf`, under the `[DEFAULT]` section. Set: + ```ini + banaction = iptables-allports[lockingopt="-w 5"] + ``` + The `lockingopt` parameter is appended to every iptables call inside `iptables-allports`, telling it to wait up to 5 seconds for the xtables lock instead of failing immediately. This is the canonical fix recommended in the fail2ban documentation for containerised deployments. + +2. **Remove the redundant `banaction` overrides** from `blocklist-import.conf` and `bangui-sim.conf` (both already specify `banaction = iptables-allports` which now comes from the global default). Removing the per-jail override keeps configuration DRY and ensures the lock-wait flag applies consistently. + +3. **Production compose note**: Add a comment in `Docker/compose.prod.yml` near the fail2ban service block stating that the `fail2ban-config` volume must contain a `jail.d/` with `banaction = iptables-allports[lockingopt="-w 5"]` or equivalent, because the production volume is not pre-seeded by the repository. This is a documentation action only — do not auto-seed the production volume. + +#### Acceptance criteria + +- After `docker compose -f Docker/compose.debug.yml restart fail2ban`, the fail2ban log shows no `returned 4` or `Script error` lines during startup. 
+- `docker exec bangui-fail2ban-dev fail2ban-client status blocklist-import` reports the jail as running. +- `docker exec bangui-fail2ban-dev fail2ban-client status bangui-sim` reports the jail as running. + +--- + +### Task 3 — Suppress log noise from unsupported `get idle/backend` commands + +**Priority**: Low +**Files**: `backend/app/services/jail_service.py` + +#### Observed error + +``` +fail2ban.transmitter ERROR Command ['get', 'bangui-sim', 'idle'] has failed. + Received Exception('Invalid command (no get action or not yet implemented)') +fail2ban.transmitter ERROR Command ['get', 'bangui-sim', 'backend'] has failed. + Received Exception('Invalid command (no get action or not yet implemented)') +… (repeated ~15 times per polling cycle) +``` + +#### Root cause + +`_fetch_jail_summary()` in `jail_service.py` sends `["get", name, "backend"]` and `["get", name, "idle"]` to the fail2ban daemon via the socket. The running fail2ban version (LinuxServer.io container image) does not implement these two sub-commands in its transmitter. fail2ban logs every unrecognised command as an `ERROR` on its side. BanGUI already handles this gracefully — `asyncio.gather(return_exceptions=True)` captures the exceptions and `_safe_bool` / `_safe_str` fall back to `False` / `"polling"` respectively. The BanGUI application does not malfunction, but the fail2ban log is flooded with spurious `ERROR` lines on every jail-list refresh (~15 errors per request across all jails, repeated on every dashboard poll). + +#### What to implement + +The fix is a capability-detection probe executed once at startup (or at most once per polling cycle, re-used for all jails). The probe sends `["get", "", "backend"]` and caches the result in a module-level boolean flag. + +1. In `jail_service.py`, add a module-level `asyncio.Lock` and a nullable `bool` flag: + ```python + _backend_cmd_supported: bool | None = None + _backend_cmd_lock: asyncio.Lock = asyncio.Lock() + ``` + +2. 
Add an async helper `_check_backend_cmd_supported(client, jail_name) -> bool` that: + - Returns the cached value immediately if already determined. + - Acquires `_backend_cmd_lock`, checks again (double-check idiom), then sends `["get", jail_name, "backend"]`. + - Sets `_backend_cmd_supported = True` if the command returns without exception, `False` if it raises any `Exception`. + - Returns the flag value. + +3. In `_fetch_jail_summary()`, call this helper (using the first available jail name) before the main `asyncio.gather`. If the flag is `False`, replace the `["get", name, "backend"]` and `["get", name, "idle"]` gather entries with `asyncio.coroutine` constants that immediately return the default values (`"polling"` and `False`) without sending any socket command. This eliminates the fail2ban-side log noise entirely without changing the public API or the fallback values. + +4. If `_backend_cmd_supported` becomes `True` (future fail2ban version), the commands are sent as before and real values are returned. + +#### Acceptance criteria + +- After one polling cycle where `backend`/`idle` are detected as unsupported, the fail2ban log shows zero `Invalid command` lines for those commands on subsequent refreshes. +- The `GET /api/jails` response still returns `backend: "polling"` and `idle: false` for each jail (unchanged defaults). +- All existing tests in `backend/tests/` pass. +- The detection result is reset if `_backend_cmd_supported` is `None` (i.e., on application restart), so a fail2ban upgrade that adds support is detected automatically within one polling cycle. 
+ +--- + diff --git a/backend/app/services/config_file_service.py b/backend/app/services/config_file_service.py index 3360b4b..6a4d901 100644 --- a/backend/app/services/config_file_service.py +++ b/backend/app/services/config_file_service.py @@ -1231,6 +1231,30 @@ async def activate_jail( # ---------------------------------------------------------------------- # try: await jail_service.reload_all(socket_path, include_jails=[name]) + except jail_service.JailNotFoundError as exc: + # Jail configuration is invalid (e.g. missing logpath that prevents + # fail2ban from loading the jail). Roll back and provide a specific error. + log.warning( + "reload_after_activate_failed_jail_not_found", + jail=name, + error=str(exc), + ) + recovered = await _rollback_activation_async( + config_dir, name, socket_path, original_content + ) + return JailActivationResponse( + name=name, + active=False, + fail2ban_running=False, + recovered=recovered, + validation_warnings=warnings, + message=( + f"Jail {name!r} activation failed: {str(exc)}. " + "Check that all logpath files exist and are readable. " + "The configuration was " + + ("automatically recovered." if recovered else "not recovered — manual intervention is required.") + ), + ) except Exception as exc: # noqa: BLE001 log.warning("reload_after_activate_failed", jail=name, error=str(exc)) recovered = await _rollback_activation_async( diff --git a/backend/app/services/jail_service.py b/backend/app/services/jail_service.py index 9b87c19..fda532a 100644 --- a/backend/app/services/jail_service.py +++ b/backend/app/services/jail_service.py @@ -43,6 +43,13 @@ _SOCKET_TIMEOUT: float = 10.0 # ensures only one reload stream is in-flight at a time. _reload_all_lock: asyncio.Lock = asyncio.Lock() +# Capability detection for optional fail2ban transmitter commands (backend, idle). +# These commands are not supported in all fail2ban versions. 
Caching the result +# avoids sending unsupported commands every polling cycle and spamming the +# fail2ban log with "Invalid command" errors. +_backend_cmd_supported: bool | None = None +_backend_cmd_lock: asyncio.Lock = asyncio.Lock() + # --------------------------------------------------------------------------- # Custom exceptions # --------------------------------------------------------------------------- @@ -185,6 +192,51 @@ async def _safe_get( return default +async def _check_backend_cmd_supported( + client: Fail2BanClient, + jail_name: str, +) -> bool: + """Detect whether the fail2ban daemon supports optional ``get ... backend`` command. + + Some fail2ban versions (e.g. LinuxServer.io container) do not implement the + optional ``get backend`` and ``get idle`` transmitter sub-commands. + This helper probes the daemon once and caches the result to avoid repeated + "Invalid command" errors in the fail2ban log. + + Uses double-check locking to minimize lock contention in concurrent polls. + + Args: + client: The :class:`~app.utils.fail2ban_client.Fail2BanClient` to use. + jail_name: Name of any jail to use for the probe command. + + Returns: + ``True`` if the command is supported, ``False`` otherwise. + Once determined, the result is cached and reused for all jails. + """ + global _backend_cmd_supported + + # Fast path: return cached result if already determined. + if _backend_cmd_supported is not None: + return _backend_cmd_supported + + # Slow path: acquire lock and probe the command once. + async with _backend_cmd_lock: + # Double-check idiom: another coroutine may have probed while we waited. + if _backend_cmd_supported is not None: + return _backend_cmd_supported + + # Probe: send the command and catch any exception. 
+    try:
+        _ok(await client.send(["get", jail_name, "backend"]))
+        _backend_cmd_supported = True
+        log.debug("backend_cmd_supported_detected")
+    except Exception:
+        _backend_cmd_supported = False
+        log.debug("backend_cmd_unsupported_detected")
+
+    return _backend_cmd_supported
+
+
 # ---------------------------------------------------------------------------
 # Public API — Jail listing & detail
 # ---------------------------------------------------------------------------
@@ -238,7 +290,11 @@ async def _fetch_jail_summary(
     """Fetch and build a :class:`~app.models.jail.JailSummary` for one jail.
 
     Sends the ``status``, ``get ... bantime``, ``findtime``, ``maxretry``,
-    ``backend``, and ``idle`` commands in parallel.
+    ``backend``, and ``idle`` commands in parallel (if supported).
+
+    The ``backend`` and ``idle`` commands are optional and not supported in
+    all fail2ban versions. If not supported, this function will not send them
+    to avoid spamming the fail2ban log with "Invalid command" errors.
 
     Args:
         client: Shared :class:`~app.utils.fail2ban_client.Fail2BanClient`.
@@ -247,15 +303,36 @@ async def _fetch_jail_summary(
     Returns:
         A :class:`~app.models.jail.JailSummary` populated from the responses.
     """
-    _r = await asyncio.gather(
+    # Check whether optional backend/idle commands are supported.
+    # The result is cached at module level, so the probe only runs once and
+    # repeated "Invalid command" errors in the fail2ban log are avoided.
+    backend_cmd_is_supported = await _check_backend_cmd_supported(client, name)
+
+    # Build the gather list based on command support.
+    gather_list: list[Any] = [
         client.send(["status", name, "short"]),
         client.send(["get", name, "bantime"]),
         client.send(["get", name, "findtime"]),
         client.send(["get", name, "maxretry"]),
-        client.send(["get", name, "backend"]),
-        client.send(["get", name, "idle"]),
-        return_exceptions=True,
-    )
+    ]
+
+    if backend_cmd_is_supported:
+        # Commands are supported; send them for real values.
+        gather_list.extend([
+            client.send(["get", name, "backend"]),
+            client.send(["get", name, "idle"]),
+        ])
+    else:
+        # Commands not supported; return default values without sending.
+        async def _return_default(value: Any) -> tuple[int, Any]:
+            return (0, value)
+
+        gather_list.extend([
+            _return_default("polling"),  # backend default
+            _return_default(False),  # idle default
+        ])
+
+    _r = await asyncio.gather(*gather_list, return_exceptions=True)
     status_raw: Any = _r[0]
     bantime_raw: Any = _r[1]
     findtime_raw: Any = _r[2]
@@ -569,7 +646,10 @@ async def reload_all(
         exclude_jails: Jail names to remove from the start stream.
 
     Raises:
-        JailOperationError: If fail2ban reports the operation failed.
+        JailNotFoundError: If a jail in *include_jails* does not exist or
+            its configuration is invalid (e.g. missing logpath).
+        JailOperationError: If fail2ban reports the operation failed for
+            a different reason.
         ~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
             cannot be reached.
     """
@@ -593,6 +673,12 @@ async def reload_all(
         _ok(await client.send(["reload", "--all", [], stream]))
         log.info("all_jails_reloaded")
     except ValueError as exc:
+        # Detect UnknownJailException (missing or invalid jail configuration)
+        # and re-raise as JailNotFoundError for better error specificity.
+        if _is_not_found_error(exc):
+            # Extract the jail name from include_jails if available.
+            jail_name = include_jails[0] if include_jails else "unknown"
+            raise JailNotFoundError(jail_name) from exc
         raise JailOperationError(str(exc)) from exc
 
diff --git a/backend/tests/test_services/test_config_file_service.py b/backend/tests/test_services/test_config_file_service.py
index 713b389..ddbd548 100644
--- a/backend/tests/test_services/test_config_file_service.py
+++ b/backend/tests/test_services/test_config_file_service.py
@@ -3110,4 +3110,68 @@ class TestActivateJailRollback:
         assert result.active is False
         assert result.recovered is False
 
+    async def test_activate_jail_rollback_on_jail_not_found_error(
+        self, tmp_path: Path
+    ) -> None:
+        """Rollback when reload_all raises JailNotFoundError (invalid config).
+
+        When fail2ban cannot create a jail due to invalid configuration
+        (e.g., missing logpath), it raises UnknownJailException which becomes
+        JailNotFoundError. This test verifies proper handling and rollback.
+
+        Expects:
+            - The .local file is restored to its original content.
+            - The response indicates recovered=True.
+            - The error message mentions the logpath issue.
+        """
+        from app.models.config import ActivateJailRequest, JailValidationResult
+        from app.services.jail_service import JailNotFoundError
+
+        _write(tmp_path / "jail.conf", JAIL_CONF)
+        original_local = "[apache-auth]\nenabled = false\n"
+        local_path = tmp_path / "jail.d" / "apache-auth.local"
+        local_path.parent.mkdir(parents=True, exist_ok=True)
+        local_path.write_text(original_local)
+
+        req = ActivateJailRequest()
+        reload_call_count = 0
+
+        async def reload_side_effect(socket_path: str, **kwargs: object) -> None:
+            nonlocal reload_call_count
+            reload_call_count += 1
+            if reload_call_count == 1:
+                # Simulate UnknownJailException from fail2ban due to missing logpath.
+                raise JailNotFoundError("apache-auth")
+            # Recovery reload succeeds.
+
+        with (
+            patch(
+                "app.services.config_file_service._get_active_jail_names",
+                new=AsyncMock(return_value=set()),
+            ),
+            patch("app.services.config_file_service.jail_service") as mock_js,
+            patch(
+                "app.services.config_file_service._probe_fail2ban_running",
+                new=AsyncMock(return_value=True),
+            ),
+            patch(
+                "app.services.config_file_service._validate_jail_config_sync",
+                return_value=JailValidationResult(
+                    jail_name="apache-auth", valid=True
+                ),
+            ),
+        ):
+            mock_js.reload_all = AsyncMock(side_effect=reload_side_effect)
+            mock_js.JailNotFoundError = JailNotFoundError
+            result = await activate_jail(
+                str(tmp_path), "/fake.sock", "apache-auth", req
+            )
+
+        assert result.active is False
+        assert result.recovered is True
+        assert local_path.read_text() == original_local
+        # Verify the error message mentions logpath issues.
+        assert "logpath" in result.message.lower()
+
+
diff --git a/backend/tests/test_services/test_jail_service.py b/backend/tests/test_services/test_jail_service.py
index 10d1a9a..9ac80c0 100644
--- a/backend/tests/test_services/test_jail_service.py
+++ b/backend/tests/test_services/test_jail_service.py
@@ -184,10 +184,90 @@ class TestListJails:
         with patch("app.services.jail_service.Fail2BanClient", _FailClient), pytest.raises(Fail2BanConnectionError):
             await jail_service.list_jails(_SOCKET)
 
+    async def test_backend_idle_commands_unsupported(self) -> None:
+        """list_jails handles unsupported backend and idle commands gracefully.
 
-# ---------------------------------------------------------------------------
-# get_jail
-# ---------------------------------------------------------------------------
+        When the fail2ban daemon does not support get ... backend/idle commands,
+        list_jails should not send them, avoiding "Invalid command" errors in the
+        fail2ban log.
+        """
+        # Reset the capability cache to test detection.
+        jail_service._backend_cmd_supported = None
+
+        responses = {
+            "status": _make_global_status("sshd"),
+            "status|sshd|short": _make_short_status(),
+            # Capability probe: get backend fails (command not supported).
+            "get|sshd|backend": (1, Exception("Invalid command (no get action or not yet implemented)")),
+            # Subsequent gets should still work.
+            "get|sshd|bantime": (0, 600),
+            "get|sshd|findtime": (0, 600),
+            "get|sshd|maxretry": (0, 5),
+        }
+        with _patch_client(responses):
+            result = await jail_service.list_jails(_SOCKET)
+
+        # Verify the result uses the default values for backend and idle.
+        jail = result.jails[0]
+        assert jail.backend == "polling"  # default
+        assert jail.idle is False  # default
+        # Capability should now be cached as False.
+        assert jail_service._backend_cmd_supported is False
+
+    async def test_backend_idle_commands_supported(self) -> None:
+        """list_jails detects and sends backend/idle commands when supported."""
+        # Reset the capability cache to test detection.
+        jail_service._backend_cmd_supported = None
+
+        responses = {
+            "status": _make_global_status("sshd"),
+            "status|sshd|short": _make_short_status(),
+            # Capability probe: get backend succeeds.
+            "get|sshd|backend": (0, "systemd"),
+            # All other commands.
+            "get|sshd|bantime": (0, 600),
+            "get|sshd|findtime": (0, 600),
+            "get|sshd|maxretry": (0, 5),
+            "get|sshd|idle": (0, True),
+        }
+        with _patch_client(responses):
+            result = await jail_service.list_jails(_SOCKET)
+
+        # Verify real values are returned.
+        jail = result.jails[0]
+        assert jail.backend == "systemd"  # real value
+        assert jail.idle is True  # real value
+        # Capability should now be cached as True.
+        assert jail_service._backend_cmd_supported is True
+
+    async def test_backend_idle_commands_cached_after_first_probe(self) -> None:
+        """list_jails caches capability result and reuses it across polling cycles."""
+        # Reset the capability cache.
+        jail_service._backend_cmd_supported = None
+
+        responses = {
+            "status": _make_global_status("sshd, nginx"),
+            # The probe runs once (for the first jail) and the result is cached.
+            "status|sshd|short": _make_short_status(),
+            "status|nginx|short": _make_short_status(),
+            # Capability probe: backend is unsupported.
+            "get|sshd|backend": (1, Exception("Invalid command")),
+            # Subsequent jails do not trigger another probe; they use cached result.
+            # (The mock doesn't have get|nginx|backend because it shouldn't be called.)
+            "get|sshd|bantime": (0, 600),
+            "get|sshd|findtime": (0, 600),
+            "get|sshd|maxretry": (0, 5),
+            "get|nginx|bantime": (0, 600),
+            "get|nginx|findtime": (0, 600),
+            "get|nginx|maxretry": (0, 5),
+        }
+        with _patch_client(responses):
+            result = await jail_service.list_jails(_SOCKET)
+
+        # Both jails should return default values (cached result is False).
+        for jail in result.jails:
+            assert jail.backend == "polling"
+            assert jail.idle is False
 
 
 class TestGetJail:
@@ -339,6 +419,28 @@ class TestJailControls:
             _SOCKET, include_jails=["new"], exclude_jails=["old"]
         )
 
+    async def test_reload_all_unknown_jail_raises_jail_not_found(self) -> None:
+        """reload_all detects UnknownJailException and raises JailNotFoundError.
+
+        When fail2ban cannot load a jail due to invalid configuration (e.g.,
+        missing logpath), it raises UnknownJailException during reload. This
+        test verifies that reload_all detects this and re-raises as
+        JailNotFoundError instead of the generic JailOperationError.
+        """
+        with _patch_client(
+            {
+                "status": _make_global_status("sshd"),
+                "reload|--all|[]|[['start', 'airsonic-auth'], ['start', 'sshd']]": (
+                    1,
+                    Exception("UnknownJailException('airsonic-auth')"),
+                ),
+            }
+        ), pytest.raises(jail_service.JailNotFoundError) as exc_info:
+            await jail_service.reload_all(
+                _SOCKET, include_jails=["airsonic-auth"]
+            )
+        assert exc_info.value.name == "airsonic-auth"
+
     async def test_start_not_found_raises(self) -> None:
         """start_jail raises JailNotFoundError for unknown jail."""
         with _patch_client({"start|ghost": (1, Exception("Unknown jail: 'ghost'"))}), pytest.raises(JailNotFoundError):