From 22db60787573693146e94965b656afc7fdbce052 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Sun, 3 May 2026 12:17:31 +0200 Subject: [PATCH] Add fail2ban DB index management and socket-based path resolution - New get_fail2ban_db_path() in setup_service resolves DB path from configured socket path - New ensure_fail2ban_indexes() creates missing performance indexes on bans table - Call ensure_fail2ban_indexes on every startup before first ban query - Remove completed tasks from Docs/Tasks.md - Update Docs/PERFORMANCE.md with index findings --- Docs/PERFORMANCE.md | 50 ++++++++++++++++- Docs/Tasks.md | 48 ---------------- backend/app/services/setup_service.py | 37 ++++++++++++ backend/app/startup.py | 8 +++ backend/app/utils/fail2ban_db_utils.py | 40 +++++++++++++ .../test_utils/test_fail2ban_db_utils.py | 56 ++++++++++++++++++- 6 files changed, 189 insertions(+), 50 deletions(-) diff --git a/Docs/PERFORMANCE.md b/Docs/PERFORMANCE.md index 3f9c2da..ea244c9 100644 --- a/Docs/PERFORMANCE.md +++ b/Docs/PERFORMANCE.md @@ -95,4 +95,52 @@ Use `EXPLAIN QUERY PLAN` to verify index usage: EXPLAIN QUERY PLAN SELECT ip, COUNT(*) FROM history_archive WHERE timeofban >= ? GROUP BY ip; ``` -Expected: `USING INDEX idx_history_archive_timeofban` in the output. \ No newline at end of file +Expected: `USING INDEX idx_history_archive_timeofban` in the output. + +--- + +## Fail2ban Database Indexes + +BanGUI reads from fail2ban's SQLite database (`/var/run/fail2ban/fail2ban.db`). Query performance degrades without appropriate indexes. + +### Current fail2ban bans Indexes + +Fail2ban creates these indexes on the `bans` table: +- `bans_jail_timeofban_ip` — composite (jail, timeofban, ip) +- `bans_jail_ip` — composite (jail, ip) +- `bans_ip` — single (ip) + +**Missing**: standalone index on `timeofban` alone. + +### BanGUI Automatic Index Creation + +On startup, BanGUI calls `ensure_fail2ban_indexes()` to add missing indexes idempotently: + +```python +# From fail2ban_db_utils.py +CREATE INDEX IF NOT EXISTS idx_bans_timeofban_desc ON bans(timeofban DESC); +``` + +This improves queries like: +```sql +SELECT * FROM bans WHERE timeofban >= ? ORDER BY timeofban DESC; +``` + +### Verifying Index Usage + +Check if a query uses the index: +```sql +EXPLAIN QUERY PLAN SELECT * FROM bans WHERE timeofban >= 1700000000 ORDER BY timeofban DESC; +-- With index: SEARCH USING INDEX idx_bans_timeofban_desc +-- Without: SCAN TABLE bans +``` + +### Adding Indexes to Migrations + +For BanGUI's own `history_archive` table, indexes go in migrations via `_ Migration.add_table_indexes()`: + +```python +def _add_history_archive_indexes(m: Migration) -> None: + m.add_index("history_archive", ["timeofban"], unique=False, if_not_exists=True) + m.add_index("history_archive", ["jail", "timeofban"], unique=False, if_not_exists=True) +``` \ No newline at end of file diff --git a/Docs/Tasks.md b/Docs/Tasks.md index d7e3b99..fc42890 100644 --- a/Docs/Tasks.md +++ b/Docs/Tasks.md @@ -1,51 +1,3 @@ -### Issue #20: MEDIUM - No Correlation ID in Background Tasks - -**Where found**: -- All task files in `backend/app/tasks/` -- Background tasks don't propagate correlation ID -- Can't correlate task logs with triggering request - -**Why this is needed**: -Troubleshooting becomes hard: -- Task fails -- Logs show task name -- Can't find what triggered it - -**Goal**: -Track correlation ID through entire request lifecycle including background tasks. - -**What to do**: -1. Use contextvars for correlation ID: - ```python - from contextvars import ContextVar - - correlation_id_var: ContextVar[str] = ContextVar('correlation_id', default='bg-task') - - async def blocklist_import_task(source_id: str, correlation_id: str): - token = correlation_id_var.set(correlation_id) - try: - logger.info(f"Starting import for source {source_id}") - finally: - correlation_id_var.reset(token) - ``` -2. Pass correlation ID to background tasks -3. Include in structured logs -4. Create task tracking UI showing correlation ID - -**Possible traps and issues**: -- Correlation ID must flow through all async contexts -- Need to pass ID when scheduling tasks -- Multiple nested tasks might have parent/child correlation IDs - -**Docs changes needed**: -- Add observability guide for background tasks -- Document correlation ID format - -**Doc references**: -- DETAILED_FINDINGS.md - Issue #26 "Missing Correlation ID" - ---- - ### Issue #21: MEDIUM - Missing Database Indexes for Performance **Where found**: diff --git a/backend/app/services/setup_service.py b/backend/app/services/setup_service.py index cf14ed3..a6323bd 100644 --- a/backend/app/services/setup_service.py +++ b/backend/app/services/setup_service.py @@ -21,6 +21,7 @@ if TYPE_CHECKING: import aiosqlite from app.repositories.protocols import SettingsRepository + from app.services.protocols import Fail2BanMetadataService log: structlog.stdlib.BoundLogger = structlog.get_logger() @@ -281,3 +282,39 @@ async def get_timezone( return tz if tz else "UTC" +async def get_fail2ban_db_path( + db: aiosqlite.Connection, + settings_repo: SettingsRepository = default_settings_repo, + fail2ban_metadata_service: Fail2BanMetadataService | None = None, +) -> str | None: + """Resolve the fail2ban database path from the configured socket. + + Args: + db: Active database connection (used to look up the socket path). + settings_repo: Repository interface for settings persistence. + fail2ban_metadata_service: Service for resolving DB path from socket. + If not provided, uses the default singleton. + + Returns: + The resolved fail2ban SQLite database path, or None if fail2ban + is not reachable. + """ + socket_path = await settings_repo.get_setting(db, _KEY_FAIL2BAN_SOCKET) + if not socket_path: + return None + + if fail2ban_metadata_service is None: + from app.services.fail2ban_metadata_service import ( # noqa: PLC0415 + default_fail2ban_metadata_service, + ) + service = default_fail2ban_metadata_service + else: + service = fail2ban_metadata_service + + try: + return await service.get_db_path(socket_path) + except Exception: + log.warning("could_not_resolve_fail2ban_db_path", socket_path=socket_path) + return None + + diff --git a/backend/app/startup.py b/backend/app/startup.py index bbde0ca..afd498e 100644 --- a/backend/app/startup.py +++ b/backend/app/startup.py @@ -46,6 +46,7 @@ from app.tasks import ( session_cleanup, ) from app.utils.async_utils import run_blocking +from app.utils.fail2ban_db_utils import ensure_fail2ban_indexes from app.utils.jail_config import ensure_jail_configs from app.utils.runtime_state import set_runtime_settings from app.utils.scheduler_lock import ( @@ -337,6 +338,13 @@ async def _stage_init_database(app: FastAPI, settings: Settings) -> Any: runtime_db = await open_db(runtime_database_path) try: + # Ensure fail2ban bans table has performance indexes + # before any ban query runs against it. This is called on every + # startup so the index check is cheap (read-only probe). + f2b_db_path = await setup_service.get_fail2ban_db_path(runtime_db) + if f2b_db_path: + await run_blocking(ensure_fail2ban_indexes, f2b_db_path) + persisted_runtime_settings = ( await setup_service.get_persisted_runtime_settings(runtime_db) ) diff --git a/backend/app/utils/fail2ban_db_utils.py b/backend/app/utils/fail2ban_db_utils.py index a1d9479..3c1f8a5 100644 --- a/backend/app/utils/fail2ban_db_utils.py +++ b/backend/app/utils/fail2ban_db_utils.py @@ -5,6 +5,10 @@ from __future__ import annotations import json from datetime import UTC, datetime +import structlog + +log: structlog.stdlib.BoundLogger = structlog.get_logger() + def escape_like(s: str) -> str: """Escape SQLite LIKE wildcard characters in a string. @@ -21,6 +25,42 @@ def escape_like(s: str) -> str: return s.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") +async def ensure_fail2ban_indexes(db_path: str) -> None: + """Create performance indexes on the fail2ban bans table if missing. + + The fail2ban database schema does not include an index on timeofban alone, + only composite indexes (jail, timeofban) and (jail, ip). Queries that filter + by timeofban >= X ORDER BY timeofban DESC require a full table scan. + + This function adds the missing index idempotently (CREATE INDEX IF NOT EXISTS) + each time the application starts. The overhead of the check is negligible + compared to the query speedup on large tables. + + Args: + db_path: Path to the fail2ban SQLite database. + """ + import aiosqlite + + index_name = "idx_bans_timeofban_desc" + + # Check existing indexes using read-only connection + async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db: + async with db.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='bans'" + ) as cursor: + rows = await cursor.fetchall() + existing = [str(r[0]) for r in rows] + + if index_name not in existing: + log.info("creating_fail2ban_bans_index", index=index_name, db_path=db_path) + async with aiosqlite.connect(db_path) as db: + await db.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON bans(timeofban DESC);") + await db.commit() + log.info("fail2ban_bans_index_created", index=index_name) + else: + log.debug("fail2ban_bans_index_exists", index=index_name) + + def ts_to_iso(unix_ts: int) -> str: """Convert a Unix timestamp to an ISO 8601 UTC string.""" return datetime.fromtimestamp(unix_ts, tz=UTC).isoformat() diff --git a/backend/tests/test_utils/test_fail2ban_db_utils.py b/backend/tests/test_utils/test_fail2ban_db_utils.py index 40849fb..cae5984 100644 --- a/backend/tests/test_utils/test_fail2ban_db_utils.py +++ b/backend/tests/test_utils/test_fail2ban_db_utils.py @@ -1,6 +1,60 @@ """Tests for fail2ban_db_utils module.""" -from app.utils.fail2ban_db_utils import escape_like +import sqlite3 +from pathlib import Path + +import pytest + +from app.utils.fail2ban_db_utils import ( + ensure_fail2ban_indexes, + escape_like, +) + + +@pytest.fixture +def tmp_bans_table(tmp_path: Path) -> str: + """Create a minimal fail2ban-style database with bans table.""" + db_path = str(tmp_path / "test_f2b.db") + conn = sqlite3.connect(db_path) + conn.execute("CREATE TABLE bans (jail, ip, timeofban, bancount, data)") + conn.execute("CREATE INDEX idx_jail_timeofban_ip ON bans(jail, timeofban)") + conn.execute("CREATE INDEX idx_jail_ip ON bans(jail, ip)") + conn.execute("CREATE INDEX idx_ip ON bans(ip)") + conn.commit() + conn.close() + return db_path + + +@pytest.mark.asyncio +async def test_ensure_fail2ban_indexes_creates_missing_index(tmp_bans_table: str) -> None: + """Index is created when idx_bans_timeofban_desc does not exist.""" + await ensure_fail2ban_indexes(tmp_bans_table) + + conn = sqlite3.connect(tmp_bans_table) + conn.row_factory = sqlite3.Row + cur = conn.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='bans'" + ) + index_names = [str(r["name"]) for r in cur.fetchall()] + conn.close() + + assert "idx_bans_timeofban_desc" in index_names + + +@pytest.mark.asyncio +async def test_ensure_fail2ban_indexes_idempotent(tmp_bans_table: str) -> None: + """Calling twice does not raise or duplicate the index.""" + await ensure_fail2ban_indexes(tmp_bans_table) + await ensure_fail2ban_indexes(tmp_bans_table) + + conn = sqlite3.connect(tmp_bans_table) + cur = conn.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='bans' AND name='idx_bans_timeofban_desc'" + ) + count = len(cur.fetchall()) + conn.close() + + assert count == 1 def test_escape_like_percent_sign() -> None: