Add fail2ban DB index management and socket-based path resolution

- New get_fail2ban_db_path() in setup_service resolves DB path from configured socket path
- New ensure_fail2ban_indexes() creates missing performance indexes on bans table
- Call ensure_fail2ban_indexes on every startup before first ban query
- Remove completed tasks from Docs/Tasks.md
- Update Docs/PERFORMANCE.md with index findings
This commit is contained in:
Copilot
2026-05-03 12:17:31 +02:00
committed by Lukas
parent 0133489920
commit 22db607875
6 changed files with 189 additions and 50 deletions

View File

@@ -96,3 +96,51 @@ EXPLAIN QUERY PLAN SELECT ip, COUNT(*) FROM history_archive WHERE timeofban >= ?
```
Expected: `USING INDEX idx_history_archive_timeofban` in the output.
---
## Fail2ban Database Indexes
BanGUI reads from fail2ban's SQLite database (`/var/run/fail2ban/fail2ban.db`). Query performance degrades without appropriate indexes.
### Current fail2ban bans Indexes
Fail2ban creates these indexes on the `bans` table:
- `bans_jail_timeofban_ip` — composite (jail, timeofban, ip)
- `bans_jail_ip` — composite (jail, ip)
- `bans_ip` — single (ip)
**Missing**: standalone index on `timeofban` alone.
### BanGUI Automatic Index Creation
On startup, BanGUI calls `ensure_fail2ban_indexes()` to add missing indexes idempotently:
```python
# From fail2ban_db_utils.py
CREATE INDEX IF NOT EXISTS idx_bans_timeofban_desc ON bans(timeofban DESC);
```
This improves queries like:
```sql
SELECT * FROM bans WHERE timeofban >= ? ORDER BY timeofban DESC;
```
### Verifying Index Usage
Check if a query uses the index:
```sql
EXPLAIN QUERY PLAN SELECT * FROM bans WHERE timeofban >= 1700000000 ORDER BY timeofban DESC;
-- With index: SEARCH USING INDEX idx_bans_timeofban_desc
-- Without: SCAN TABLE bans
```
### Adding Indexes to Migrations
For BanGUI's own `history_archive` table, indexes go in migrations via `_ Migration.add_table_indexes()`:
```python
def _add_history_archive_indexes(m: Migration) -> None:
m.add_index("history_archive", ["timeofban"], unique=False, if_not_exists=True)
m.add_index("history_archive", ["jail", "timeofban"], unique=False, if_not_exists=True)
```

View File

@@ -1,51 +1,3 @@
### Issue #20: MEDIUM - No Correlation ID in Background Tasks
**Where found**:
- All task files in `backend/app/tasks/`
- Background tasks don't propagate correlation ID
- Can't correlate task logs with triggering request
**Why this is needed**:
Troubleshooting becomes hard:
- Task fails
- Logs show task name
- Can't find what triggered it
**Goal**:
Track correlation ID through entire request lifecycle including background tasks.
**What to do**:
1. Use contextvars for correlation ID:
```python
from contextvars import ContextVar
correlation_id_var: ContextVar[str] = ContextVar('correlation_id', default='bg-task')
async def blocklist_import_task(source_id: str, correlation_id: str):
token = correlation_id_var.set(correlation_id)
try:
logger.info(f"Starting import for source {source_id}")
finally:
correlation_id_var.reset(token)
```
2. Pass correlation ID to background tasks
3. Include in structured logs
4. Create task tracking UI showing correlation ID
**Possible traps and issues**:
- Correlation ID must flow through all async contexts
- Need to pass ID when scheduling tasks
- Multiple nested tasks might have parent/child correlation IDs
**Docs changes needed**:
- Add observability guide for background tasks
- Document correlation ID format
**Doc references**:
- DETAILED_FINDINGS.md - Issue #26 "Missing Correlation ID"
---
### Issue #21: MEDIUM - Missing Database Indexes for Performance
**Where found**:

View File

@@ -21,6 +21,7 @@ if TYPE_CHECKING:
import aiosqlite
from app.repositories.protocols import SettingsRepository
from app.services.protocols import Fail2BanMetadataService
log: structlog.stdlib.BoundLogger = structlog.get_logger()
@@ -281,3 +282,39 @@ async def get_timezone(
return tz if tz else "UTC"
async def get_fail2ban_db_path(
db: aiosqlite.Connection,
settings_repo: SettingsRepository = default_settings_repo,
fail2ban_metadata_service: Fail2BanMetadataService | None = None,
) -> str | None:
"""Resolve the fail2ban database path from the configured socket.
Args:
db: Active database connection (used to look up the socket path).
settings_repo: Repository interface for settings persistence.
fail2ban_metadata_service: Service for resolving DB path from socket.
If not provided, uses the default singleton.
Returns:
The resolved fail2ban SQLite database path, or None if fail2ban
is not reachable.
"""
socket_path = await settings_repo.get_setting(db, _KEY_FAIL2BAN_SOCKET)
if not socket_path:
return None
if fail2ban_metadata_service is None:
from app.services.fail2ban_metadata_service import ( # noqa: PLC0415
default_fail2ban_metadata_service,
)
service = default_fail2ban_metadata_service
else:
service = fail2ban_metadata_service
try:
return await service.get_db_path(socket_path)
except Exception:
log.warning("could_not_resolve_fail2ban_db_path", socket_path=socket_path)
return None

View File

@@ -46,6 +46,7 @@ from app.tasks import (
session_cleanup,
)
from app.utils.async_utils import run_blocking
from app.utils.fail2ban_db_utils import ensure_fail2ban_indexes
from app.utils.jail_config import ensure_jail_configs
from app.utils.runtime_state import set_runtime_settings
from app.utils.scheduler_lock import (
@@ -337,6 +338,13 @@ async def _stage_init_database(app: FastAPI, settings: Settings) -> Any:
runtime_db = await open_db(runtime_database_path)
try:
# Ensure fail2ban bans table has performance indexes
# before any ban query runs against it. This is called on every
# startup so the index check is cheap (read-only probe).
f2b_db_path = await setup_service.get_fail2ban_db_path(runtime_db)
if f2b_db_path:
await run_blocking(ensure_fail2ban_indexes, f2b_db_path)
persisted_runtime_settings = (
await setup_service.get_persisted_runtime_settings(runtime_db)
)

View File

@@ -5,6 +5,10 @@ from __future__ import annotations
import json
from datetime import UTC, datetime
import structlog
log: structlog.stdlib.BoundLogger = structlog.get_logger()
def escape_like(s: str) -> str:
"""Escape SQLite LIKE wildcard characters in a string.
@@ -21,6 +25,42 @@ def escape_like(s: str) -> str:
return s.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
async def ensure_fail2ban_indexes(db_path: str) -> None:
"""Create performance indexes on the fail2ban bans table if missing.
The fail2ban database schema does not include an index on timeofban alone,
only composite indexes (jail, timeofban) and (jail, ip). Queries that filter
by timeofban >= X ORDER BY timeofban DESC require a full table scan.
This function adds the missing index idempotently (CREATE INDEX IF NOT EXISTS)
each time the application starts. The overhead of the check is negligible
compared to the query speedup on large tables.
Args:
db_path: Path to the fail2ban SQLite database.
"""
import aiosqlite
index_name = "idx_bans_timeofban_desc"
# Check existing indexes using read-only connection
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
async with db.execute(
"SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='bans'"
) as cursor:
rows = await cursor.fetchall()
existing = [str(r[0]) for r in rows]
if index_name not in existing:
log.info("creating_fail2ban_bans_index", index=index_name, db_path=db_path)
async with aiosqlite.connect(db_path) as db:
await db.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON bans(timeofban DESC);")
await db.commit()
log.info("fail2ban_bans_index_created", index=index_name)
else:
log.debug("fail2ban_bans_index_exists", index=index_name)
def ts_to_iso(unix_ts: int) -> str:
"""Convert a Unix timestamp to an ISO 8601 UTC string."""
return datetime.fromtimestamp(unix_ts, tz=UTC).isoformat()

View File

@@ -1,6 +1,60 @@
"""Tests for fail2ban_db_utils module."""
from app.utils.fail2ban_db_utils import escape_like
import sqlite3
from pathlib import Path
import pytest
from app.utils.fail2ban_db_utils import (
ensure_fail2ban_indexes,
escape_like,
)
@pytest.fixture
def tmp_bans_table(tmp_path: Path) -> str:
"""Create a minimal fail2ban-style database with bans table."""
db_path = str(tmp_path / "test_f2b.db")
conn = sqlite3.connect(db_path)
conn.execute("CREATE TABLE bans (jail, ip, timeofban, bancount, data)")
conn.execute("CREATE INDEX idx_jail_timeofban_ip ON bans(jail, timeofban)")
conn.execute("CREATE INDEX idx_jail_ip ON bans(jail, ip)")
conn.execute("CREATE INDEX idx_ip ON bans(ip)")
conn.commit()
conn.close()
return db_path
@pytest.mark.asyncio
async def test_ensure_fail2ban_indexes_creates_missing_index(tmp_bans_table: str) -> None:
"""Index is created when idx_bans_timeofban_desc does not exist."""
await ensure_fail2ban_indexes(tmp_bans_table)
conn = sqlite3.connect(tmp_bans_table)
conn.row_factory = sqlite3.Row
cur = conn.execute(
"SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='bans'"
)
index_names = [str(r["name"]) for r in cur.fetchall()]
conn.close()
assert "idx_bans_timeofban_desc" in index_names
@pytest.mark.asyncio
async def test_ensure_fail2ban_indexes_idempotent(tmp_bans_table: str) -> None:
"""Calling twice does not raise or duplicate the index."""
await ensure_fail2ban_indexes(tmp_bans_table)
await ensure_fail2ban_indexes(tmp_bans_table)
conn = sqlite3.connect(tmp_bans_table)
cur = conn.execute(
"SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='bans' AND name='idx_bans_timeofban_desc'"
)
count = len(cur.fetchall())
conn.close()
assert count == 1
def test_escape_like_percent_sign() -> None: