Add database migration 5: Indexes for history_archive query performance

- Add composite index on (jail, timeofban DESC) for dashboard filtering
- Add composite index on (timeofban DESC, jail, action) for time-range queries
- Add single-column indexes on ip and action for targeted filtering
- Update schema version to 5 and document in Backend-Development.md

Indexes optimize common dashboard and API query patterns with pagination.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-04-29 20:17:58 +02:00
parent 187cd8250d
commit b6631b86e4
4 changed files with 257 additions and 19 deletions

View File

@@ -107,7 +107,7 @@ _SCHEMA_STATEMENTS: list[str] = [
_CREATE_HISTORY_ARCHIVE,
]
_CURRENT_SCHEMA_VERSION: int = 4
_CURRENT_SCHEMA_VERSION: int = 5
_MIGRATIONS: dict[int, str] = {
1: "\n".join(_SCHEMA_STATEMENTS),
@@ -143,6 +143,29 @@ CREATE TABLE scheduler_lock (
created_at REAL NOT NULL,
heartbeat_at REAL NOT NULL
);
""",
5: """
-- Migration 5: Add indexes to history_archive table for query performance.
-- The history_archive table supports filtering by jail, IP, action, and time range,
-- combined with pagination (ORDER BY timeofban DESC LIMIT/OFFSET).
-- These indexes accelerate common dashboard and API queries.
-- See Docs/Backend-Development.md § Database Performance for details.
-- Composite index for common queries: jail + timeofban ordering (dashboard filter).
CREATE INDEX IF NOT EXISTS idx_history_archive_jail_timeofban
ON history_archive (jail, timeofban DESC);
-- Composite index for time-range + jail queries (history timeline filters).
CREATE INDEX IF NOT EXISTS idx_history_archive_timeofban_jail_action
ON history_archive (timeofban DESC, jail, action);
-- Index for single-column filters: supports IP prefix searches and exact matches.
CREATE INDEX IF NOT EXISTS idx_history_archive_ip
ON history_archive (ip);
-- Index for action-based queries: supports ban/unban filtering.
CREATE INDEX IF NOT EXISTS idx_history_archive_action
ON history_archive (action);
""",
}

View File

@@ -0,0 +1,171 @@
"""Benchmark tests for history_archive query performance.
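These tests time common history_archive queries against a database that is
created by ``init_db`` and filled with 10,000 archived ban events. They do not
compare against an un-indexed schema; each test only asserts a wall-clock upper
bound for one query. They serve as regression tests to catch performance
degradation and document the expected performance characteristics of the
archive table.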
"""
from __future__ import annotations
import time
from pathlib import Path
import aiosqlite
import pytest
from app.db import init_db
from app.repositories.history_archive_repo import (
archive_ban_event,
get_archived_history,
get_max_timeofban,
purge_archived_history,
)
@pytest.fixture
async def app_db_with_archive(tmp_path: Path) -> str:
    """Create a database with a pre-populated archive table.

    The database file is created under ``tmp_path``, initialised with
    ``init_db`` and filled with 10,000 "ban" events via ``archive_ban_event``.
    Events rotate over 10 jails and 1,000 distinct IPv4 addresses and are
    spaced evenly over the 30 days preceding fixture creation, so every
    ``timeofban`` lies in the past.

    Args:
        tmp_path: pytest-provided temporary directory unique to the test.

    Returns:
        Filesystem path of the populated SQLite database.
    """
    # NOTE(review): an ``async def`` fixture under plain ``@pytest.fixture`` is
    # only awaited when pytest-asyncio runs in auto mode -- confirm the
    # project's asyncio_mode setting.
    record_count = 10_000
    window_seconds = 86400 * 30  # 30 days
    # Derive the spacing from the window so the newest event stays in the past.
    # A fixed 5-minute step would cover ~34.7 days and produce future-dated bans.
    interval_seconds = window_seconds // record_count

    jails = ["sshd", "nginx", "apache", "dovecot", "postfix", "http-auth", "recidive", "mysqld", "pam", "jail10"]
    # 1,000 unique addresses 10.0.0.0 .. 10.9.9.9. The second octet varies so
    # that a prefix filter such as "10.0" selects a subset, not every row.
    ips = [f"10.{i // 100}.{(i // 10) % 10}.{i % 10}" for i in range(1000)]

    path = str(tmp_path / "app.db")
    async with aiosqlite.connect(path) as db:
        db.row_factory = aiosqlite.Row
        await init_db(db)

        base_time = int(time.time()) - window_seconds
        for i in range(record_count):
            await archive_ban_event(
                db,
                jail=jails[i % len(jails)],
                ip=ips[i % len(ips)],
                timeofban=base_time + i * interval_seconds,
                bancount=(i % 5) + 1,
                data='{"matches": 5, "failures": 3}',
                action="ban",
            )
    return path
@pytest.mark.asyncio
async def test_get_max_timeofban_performance(app_db_with_archive: str) -> None:
    """Benchmark: one ``get_max_timeofban`` call must return a value within 10 ms."""
    async with aiosqlite.connect(app_db_with_archive) as conn:
        conn.row_factory = aiosqlite.Row

        # Time only the repository call, not connection setup.
        t0 = time.perf_counter()
        newest = await get_max_timeofban(conn)
        elapsed = time.perf_counter() - t0

        assert newest is not None
        assert elapsed < 0.01, f"MAX query took {elapsed:.4f}s (expected <0.01s)"
@pytest.mark.asyncio
async def test_list_history_with_jail_filter_performance(app_db_with_archive: str) -> None:
    """Benchmark: first page of history filtered to jail "sshd" within 50 ms."""
    async with aiosqlite.connect(app_db_with_archive) as conn:
        conn.row_factory = aiosqlite.Row

        t0 = time.perf_counter()
        page_rows, total = await get_archived_history(
            conn,
            jail="sshd",
            page=1,
            page_size=100,
        )
        elapsed = time.perf_counter() - t0

        # The fixture must have produced matching rows, otherwise the timing
        # says nothing about the filtered query.
        assert total > 0
        assert len(page_rows) > 0
        assert elapsed < 0.05, f"Jail filter query took {elapsed:.4f}s (expected <0.05s)"
@pytest.mark.asyncio
async def test_list_history_with_ip_filter_performance(app_db_with_archive: str) -> None:
    """Benchmark: first page of history with ``ip_filter="10.0"`` within 50 ms."""
    async with aiosqlite.connect(app_db_with_archive) as conn:
        conn.row_factory = aiosqlite.Row

        t0 = time.perf_counter()
        page_rows, total = await get_archived_history(
            conn,
            ip_filter="10.0",
            page=1,
            page_size=100,
        )
        elapsed = time.perf_counter() - t0

        # Guard against an empty result making the timing meaningless.
        assert total > 0
        assert len(page_rows) > 0
        assert elapsed < 0.05, f"IP filter query took {elapsed:.4f}s (expected <0.05s)"
@pytest.mark.asyncio
async def test_list_history_with_timerange_filter_performance(app_db_with_archive: str) -> None:
    """Benchmark: first page of history newer than seven days ago within 50 ms."""
    async with aiosqlite.connect(app_db_with_archive) as conn:
        conn.row_factory = aiosqlite.Row

        # Lower bound of the range: seven days before "now".
        current_ts = int(time.time())
        since = current_ts - 86400 * 7

        t0 = time.perf_counter()
        page_rows, total = await get_archived_history(
            conn,
            since=since,
            page=1,
            page_size=100,
        )
        elapsed = time.perf_counter() - t0

        assert total > 0
        assert len(page_rows) > 0
        assert elapsed < 0.05, f"Time range filter query took {elapsed:.4f}s (expected <0.05s)"
@pytest.mark.asyncio
async def test_list_history_with_combined_filters_performance(app_db_with_archive: str) -> None:
    """Benchmark: jail "sshd" plus a seven-day lower bound, first page within 50 ms."""
    async with aiosqlite.connect(app_db_with_archive) as conn:
        conn.row_factory = aiosqlite.Row

        # Both filters are applied in the same call: jail and time range.
        current_ts = int(time.time())
        since = current_ts - 86400 * 7

        t0 = time.perf_counter()
        page_rows, total = await get_archived_history(
            conn,
            jail="sshd",
            since=since,
            page=1,
            page_size=100,
        )
        elapsed = time.perf_counter() - t0

        assert total > 0
        assert len(page_rows) > 0
        assert elapsed < 0.05, f"Combined filter query took {elapsed:.4f}s (expected <0.05s)"
@pytest.mark.asyncio
async def test_count_history_with_filters_performance(app_db_with_archive: str) -> None:
    """Benchmark: the total reported for jail "sshd" is available within 100 ms.

    Only the second element (the total) of the ``get_archived_history`` result
    is inspected; the page argument is left at the function's default.
    """
    async with aiosqlite.connect(app_db_with_archive) as conn:
        conn.row_factory = aiosqlite.Row

        t0 = time.perf_counter()
        _rows, total = await get_archived_history(conn, jail="sshd", page_size=100)
        elapsed = time.perf_counter() - t0

        assert total > 0
        # The measured time covers the whole get_archived_history call, which
        # presumably runs the COUNT query alongside the page query -- verify
        # against the repository implementation.
        assert elapsed < 0.10, f"Count query took {elapsed:.4f}s (expected <0.10s)"
@pytest.mark.asyncio
async def test_purge_old_entries_performance(app_db_with_archive: str) -> None:
    """Benchmark: purging with a 20-day age argument finishes within 100 ms.

    Also checks bookkeeping: the total reported afterwards must equal the
    total reported before minus the number of rows the purge says it removed.
    """
    async with aiosqlite.connect(app_db_with_archive) as conn:
        conn.row_factory = aiosqlite.Row

        # Baseline row count before anything is removed.
        _, count_before = await get_archived_history(conn)

        max_age = 86400 * 20  # 20 days, in seconds

        # Only the purge itself is timed; the surrounding counts are not.
        t0 = time.perf_counter()
        removed = await purge_archived_history(conn, max_age)
        elapsed = time.perf_counter() - t0

        _, count_after = await get_archived_history(conn)

        assert removed > 0
        assert count_after == count_before - removed
        assert elapsed < 0.10, f"Delete query took {elapsed:.4f}s (expected <0.10s)"