Files
BanGUI/backend/app/db.py
2026-05-04 13:13:01 +02:00

453 lines
16 KiB
Python

"""Application database schema definition and initialisation.

BanGUI maintains its own SQLite database that stores configuration, session
state, blocklist source definitions, and import run logs. This module is
the single source of truth for the schema — all ``CREATE TABLE`` statements
live here and are applied as versioned migrations by :func:`init_db`, both on
first run and when upgrading an existing database.

The fail2ban database is separate and is accessed read-only by the history
and ban services.
"""
from __future__ import annotations
from pathlib import Path
import aiosqlite
import structlog
log: structlog.stdlib.BoundLogger = structlog.get_logger()
# ---------------------------------------------------------------------------
# DDL statements
# ---------------------------------------------------------------------------
# Key/value application settings; ``key`` is unique.
_CREATE_SETTINGS: str = """
CREATE TABLE IF NOT EXISTS settings (
id INTEGER PRIMARY KEY AUTOINCREMENT,
key TEXT NOT NULL UNIQUE,
value TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
);
"""

# Login sessions; only a hash of the session token is stored.
_CREATE_SESSIONS: str = """
CREATE TABLE IF NOT EXISTS sessions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
token_hash TEXT NOT NULL UNIQUE,
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
expires_at TEXT NOT NULL
);
"""

# Explicit unique index on sessions.token_hash.
_CREATE_SESSIONS_TOKEN_INDEX: str = """
CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_token_hash ON sessions (token_hash);
"""

# Blocklist source definitions; ``url`` is unique, ``enabled`` defaults to 1.
_CREATE_BLOCKLIST_SOURCES: str = """
CREATE TABLE IF NOT EXISTS blocklist_sources (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
url TEXT NOT NULL UNIQUE,
enabled INTEGER NOT NULL DEFAULT 1,
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
);
"""

# One row per blocklist import run. ``source_id`` references blocklist_sources
# with ON DELETE RESTRICT; ``timestamp`` is stored as an INTEGER.
_CREATE_IMPORT_LOG: str = """
CREATE TABLE IF NOT EXISTS import_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id INTEGER REFERENCES blocklist_sources(id) ON DELETE RESTRICT,
source_url TEXT NOT NULL,
timestamp INTEGER NOT NULL,
ips_imported INTEGER NOT NULL DEFAULT 0,
ips_skipped INTEGER NOT NULL DEFAULT 0,
errors TEXT
);
"""

# Per-IP geolocation cache keyed by the IP string. The ``last_seen`` column is
# not part of this statement; it is added by migration 3.
_CREATE_GEO_CACHE: str = """
CREATE TABLE IF NOT EXISTS geo_cache (
ip TEXT PRIMARY KEY,
country_code TEXT,
country_name TEXT,
asn TEXT,
org TEXT,
cached_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
);
"""

# Archived ban/unban events; (ip, jail, action, timeofban) is unique and
# ``action`` is constrained to 'ban' or 'unban'.
_CREATE_HISTORY_ARCHIVE: str = """
CREATE TABLE IF NOT EXISTS history_archive (
id INTEGER PRIMARY KEY AUTOINCREMENT,
jail TEXT NOT NULL,
ip TEXT NOT NULL,
timeofban INTEGER NOT NULL,
bancount INTEGER NOT NULL,
data TEXT NOT NULL,
action TEXT NOT NULL CHECK(action IN ('ban', 'unban')),
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
UNIQUE(ip, jail, action, timeofban)
);
"""

# Bookkeeping table: one row per applied migration version.
_CREATE_SCHEMA_MIGRATIONS: str = """
CREATE TABLE IF NOT EXISTS schema_migrations (
version INTEGER PRIMARY KEY,
migrated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
);
"""

# Ordered list of DDL statements to execute on initialisation.
# NOTE: migration 1 is built from this list, so editing any statement above
# also changes what migration 1 creates on a fresh database.
_SCHEMA_STATEMENTS: list[str] = [
    _CREATE_SETTINGS,
    _CREATE_SESSIONS,
    _CREATE_SESSIONS_TOKEN_INDEX,
    _CREATE_BLOCKLIST_SOURCES,
    _CREATE_IMPORT_LOG,
    _CREATE_GEO_CACHE,
    _CREATE_HISTORY_ARCHIVE,
]

# Highest migration version this module knows about; must equal the largest
# key in ``_MIGRATIONS``.
_CURRENT_SCHEMA_VERSION: int = 9

# Migration scripts keyed by the schema version they produce. Each script is a
# sequence of ``;``-terminated SQL statements; versions are applied in
# ascending order.
_MIGRATIONS: dict[int, str] = {
    1: "\n".join(_SCHEMA_STATEMENTS),
    2: """
-- Migration 2: Hash session tokens for security.
-- Drop the old sessions table and recreate with token_hash column.
-- This invalidates all existing sessions, which is acceptable as the DB
-- contents were exposed in plaintext.
DROP TABLE IF EXISTS sessions;
CREATE TABLE sessions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
token_hash TEXT NOT NULL UNIQUE,
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
expires_at TEXT NOT NULL
);
CREATE UNIQUE INDEX idx_sessions_token_hash ON sessions (token_hash);
""",
    3: """
-- Migration 3: Add last_seen timestamp to geo_cache for retention policy.
-- Tracks when each IP was last referenced to enable purging of stale entries.
-- Default to current timestamp for existing rows.
-- SQLite rejects ALTER TABLE ... ADD COLUMN with a parenthesised-expression
-- default when the table already contains rows, so the table is rebuilt
-- instead. The resulting columns are the same as the ADD COLUMN form.
DROP TABLE IF EXISTS _geo_cache_new;
CREATE TABLE _geo_cache_new (
ip TEXT PRIMARY KEY,
country_code TEXT,
country_name TEXT,
asn TEXT,
org TEXT,
cached_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
last_seen TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))
);
INSERT INTO _geo_cache_new (ip, country_code, country_name, asn, org, cached_at)
SELECT ip, country_code, country_name, asn, org, cached_at FROM geo_cache;
DROP TABLE geo_cache;
ALTER TABLE _geo_cache_new RENAME TO geo_cache;
""",
    4: """
-- Migration 4: Add scheduler_lock table for multi-worker safety.
-- Implements database-backed locking to ensure only one worker runs the scheduler.
-- Uses atomic transactions to prevent race conditions in container orchestration.
-- Lock is held by the process that successfully inserts the singleton row (id=1).
CREATE TABLE scheduler_lock (
id INTEGER PRIMARY KEY CHECK (id = 1),
pid INTEGER NOT NULL,
hostname TEXT NOT NULL,
created_at REAL NOT NULL,
heartbeat_at REAL NOT NULL,
heartbeat_timeout REAL NOT NULL DEFAULT 300
);
""",
    5: """
-- Migration 5: Add indexes to history_archive table for query performance.
-- The history_archive table supports filtering by jail, IP, action, and time range,
-- combined with pagination (ORDER BY timeofban DESC LIMIT/OFFSET).
-- These indexes accelerate common dashboard and API queries.
-- See Docs/Backend-Development.md § Database Performance for details.
-- Composite index for common queries: jail + timeofban ordering (dashboard filter).
CREATE INDEX IF NOT EXISTS idx_history_archive_jail_timeofban
ON history_archive (jail, timeofban DESC);
-- Composite index for time-range + jail queries (history timeline filters).
CREATE INDEX IF NOT EXISTS idx_history_archive_timeofban_jail_action
ON history_archive (timeofban DESC, jail, action);
-- Index for single-column filters: supports IP prefix searches and exact matches.
CREATE INDEX IF NOT EXISTS idx_history_archive_ip
ON history_archive (ip);
-- Index for action-based queries: supports ban/unban filtering.
CREATE INDEX IF NOT EXISTS idx_history_archive_action
ON history_archive (action);
""",
    6: """
-- Migration 6: Add import_runs table for tracking blocklist import idempotency.
-- Tracks unique imports by source and content hash to enable idempotent retries.
-- On import crash, retry will detect the operation_id and skip duplicate bans.
-- This prevents duplicate IP bans if the scheduler retries after a failure.
CREATE TABLE IF NOT EXISTS import_runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id INTEGER NOT NULL REFERENCES blocklist_sources(id) ON DELETE CASCADE,
content_hash TEXT NOT NULL,
status TEXT NOT NULL CHECK(status IN ('pending', 'completed', 'failed')),
imported_count INTEGER NOT NULL DEFAULT 0,
skipped_count INTEGER NOT NULL DEFAULT 0,
error_message TEXT,
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
UNIQUE(source_id, content_hash)
);
-- Index for looking up completed imports by source
CREATE INDEX IF NOT EXISTS idx_import_runs_source_status
ON import_runs (source_id, status);
""",
    7: """
-- Migration 7: Add indexes to import_log table for cursor-based pagination.
-- The import_log table is paginated by id (newest first) and filtered by source_id.
-- These indexes accelerate pagination queries and maintain consistent ordering.
-- See Docs/Backend-Development.md § Database Performance for details.
-- Index for ordering by id DESC for cursor-based pagination (newest first)
CREATE INDEX IF NOT EXISTS idx_import_log_id_desc
ON import_log (id DESC);
-- Composite index for source_id + id DESC ordering (filtered pagination)
CREATE INDEX IF NOT EXISTS idx_import_log_source_id_desc
ON import_log (source_id, id DESC);
""",
    8: """
-- Migration 8: Migrate import_log.timestamp from TEXT ISO 8601 to INTEGER UNIX epoch.
-- Standardizes all BanGUI timestamps on INTEGER UNIX (seconds since epoch).
-- This aligns import_log with history_archive which already uses INTEGER timeofban.
-- TEXT ISO 8601: "2024-06-15T13:45:00.000Z"
-- INTEGER UNIX: 1718453100
ALTER TABLE import_log ADD COLUMN timestamp_unix INTEGER;
UPDATE import_log SET timestamp_unix = strftime('%s', timestamp);
ALTER TABLE import_log DROP COLUMN timestamp;
ALTER TABLE import_log RENAME COLUMN timestamp_unix TO timestamp;
""",
    9: """
-- Migration 9: Change import_log.source_id foreign key to ON DELETE RESTRICT.
-- Previously, deleting a blocklist source set source_id to NULL, leaving orphaned
-- log records with populated URL but NULL source_id (meaningless/useless data).
-- Now, RESTRICT prevents source deletion if import logs exist, preserving data
-- integrity. Admin must delete logs before deleting source.
-- See Issue #11: Foreign Key ON DELETE Semantics Problem.
DROP INDEX IF EXISTS idx_import_log_source_id_desc;
DROP TABLE IF EXISTS _import_log_backup;
CREATE TABLE _import_log_backup (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id INTEGER REFERENCES blocklist_sources(id) ON DELETE RESTRICT,
source_url TEXT NOT NULL,
timestamp INTEGER NOT NULL,
ips_imported INTEGER NOT NULL DEFAULT 0,
ips_skipped INTEGER NOT NULL DEFAULT 0,
errors TEXT
);
INSERT INTO _import_log_backup (id, source_id, source_url, timestamp, ips_imported, ips_skipped, errors)
SELECT id, source_id, source_url, timestamp, ips_imported, ips_skipped, errors FROM import_log;
DROP TABLE import_log;
ALTER TABLE _import_log_backup RENAME TO import_log;
CREATE INDEX IF NOT EXISTS idx_import_log_source_id_desc
ON import_log (source_id, id DESC);
""",
}
# ---------------------------------------------------------------------------
# Connection helpers, migration runner, and public API (init_db, open_db)
# ---------------------------------------------------------------------------
async def _configure_connection(db: aiosqlite.Connection) -> None:
    """Run the per-connection PRAGMA statements on *db*.

    Issues ``PRAGMA foreign_keys=ON`` followed by ``PRAGMA busy_timeout=5000``,
    in that order.

    Args:
        db: An open connection; only its ``execute`` coroutine is used.
    """
    for pragma in ("PRAGMA foreign_keys=ON;", "PRAGMA busy_timeout=5000;"):
        await db.execute(pragma)
async def _cleanup_wal_files(db_path: str) -> None:
    """Remove ``-wal``/``-shm`` files that have no database file next to them.

    A ``-wal`` file that sits beside an existing database may contain
    committed transactions that have not been checkpointed into the main file
    yet. Deleting it would discard that data, so when ``db_path`` exists this
    function does nothing and recovery is left to SQLite when the database is
    opened.

    Only when the main database file is absent are ``<db_path>-wal`` and
    ``<db_path>-shm`` treated as orphaned and removed. Removal is best-effort:
    a failure is logged and never raised.

    Args:
        db_path: Path to the database file.
    """
    if Path(db_path).exists():
        # Sidecar files belong to a real database; never separate them from it.
        return

    for sidecar in (Path(db_path + "-wal"), Path(db_path + "-shm")):
        try:
            sidecar.unlink()
        except FileNotFoundError:
            # Nothing to clean up for this suffix.
            continue
        except OSError as exc:
            # File in use or permission denied; keep going, but leave a trace.
            log.warning(
                "orphaned_sqlite_file_remove_failed",
                path=str(sidecar),
                error=str(exc),
            )
            continue
        log.warning("orphaned_sqlite_file_removed", path=str(sidecar))
async def _get_current_schema_version(db: aiosqlite.Connection) -> int:
    """Look up the newest migration version recorded in ``schema_migrations``.

    The bookkeeping table is created first if it does not exist, so this can
    be called on a brand-new database.

    Args:
        db: An open connection to the application database.

    Returns:
        ``MAX(version)`` from ``schema_migrations`` as an ``int``, or ``0``
        when no migration has been recorded.
    """
    await db.execute(_CREATE_SCHEMA_MIGRATIONS)
    async with db.execute("SELECT MAX(version) FROM schema_migrations;") as result:
        record = await result.fetchone()
    # MAX() over an empty table yields a single row holding NULL.
    newest = record[0] if record is not None else None
    return 0 if newest is None else int(newest)
async def _parse_migration_statements(script: str) -> list[str]:
    """Split a migration script into individual SQL statements.

    The script is split on ``;``. Semicolons inside single- or double-quoted
    literals (with doubled-quote escapes) and inside comments do not split.
    Line comments (``-- ...``) and block comments (``/* ... */``) are removed.
    As in SQL, a comment counts as whitespace, so the tokens on either side of
    it are never glued together. An unterminated block comment runs to the end
    of the script.

    Limitation: every top-level ``;`` ends a statement, so compound statements
    that contain semicolons (e.g. ``CREATE TRIGGER ... BEGIN ...; END``) are
    not supported.

    Args:
        script: The raw migration script.

    Returns:
        The statements in script order, each stripped of surrounding
        whitespace and without its terminating semicolon. Fragments that are
        empty after stripping are omitted.
    """
    statements: list[str] = []
    current: list[str] = []
    length = len(script)

    def flush() -> None:
        """Move the accumulated text into ``statements`` if it is non-empty."""
        stmt = "".join(current).strip()
        if stmt:
            statements.append(stmt)
        current.clear()

    i = 0
    while i < length:
        char = script[i]

        # Line comment (-- ...): skip up to, but not including, the newline.
        # The newline is then emitted as ordinary text, which keeps the tokens
        # around the comment separated.
        if script.startswith("--", i):
            newline = script.find("\n", i)
            i = length if newline == -1 else newline
            continue

        # Block comment (/* ... */): replace with a single space. An
        # unterminated comment swallows the rest of the script.
        if script.startswith("/*", i):
            end = script.find("*/", i + 2)
            i = length if end == -1 else end + 2
            current.append(" ")
            continue

        # Quoted literal or identifier: copy verbatim through the closing
        # quote; a doubled quote character is an escaped quote.
        if char in ("'", '"'):
            j = i + 1
            while j < length:
                if script[j] == char:
                    if j + 1 < length and script[j + 1] == char:
                        j += 2
                        continue
                    j += 1
                    break
                j += 1
            current.append(script[i:j])
            i = j
            continue

        # Statement separator.
        if char == ";":
            flush()
            i += 1
            continue

        current.append(char)
        i += 1

    # Keep a trailing statement that has no terminating semicolon.
    flush()
    return statements
async def _apply_migration(db: aiosqlite.Connection, version: int) -> None:
    """Run one migration script and record it, all inside one transaction.

    The script registered under ``version`` in ``_MIGRATIONS`` is split into
    statements, which are executed between ``BEGIN IMMEDIATE`` and a commit
    together with the ``schema_migrations`` insert for ``version``.

    Args:
        db: An open aiosqlite.Connection.
        version: The migration version number.

    Raises:
        Exception: Whatever a statement, the bookkeeping insert, or the commit
            raises is re-raised after ``db.rollback()`` has been awaited.
    """
    sql_statements = await _parse_migration_statements(_MIGRATIONS[version])
    try:
        await db.execute("BEGIN IMMEDIATE;")
        for sql in sql_statements:
            await db.execute(sql)
        # Record the version in the same transaction as the schema change.
        await db.execute(
            "INSERT INTO schema_migrations (version) VALUES (?);",
            (version,),
        )
        await db.commit()
    except Exception:
        await db.rollback()
        raise
async def _migrate_schema(db: aiosqlite.Connection) -> None:
    """Bring the database up to ``_CURRENT_SCHEMA_VERSION``.

    Every version after the one currently recorded is applied in ascending
    order. Nothing happens when the database is already at the current
    version.

    Args:
        db: An open connection to the application database.

    Raises:
        RuntimeError: If the recorded version is newer than this module
            supports.
    """
    applied_version = await _get_current_schema_version(db)

    if applied_version > _CURRENT_SCHEMA_VERSION:
        raise RuntimeError(
            f"database schema version {applied_version} is newer than supported "
            f"version {_CURRENT_SCHEMA_VERSION}"
        )
    if applied_version == _CURRENT_SCHEMA_VERSION:
        return

    log.info(
        "migrating_database_schema",
        from_version=applied_version,
        to_version=_CURRENT_SCHEMA_VERSION,
    )
    pending_versions = range(applied_version + 1, _CURRENT_SCHEMA_VERSION + 1)
    for pending in pending_versions:
        await _apply_migration(db, pending)
    log.info("database_schema_ready", schema_version=_CURRENT_SCHEMA_VERSION)
async def init_db(db: aiosqlite.Connection) -> None:
    """Create or upgrade the BanGUI application schema on *db*.

    Applies the connection pragmas and then runs any pending schema
    migrations. Calling it again on an up-to-date database applies no further
    migrations. Intended to be called once during application startup inside
    the FastAPI lifespan handler.

    Args:
        db: An open :class:`aiosqlite.Connection` to the application database.
    """
    log.info("initialising_database_schema")
    # Pragmas first, so the migrations run on a configured connection.
    for step in (_configure_connection, _migrate_schema):
        await step(db)
async def open_db(database_path: str) -> aiosqlite.Connection:
    """Open a new application SQLite connection with the standard settings.

    ``_cleanup_wal_files`` is awaited for ``database_path`` first. The
    connection is then opened, given :class:`aiosqlite.Row` as its row
    factory, and passed through ``_configure_connection``.

    Args:
        database_path: Path to the BanGUI SQLite database.

    Returns:
        A configured :class:`aiosqlite.Connection` instance. The caller owns
        it and is responsible for closing it.

    Raises:
        Exception: Anything raised while opening or configuring the
            connection. A connection that was opened but could not be
            configured is closed before the error propagates.
    """
    await _cleanup_wal_files(database_path)
    db = await aiosqlite.connect(database_path)
    try:
        db.row_factory = aiosqlite.Row
        await _configure_connection(db)
    except BaseException:
        # Do not leak the connection when setup fails or the task is
        # cancelled part-way through.
        await db.close()
        raise
    return db