feat: comprehensive health check with DB, scheduler, cache

- Add /api/v1/health endpoint with component-level checks
- Verify DB connectivity, fail2ban socket, scheduler, session cache
- Add SQLite WAL cleanup on startup (orphan crash files)
- Migration 8: import_log.timestamp → INTEGER UNIX epoch
- Align import_log timestamps with history_archive (already UNIX int)
- Add unit tests for DB cleanup and health router

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-02 23:03:57 +02:00
parent b631c1c546
commit 1285bc8571
12 changed files with 472 additions and 241 deletions

View File

@@ -9,6 +9,10 @@ The fail2ban database is separate and is accessed read-only by the history
and ban services.
"""
from __future__ import annotations
from pathlib import Path
import aiosqlite
import structlog
@@ -107,7 +111,7 @@ _SCHEMA_STATEMENTS: list[str] = [
_CREATE_HISTORY_ARCHIVE,
]
_CURRENT_SCHEMA_VERSION: int = 7
_CURRENT_SCHEMA_VERSION: int = 8
_MIGRATIONS: dict[int, str] = {
1: "\n".join(_SCHEMA_STATEMENTS),
@@ -201,6 +205,17 @@ CREATE INDEX IF NOT EXISTS idx_import_log_id_desc
-- Composite index for source_id + id DESC ordering (filtered pagination)
CREATE INDEX IF NOT EXISTS idx_import_log_source_id_desc
ON import_log (source_id, id DESC);
""",
8: """
-- Migration 8: Migrate import_log.timestamp from TEXT ISO 8601 to INTEGER UNIX epoch.
-- Standardizes all BanGUI timestamps on INTEGER UNIX (seconds since epoch).
-- This aligns import_log with history_archive which already uses INTEGER timeofban.
-- TEXT ISO 8601: "2024-06-15T13:45:00.000Z"
-- INTEGER UNIX: 1718453100
ALTER TABLE import_log ADD COLUMN timestamp_unix INTEGER;
UPDATE import_log SET timestamp_unix = strftime('%s', timestamp);
ALTER TABLE import_log DROP COLUMN timestamp;
ALTER TABLE import_log RENAME COLUMN timestamp_unix TO timestamp;
""",
}
@@ -218,6 +233,31 @@ async def _configure_connection(db: aiosqlite.Connection) -> None:
await db.execute("PRAGMA busy_timeout=5000;")
async def _cleanup_wal_files(db_path: str) -> None:
    """Remove ``-wal`` / ``-shm`` sidecar files that no longer have a database.

    In WAL mode the ``-wal`` file holds committed transactions that have not
    yet been checkpointed into the main database file.  After a crash SQLite
    replays that file automatically on the next open, so deleting it while the
    database file exists would discard committed data.  The sidecars are
    therefore treated as orphaned only when the main database file itself is
    missing; in every other case they are left untouched for SQLite to
    recover.

    Removal is best-effort: a sidecar that cannot be deleted is logged and
    skipped, never raised.

    Args:
        db_path: Path to the database file.
    """
    if Path(db_path).exists():
        # Sidecars next to an existing database are live recovery state (and
        # may be in use by another connection) -- never delete them here.
        return

    for suffix in ("-wal", "-shm"):
        sidecar = Path(db_path + suffix)
        try:
            sidecar.unlink()
        except FileNotFoundError:
            # Nothing to clean up for this suffix.
            continue
        except OSError as exc:
            # Permission denied or similar: report it instead of hiding it,
            # but keep startup going.
            log.warning(
                "orphaned_sqlite_file_remove_failed",
                path=str(sidecar),
                error=str(exc),
            )
            continue
        log.warning("orphaned_sqlite_file_removed", path=str(sidecar))
async def _get_current_schema_version(db: aiosqlite.Connection) -> int:
"""Return the highest applied schema version for the given database."""
await db.execute(_CREATE_SCHEMA_MIGRATIONS)
@@ -380,6 +420,7 @@ async def open_db(database_path: str) -> aiosqlite.Connection:
Returns:
A configured :class:`aiosqlite.Connection` instance.
"""
await _cleanup_wal_files(database_path)
db = await aiosqlite.connect(database_path)
db.row_factory = aiosqlite.Row
await _configure_connection(db)