Optimise geo lookup and aggregation for 10k+ IPs

- Add persistent geo_cache SQLite table (db.py)
- Rewrite geo_service: batch API (100 IPs/call), two-tier cache,
  no caching of failed lookups so they are retried (see the sketch below)
- Pre-warm geo cache from DB on startup (main.py lifespan)
- Rewrite bans_by_country: SQL GROUP BY ip aggregation + lookup_batch
  instead of a 2000-row fetch + per-IP asyncio.gather calls
- Pre-warm geo cache after blocklist import (blocklist_service)
- Add 300ms debounce to useMapData hook to cancel stale requests
  (see the asyncio sketch below)
- Add perf benchmark asserting <2s for 10k bans (see the test sketch after the diff)
- Add seed_10k_bans.py script for manual perf testing
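
A minimal sketch, not the committed geo_service, of the two-tier cache plus
batch lookup named in the bullets above. The geo_cache schema (ip, data) and
the ip-api.com batch endpoint are assumptions; the diff below only confirms
the call signature lookup_batch(ips, http_session, db=app_db):

import json
from typing import Any

import aiohttp
import aiosqlite

_mem: dict[str, dict[str, Any]] = {}  # tier 1: in-process cache
_CHUNK = 100                          # batch API limit: 100 IPs per call


async def lookup_batch(
    ips: list[str],
    session: aiohttp.ClientSession,
    db: aiosqlite.Connection | None = None,
) -> dict[str, dict[str, Any]]:
    out = {ip: _mem[ip] for ip in ips if ip in _mem}
    missing = [ip for ip in ips if ip not in out]

    # Tier 2: persistent SQLite cache, so restarts do not re-query the API.
    if db is not None and missing:
        marks = ",".join("?" * len(missing))
        async with db.execute(
            f"SELECT ip, data FROM geo_cache WHERE ip IN ({marks})", missing
        ) as cur:
            for ip, raw in await cur.fetchall():
                out[ip] = _mem[ip] = json.loads(raw)
        missing = [ip for ip in missing if ip not in out]

    # Tier 3: one HTTP POST per 100 IPs instead of one request per IP.
    for i in range(0, len(missing), _CHUNK):
        async with session.post(
            "http://ip-api.com/batch", json=missing[i : i + _CHUNK]
        ) as resp:
            for entry in await resp.json():
                if entry.get("status") != "success":
                    continue  # failed lookups are not cached, so they retry
                ip = entry["query"]
                out[ip] = _mem[ip] = entry
                if db is not None:
                    await db.execute(
                        "INSERT OR REPLACE INTO geo_cache (ip, data) VALUES (?, ?)",
                        (ip, json.dumps(entry)),
                    )
    if db is not None:
        await db.commit()
    return out

Skipping the cache write for failures is what makes transient API errors
self-healing: the next dashboard refresh simply retries those IPs.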
2026-03-07 20:28:51 +01:00
parent 53d664de4f
commit ddfc8a0b02
13 changed files with 917 additions and 90 deletions
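
The useMapData debounce lives in the TypeScript frontend and is not part of
the file shown below. This small asyncio sketch only illustrates the pattern
(wait 300 ms, then fire; cancel the stale in-flight request when a newer one
arrives), with all names invented for illustration:

import asyncio
from collections.abc import Awaitable, Callable


class Debouncer:
    """Run only the latest submitted request, 300 ms after it goes quiet."""

    def __init__(self, delay: float = 0.3) -> None:
        self._delay = delay
        self._task: asyncio.Task[None] | None = None

    def submit(self, request: Callable[[], Awaitable[None]]) -> None:
        # A newer request supersedes any pending or in-flight one,
        # mirroring how the hook cancels stale fetches.
        if self._task is not None and not self._task.done():
            self._task.cancel()
        self._task = asyncio.create_task(self._run(request))

    async def _run(self, request: Callable[[], Awaitable[None]]) -> None:
        await asyncio.sleep(self._delay)  # the 300 ms debounce window
        await request()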


@@ -12,7 +12,7 @@ from __future__ import annotations

 import json
 from datetime import UTC, datetime
-from typing import Any
+from typing import TYPE_CHECKING, Any

 import aiosqlite
 import structlog
@@ -29,6 +29,9 @@ from app.models.ban import (
 )
 from app.utils.fail2ban_client import Fail2BanClient

+if TYPE_CHECKING:
+    import aiohttp
+
 log: structlog.stdlib.BoundLogger = structlog.get_logger()

 # ---------------------------------------------------------------------------
@@ -280,35 +283,51 @@ async def list_bans(
 # bans_by_country
 # ---------------------------------------------------------------------------

-#: Maximum bans fetched for aggregation (guard against huge databases).
-_MAX_GEO_BANS: int = 2_000
+#: Maximum rows returned in the companion table alongside the map.
+_MAX_COMPANION_BANS: int = 200


 async def bans_by_country(
     socket_path: str,
     range_: TimeRange,
+    http_session: aiohttp.ClientSession | None = None,
     geo_enricher: Any | None = None,
+    app_db: aiosqlite.Connection | None = None,
     origin: BanOrigin | None = None,
 ) -> BansByCountryResponse:
     """Aggregate ban counts per country for the selected time window.

-    Fetches up to ``_MAX_GEO_BANS`` ban records from the fail2ban database,
-    enriches them with geo data, and returns a ``{country_code: count}`` map
-    alongside the enriched ban list for the companion access table.
+    Uses a three-step strategy optimised for large datasets:
+
+    1. Queries the fail2ban DB with ``GROUP BY ip`` to get the per-IP ban
+       counts for all unique IPs in the window — no row-count cap.
+    2. Batch-resolves every unique IP via
+       :func:`~app.services.geo_service.lookup_batch` (100 IPs per HTTP
+       call) instead of one-at-a-time lookups.
+    3. Returns a ``{country_code: count}`` aggregation and the 200 most
+       recent raw rows (already geo-cached from step 2) for the companion
+       table.

     Args:
         socket_path: Path to the fail2ban Unix domain socket.
         range_: Time-range preset.
-        geo_enricher: Optional async ``(ip) -> GeoInfo | None`` callable.
+        http_session: Optional :class:`aiohttp.ClientSession` for batch
+            geo lookups. When provided, :func:`geo_service.lookup_batch`
+            is used instead of the *geo_enricher* callable.
+        geo_enricher: Legacy async ``(ip) -> GeoInfo | None`` callable;
+            used when *http_session* is ``None``.
+        app_db: Optional BanGUI application database used to persist newly
+            resolved geo entries across restarts.
         origin: Optional origin filter — ``"blocklist"`` restricts results to
             the ``blocklist-import`` jail, ``"selfblock"`` excludes it.

     Returns:
         :class:`~app.models.ban.BansByCountryResponse` with per-country
-        aggregation and the full ban list.
+        aggregation and the companion ban list.
     """
     import asyncio

+    from app.services import geo_service  # noqa: PLC0415
+
     since: int = _since_unix(range_)
     origin_clause, origin_params = _origin_sql_filter(origin)
     db_path: str = await _get_fail2ban_db_path(socket_path)
@@ -323,6 +342,7 @@ async def bans_by_country(
     async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
         f2b_db.row_factory = aiosqlite.Row

+        # Total count for the window.
         async with f2b_db.execute(
             "SELECT COUNT(*) FROM bans WHERE timeofban >= ?" + origin_clause,
             (since, *origin_params),
@@ -330,6 +350,19 @@
             count_row = await cur.fetchone()
             total: int = int(count_row[0]) if count_row else 0

+        # Aggregation: unique IPs + their total event count.
+        # No LIMIT here — we need all unique source IPs for accurate country counts.
+        async with f2b_db.execute(
+            "SELECT ip, COUNT(*) AS event_count "
+            "FROM bans "
+            "WHERE timeofban >= ?"
+            + origin_clause
+            + " GROUP BY ip",
+            (since, *origin_params),
+        ) as cur:
+            agg_rows = await cur.fetchall()
+
+        # Companion table: most recent raw rows for display alongside the map.
         async with f2b_db.execute(
             "SELECT jail, ip, timeofban, bancount, data "
             "FROM bans "
@@ -337,14 +370,21 @@
             + origin_clause
             + " ORDER BY timeofban DESC "
             "LIMIT ?",
-            (since, *origin_params, _MAX_GEO_BANS),
+            (since, *origin_params, _MAX_COMPANION_BANS),
         ) as cur:
-            rows = await cur.fetchall()
+            companion_rows = await cur.fetchall()

-    # Geo-enrich unique IPs in parallel.
-    unique_ips: list[str] = list({str(r["ip"]) for r in rows})
+    # Batch-resolve all unique IPs (much faster than individual lookups).
+    unique_ips: list[str] = [str(r["ip"]) for r in agg_rows]
     geo_map: dict[str, Any] = {}
-    if geo_enricher is not None and unique_ips:
+    if http_session is not None and unique_ips:
+        try:
+            geo_map = await geo_service.lookup_batch(unique_ips, http_session, db=app_db)
+        except Exception as exc:  # noqa: BLE001
+            log.warning("ban_service_batch_geo_failed", error=str(exc))
+    elif geo_enricher is not None and unique_ips:
+        # Fallback: legacy per-IP enricher (used in tests / older callers).

         async def _safe_lookup(ip: str) -> tuple[str, Any]:
             try:
                 return ip, await geo_enricher(ip)
@@ -355,16 +395,29 @@
         results = await asyncio.gather(*(_safe_lookup(ip) for ip in unique_ips))
         geo_map = dict(results)

-    # Build ban items and aggregate country counts.
+    # Build country aggregation from the SQL-grouped rows.
     countries: dict[str, int] = {}
     country_names: dict[str, str] = {}
-    bans: list[DashboardBanItem] = []
-    for row in rows:
-        ip = str(row["ip"])
+    for row in agg_rows:
+        ip: str = str(row["ip"])
         geo = geo_map.get(ip)
         cc: str | None = geo.country_code if geo else None
         cn: str | None = geo.country_name if geo else None
+        event_count: int = int(row["event_count"])
+        if cc:
+            countries[cc] = countries.get(cc, 0) + event_count
+            if cn and cc not in country_names:
+                country_names[cc] = cn
+
+    # Build companion table from recent rows (geo already cached from batch step).
+    bans: list[DashboardBanItem] = []
+    for row in companion_rows:
+        ip = str(row["ip"])
+        geo = geo_map.get(ip)
+        cc = geo.country_code if geo else None
+        cn = geo.country_name if geo else None
         asn: str | None = geo.asn if geo else None
         org: str | None = geo.org if geo else None
         matches, _ = _parse_data_json(row["data"])
@@ -384,11 +437,6 @@
             )
         )
-        if cc:
-            countries[cc] = countries.get(cc, 0) + 1
-            if cn and cc not in country_names:
-                country_names[cc] = cn
-
     return BansByCountryResponse(
         countries=countries,
         country_names=country_names,
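
The perf benchmark and the seed_10k_bans.py script live in other files of this
13-file commit and are not shown above. A sketch of what such a test plausibly
asserts, with every fixture name (seeded_f2b_db, http_session, app_db) and the
"24h" TimeRange literal invented here:

import time

import pytest

from app.services.ban_service import bans_by_country


@pytest.mark.asyncio
async def test_bans_by_country_10k_under_2s(seeded_f2b_db, http_session, app_db):
    # seeded_f2b_db is assumed to expose a fail2ban DB holding 10k ban rows.
    start = time.perf_counter()
    resp = await bans_by_country(
        socket_path=seeded_f2b_db.socket_path,
        range_="24h",  # assumed TimeRange preset
        http_session=http_session,
        app_db=app_db,
    )
    elapsed = time.perf_counter() - start
    assert elapsed < 2.0, f"aggregation took {elapsed:.2f}s for 10k bans"
    assert resp.countries  # at least one country resolved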