Optimise geo lookup and aggregation for 10k+ IPs
- Add persistent geo_cache SQLite table (db.py) - Rewrite geo_service: batch API (100 IPs/call), two-tier cache, no caching of failed lookups so they are retried - Pre-warm geo cache from DB on startup (main.py lifespan) - Rewrite bans_by_country: SQL GROUP BY ip aggregation + lookup_batch instead of 2000-row fetch + asyncio.gather individual calls - Pre-warm geo cache after blocklist import (blocklist_service) - Add 300ms debounce to useMapData hook to cancel stale requests - Add perf benchmark asserting <2s for 10k bans - Add seed_10k_bans.py script for manual perf testing
This commit is contained in:
@@ -12,7 +12,7 @@ from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import aiosqlite
|
||||
import structlog
|
||||
@@ -29,6 +29,9 @@ from app.models.ban import (
|
||||
)
|
||||
from app.utils.fail2ban_client import Fail2BanClient
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import aiohttp
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -280,35 +283,51 @@ async def list_bans(
|
||||
# bans_by_country
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
#: Maximum bans fetched for aggregation (guard against huge databases).
|
||||
_MAX_GEO_BANS: int = 2_000
|
||||
#: Maximum rows returned in the companion table alongside the map.
|
||||
_MAX_COMPANION_BANS: int = 200
|
||||
|
||||
|
||||
async def bans_by_country(
|
||||
socket_path: str,
|
||||
range_: TimeRange,
|
||||
http_session: aiohttp.ClientSession | None = None,
|
||||
geo_enricher: Any | None = None,
|
||||
app_db: aiosqlite.Connection | None = None,
|
||||
origin: BanOrigin | None = None,
|
||||
) -> BansByCountryResponse:
|
||||
"""Aggregate ban counts per country for the selected time window.
|
||||
|
||||
Fetches up to ``_MAX_GEO_BANS`` ban records from the fail2ban database,
|
||||
enriches them with geo data, and returns a ``{country_code: count}`` map
|
||||
alongside the enriched ban list for the companion access table.
|
||||
Uses a two-step strategy optimised for large datasets:
|
||||
|
||||
1. Queries the fail2ban DB with ``GROUP BY ip`` to get the per-IP ban
|
||||
counts for all unique IPs in the window — no row-count cap.
|
||||
2. Batch-resolves every unique IP via :func:`~app.services.geo_service.lookup_batch`
|
||||
(100 IPs per HTTP call) instead of one-at-a-time lookups.
|
||||
3. Returns a ``{country_code: count}`` aggregation and the 200 most
|
||||
recent raw rows (already geo-cached from step 2) for the companion
|
||||
table.
|
||||
|
||||
Args:
|
||||
socket_path: Path to the fail2ban Unix domain socket.
|
||||
range_: Time-range preset.
|
||||
geo_enricher: Optional async ``(ip) -> GeoInfo | None`` callable.
|
||||
http_session: Optional :class:`aiohttp.ClientSession` for batch
|
||||
geo lookups. When provided, :func:`geo_service.lookup_batch`
|
||||
is used instead of the *geo_enricher* callable.
|
||||
geo_enricher: Legacy async ``(ip) -> GeoInfo | None`` callable;
|
||||
used when *http_session* is ``None``.
|
||||
app_db: Optional BanGUI application database used to persist newly
|
||||
resolved geo entries across restarts.
|
||||
origin: Optional origin filter — ``"blocklist"`` restricts results to
|
||||
the ``blocklist-import`` jail, ``"selfblock"`` excludes it.
|
||||
|
||||
Returns:
|
||||
:class:`~app.models.ban.BansByCountryResponse` with per-country
|
||||
aggregation and the full ban list.
|
||||
aggregation and the companion ban list.
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
from app.services import geo_service # noqa: PLC0415
|
||||
|
||||
since: int = _since_unix(range_)
|
||||
origin_clause, origin_params = _origin_sql_filter(origin)
|
||||
db_path: str = await _get_fail2ban_db_path(socket_path)
|
||||
@@ -323,6 +342,7 @@ async def bans_by_country(
|
||||
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
|
||||
f2b_db.row_factory = aiosqlite.Row
|
||||
|
||||
# Total count for the window.
|
||||
async with f2b_db.execute(
|
||||
"SELECT COUNT(*) FROM bans WHERE timeofban >= ?" + origin_clause,
|
||||
(since, *origin_params),
|
||||
@@ -330,6 +350,19 @@ async def bans_by_country(
|
||||
count_row = await cur.fetchone()
|
||||
total: int = int(count_row[0]) if count_row else 0
|
||||
|
||||
# Aggregation: unique IPs + their total event count.
|
||||
# No LIMIT here — we need all unique source IPs for accurate country counts.
|
||||
async with f2b_db.execute(
|
||||
"SELECT ip, COUNT(*) AS event_count "
|
||||
"FROM bans "
|
||||
"WHERE timeofban >= ?"
|
||||
+ origin_clause
|
||||
+ " GROUP BY ip",
|
||||
(since, *origin_params),
|
||||
) as cur:
|
||||
agg_rows = await cur.fetchall()
|
||||
|
||||
# Companion table: most recent raw rows for display alongside the map.
|
||||
async with f2b_db.execute(
|
||||
"SELECT jail, ip, timeofban, bancount, data "
|
||||
"FROM bans "
|
||||
@@ -337,14 +370,21 @@ async def bans_by_country(
|
||||
+ origin_clause
|
||||
+ " ORDER BY timeofban DESC "
|
||||
"LIMIT ?",
|
||||
(since, *origin_params, _MAX_GEO_BANS),
|
||||
(since, *origin_params, _MAX_COMPANION_BANS),
|
||||
) as cur:
|
||||
rows = await cur.fetchall()
|
||||
companion_rows = await cur.fetchall()
|
||||
|
||||
# Geo-enrich unique IPs in parallel.
|
||||
unique_ips: list[str] = list({str(r["ip"]) for r in rows})
|
||||
# Batch-resolve all unique IPs (much faster than individual lookups).
|
||||
unique_ips: list[str] = [str(r["ip"]) for r in agg_rows]
|
||||
geo_map: dict[str, Any] = {}
|
||||
if geo_enricher is not None and unique_ips:
|
||||
|
||||
if http_session is not None and unique_ips:
|
||||
try:
|
||||
geo_map = await geo_service.lookup_batch(unique_ips, http_session, db=app_db)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
log.warning("ban_service_batch_geo_failed", error=str(exc))
|
||||
elif geo_enricher is not None and unique_ips:
|
||||
# Fallback: legacy per-IP enricher (used in tests / older callers).
|
||||
async def _safe_lookup(ip: str) -> tuple[str, Any]:
|
||||
try:
|
||||
return ip, await geo_enricher(ip)
|
||||
@@ -355,16 +395,29 @@ async def bans_by_country(
|
||||
results = await asyncio.gather(*(_safe_lookup(ip) for ip in unique_ips))
|
||||
geo_map = dict(results)
|
||||
|
||||
# Build ban items and aggregate country counts.
|
||||
# Build country aggregation from the SQL-grouped rows.
|
||||
countries: dict[str, int] = {}
|
||||
country_names: dict[str, str] = {}
|
||||
bans: list[DashboardBanItem] = []
|
||||
|
||||
for row in rows:
|
||||
ip = str(row["ip"])
|
||||
for row in agg_rows:
|
||||
ip: str = str(row["ip"])
|
||||
geo = geo_map.get(ip)
|
||||
cc: str | None = geo.country_code if geo else None
|
||||
cn: str | None = geo.country_name if geo else None
|
||||
event_count: int = int(row["event_count"])
|
||||
|
||||
if cc:
|
||||
countries[cc] = countries.get(cc, 0) + event_count
|
||||
if cn and cc not in country_names:
|
||||
country_names[cc] = cn
|
||||
|
||||
# Build companion table from recent rows (geo already cached from batch step).
|
||||
bans: list[DashboardBanItem] = []
|
||||
for row in companion_rows:
|
||||
ip = str(row["ip"])
|
||||
geo = geo_map.get(ip)
|
||||
cc = geo.country_code if geo else None
|
||||
cn = geo.country_name if geo else None
|
||||
asn: str | None = geo.asn if geo else None
|
||||
org: str | None = geo.org if geo else None
|
||||
matches, _ = _parse_data_json(row["data"])
|
||||
@@ -384,11 +437,6 @@ async def bans_by_country(
|
||||
)
|
||||
)
|
||||
|
||||
if cc:
|
||||
countries[cc] = countries.get(cc, 0) + 1
|
||||
if cn and cc not in country_names:
|
||||
country_names[cc] = cn
|
||||
|
||||
return BansByCountryResponse(
|
||||
countries=countries,
|
||||
country_names=country_names,
|
||||
|
||||
Reference in New Issue
Block a user