Optimise geo lookup and aggregation for 10k+ IPs
- Add persistent geo_cache SQLite table (db.py)
- Rewrite geo_service: batch API (100 IPs/call), two-tier cache, no caching of failed lookups so they are retried
- Pre-warm geo cache from DB on startup (main.py lifespan)
- Rewrite bans_by_country: SQL GROUP BY ip aggregation + lookup_batch instead of 2000-row fetch + asyncio.gather individual calls
- Pre-warm geo cache after blocklist import (blocklist_service)
- Add 300ms debounce to useMapData hook to cancel stale requests
- Add perf benchmark asserting <2s for 10k bans
- Add seed_10k_bans.py script for manual perf testing
This commit is contained in:
0
backend/tests/scripts/__init__.py
Normal file
0
backend/tests/scripts/__init__.py
Normal file
213
backend/tests/scripts/seed_10k_bans.py
Normal file
213
backend/tests/scripts/seed_10k_bans.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""Seed 10 000 synthetic bans into the fail2ban dev database.
|
||||
|
||||
Usage::
|
||||
|
||||
cd backend
|
||||
python tests/scripts/seed_10k_bans.py [--db-path /path/to/fail2ban.sqlite3]
|
||||
|
||||
This script inserts 10 000 synthetic ban rows spread over the last 365 days
|
||||
into the fail2ban SQLite database and pre-resolves all synthetic IPs into the
|
||||
BanGUI geo_cache. Run it once to get realistic dashboard and map load times
|
||||
in the browser without requiring a live fail2ban instance with active traffic.
|
||||
|
||||
.. warning::
|
||||
This script **writes** to the fail2ban database. Only use it against the
|
||||
development database (``Docker/fail2ban-dev-config/fail2ban.sqlite3`` or
|
||||
equivalent). Never run it against a production database.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import random
|
||||
import sqlite3
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
# Default paths
# ---------------------------------------------------------------------------

#: Default fail2ban dev database, resolved relative to this file:
#: <repo>/Docker/fail2ban-dev-config/fail2ban.sqlite3
_DEFAULT_F2B_DB: str = str(
    Path(__file__).resolve().parents[3] / "Docker" / "fail2ban-dev-config" / "fail2ban.sqlite3"
)
#: Default BanGUI application database, resolved to <backend>/bangui.db.
_DEFAULT_APP_DB: str = str(
    Path(__file__).resolve().parents[2] / "bangui.db"
)

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

#: Number of synthetic ban rows to insert.
_BAN_COUNT: int = 10_000
#: Window (in seconds) over which ban timestamps are spread: one year.
_YEAR_SECONDS: int = 365 * 24 * 3600
#: Jail names assigned randomly to the synthetic bans.
_JAIL_POOL: list[str] = ["sshd", "nginx", "blocklist-import", "postfix", "dovecot"]
#: (country_code, country_name) pairs cycled through when pre-seeding geo data.
_COUNTRY_POOL: list[tuple[str, str]] = [
    ("DE", "Germany"),
    ("US", "United States"),
    ("CN", "China"),
    ("RU", "Russia"),
    ("FR", "France"),
    ("BR", "Brazil"),
    ("IN", "India"),
    ("GB", "United Kingdom"),
    ("NL", "Netherlands"),
    ("CA", "Canada"),
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _random_ip() -> str:
|
||||
"""Return a random dotted-decimal IPv4 string in public ranges."""
|
||||
return ".".join(str(random.randint(1, 254)) for _ in range(4))
|
||||
|
||||
|
||||
def _seed_bans(f2b_db_path: str) -> list[str]:
    """Write 10 000 synthetic ban records into the fail2ban SQLite database.

    The synchronous ``sqlite3`` module is used deliberately: fail2ban itself
    writes synchronously and the schema is simple, so async buys nothing here.

    Args:
        f2b_db_path: Filesystem path to the fail2ban SQLite database.

    Returns:
        Every IP address that was inserted, in insertion order.
    """
    timestamp_now = int(time.time())
    addresses: list[str] = [_random_ip() for _ in range(_BAN_COUNT)]

    ban_rows: list[tuple] = []
    for address in addresses:
        # Column order matches the INSERT statement below:
        # (jail, ip, timeofban, bantime, bancount, data)
        ban_rows.append(
            (
                random.choice(_JAIL_POOL),
                address,
                timestamp_now - random.randint(0, _YEAR_SECONDS),
                3600,
                random.randint(1, 10),
                None,
            )
        )

    with sqlite3.connect(f2b_db_path) as con:
        # Dev environments may not have a fail2ban-created schema yet, so
        # create the bans table on demand.
        con.execute(
            "CREATE TABLE IF NOT EXISTS bans ("
            "jail TEXT NOT NULL, "
            "ip TEXT, "
            "timeofban INTEGER NOT NULL, "
            "bantime INTEGER NOT NULL DEFAULT 3600, "
            "bancount INTEGER NOT NULL DEFAULT 1, "
            "data JSON"
            ")"
        )
        con.executemany(
            "INSERT INTO bans (jail, ip, timeofban, bantime, bancount, data) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            ban_rows,
        )
        con.commit()

    log.info("Inserted %d ban rows into %s", _BAN_COUNT, f2b_db_path)
    return addresses
|
||||
|
||||
|
||||
def _seed_geo_cache(app_db_path: str, ips: list[str]) -> None:
    """Pre-populate the BanGUI geo_cache table for every inserted IP.

    Synthetic country data is assigned by cycling through
    :data:`_COUNTRY_POOL`, so the world map shows a realistic spread of
    countries without a single real HTTP request.

    Args:
        app_db_path: Filesystem path to the BanGUI application database.
        ips: IP addresses to pre-cache.
    """
    pool_size = len(_COUNTRY_POOL)
    rows: list[tuple[str, str, str, str, str]] = []
    for index, ip in enumerate(ips):
        # Cycle through the country pool by index instead of materializing a
        # repeated list; the pairing is identical.
        code, name = _COUNTRY_POOL[index % pool_size]
        rows.append(
            (ip, code, name, f"AS{1000 + index % 500}", f"Synthetic ISP {index % 50}")
        )

    with sqlite3.connect(app_db_path) as con:
        con.execute(
            "CREATE TABLE IF NOT EXISTS geo_cache ("
            "ip TEXT PRIMARY KEY, "
            "country_code TEXT, "
            "country_name TEXT, "
            "asn TEXT, "
            "org TEXT, "
            "cached_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now'))"
            ")"
        )
        con.executemany(
            """
            INSERT INTO geo_cache (ip, country_code, country_name, asn, org)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(ip) DO UPDATE SET
                country_code = excluded.country_code,
                country_name = excluded.country_name,
                asn = excluded.asn,
                org = excluded.org
            """,
            rows,
        )
        con.commit()

    log.info("Pre-cached geo data for %d IPs in %s", len(ips), app_db_path)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main() -> None:
    """Parse CLI arguments and run the seeding workflow."""
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

    parser = argparse.ArgumentParser(
        description="Seed 10 000 synthetic bans for performance testing."
    )
    parser.add_argument(
        "--f2b-db",
        default=_DEFAULT_F2B_DB,
        help=f"Path to the fail2ban SQLite database (default: {_DEFAULT_F2B_DB})",
    )
    parser.add_argument(
        "--app-db",
        default=_DEFAULT_APP_DB,
        help=f"Path to the BanGUI application database (default: {_DEFAULT_APP_DB})",
    )
    args = parser.parse_args()

    fail2ban_db = Path(args.f2b_db)
    app_db = Path(args.app_db)

    # Fail fast if either target directory is missing, rather than letting
    # sqlite3 raise a less helpful OperationalError later.
    if not fail2ban_db.parent.exists():
        log.error("fail2ban DB directory does not exist: %s", fail2ban_db.parent)
        sys.exit(1)
    if not app_db.parent.exists():
        log.error("App DB directory does not exist: %s", app_db.parent)
        sys.exit(1)

    log.info("Seeding %d bans into: %s", _BAN_COUNT, fail2ban_db)
    inserted_ips = _seed_bans(str(fail2ban_db))

    log.info("Pre-caching geo data into: %s", app_db)
    _seed_geo_cache(str(app_db), inserted_ips)

    log.info("Done. Restart the BanGUI backend to load the new geo cache entries.")


if __name__ == "__main__":
    main()
|
||||
257
backend/tests/test_services/test_ban_service_perf.py
Normal file
257
backend/tests/test_services/test_ban_service_perf.py
Normal file
@@ -0,0 +1,257 @@
|
||||
"""Performance benchmark for ban_service with 10 000+ banned IPs.
|
||||
|
||||
These tests assert that both ``list_bans`` and ``bans_by_country`` complete
|
||||
within 2 seconds wall-clock time when the geo cache is warm and the fail2ban
|
||||
database contains 10 000 synthetic ban records.
|
||||
|
||||
External network calls are eliminated by pre-populating the in-memory geo
|
||||
cache before the timed section, so the benchmark measures only the database
|
||||
query and in-process aggregation overhead.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
import time
|
||||
from typing import Any
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import aiosqlite
|
||||
import pytest
|
||||
|
||||
from app.services import ban_service, geo_service
|
||||
from app.services.geo_service import GeoInfo
|
||||
|
||||
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

#: Number of synthetic ban rows seeded into the temporary fail2ban DB.
_BAN_COUNT: int = 10_000
#: Wall-clock budget for each timed service call.
_WALL_CLOCK_LIMIT: float = 2.0  # seconds

#: Captured once at import time; every synthetic ban is timestamped relative
#: to this instant so the "365d" query window covers all seeded rows.
_NOW: int = int(time.time())

#: Country codes to cycle through when generating synthetic geo data.
_COUNTRIES: list[tuple[str, str]] = [
    ("DE", "Germany"),
    ("US", "United States"),
    ("CN", "China"),
    ("RU", "Russia"),
    ("FR", "France"),
    ("BR", "Brazil"),
    ("IN", "India"),
    ("GB", "United Kingdom"),
]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _random_ip() -> str:
|
||||
"""Generate a random-looking public IPv4 address string.
|
||||
|
||||
Returns:
|
||||
Dotted-decimal string with each octet in range 1–254.
|
||||
"""
|
||||
return ".".join(str(random.randint(1, 254)) for _ in range(4))
|
||||
|
||||
|
||||
def _random_jail() -> str:
|
||||
"""Pick a jail name from a small pool.
|
||||
|
||||
Returns:
|
||||
One of ``sshd``, ``nginx``, ``blocklist-import``.
|
||||
"""
|
||||
return random.choice(["sshd", "nginx", "blocklist-import"])
|
||||
|
||||
|
||||
async def _seed_f2b_db(path: str, n: int) -> list[str]:
    """Create a fail2ban SQLite database holding *n* synthetic ban rows.

    Ban timestamps are spread uniformly over the last 365 days relative to
    the module-level ``_NOW`` snapshot.

    Args:
        path: Filesystem path for the new database.
        n: Number of rows to insert.

    Returns:
        All IP address strings that were inserted.
    """
    seconds_per_year = 365 * 24 * 3600
    addresses: list[str] = [_random_ip() for _ in range(n)]

    async with aiosqlite.connect(path) as db:
        await db.execute(
            "CREATE TABLE jails ("
            "name TEXT NOT NULL UNIQUE, "
            "enabled INTEGER NOT NULL DEFAULT 1"
            ")"
        )
        await db.execute(
            "CREATE TABLE bans ("
            "jail TEXT NOT NULL, "
            "ip TEXT, "
            "timeofban INTEGER NOT NULL, "
            "bantime INTEGER NOT NULL DEFAULT 3600, "
            "bancount INTEGER NOT NULL DEFAULT 1, "
            "data JSON"
            ")"
        )
        ban_rows = []
        for address in addresses:
            # Draw the jail before the timestamp so the RNG call sequence
            # matches one draw pair per row.
            jail = _random_jail()
            ban_time = _NOW - random.randint(0, seconds_per_year)
            ban_rows.append((jail, address, ban_time, 3600, 1, None))
        await db.executemany(
            "INSERT INTO bans (jail, ip, timeofban, bantime, bancount, data) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            ban_rows,
        )
        await db.commit()

    return addresses
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def event_loop_policy() -> Any:  # type: ignore[misc]
    """Provide an event loop policy for pytest-asyncio's module-scoped fixtures.

    pytest-asyncio calls ``new_event_loop()`` on the object this fixture
    returns, so it must be a real policy instance. The previous version
    returned ``None``, which breaks module-scoped async fixtures at setup
    time. Returning the interpreter's default policy preserves the stated
    intent ("use the default event loop policy") while being usable.
    """
    import asyncio  # local import: the module does not otherwise need asyncio

    return asyncio.get_event_loop_policy()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
async def perf_db_path(tmp_path_factory: Any) -> str:  # type: ignore[misc]
    """Return the path to a fail2ban DB seeded with 10 000 synthetic bans.

    Module-scoped so the comparatively expensive seeding happens only once
    for the whole performance test module.
    """
    tmp_dir = tmp_path_factory.mktemp("perf")
    db_path = str(tmp_dir / "fail2ban_perf.sqlite3")
    seeded_ips = await _seed_f2b_db(db_path, _BAN_COUNT)

    # Warm the in-memory geo cache up front so the timed sections never
    # trigger a network lookup.
    geo_service.clear_cache()
    pool_size = len(_COUNTRIES)
    for index, ip in enumerate(seeded_ips):
        code, name = _COUNTRIES[index % pool_size]
        geo_service._cache[ip] = GeoInfo(  # noqa: SLF001 (test-only direct access)
            country_code=code,
            country_name=name,
            asn=f"AS{1000 + index % 500}",
            org="Synthetic ISP",
        )

    return db_path
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Benchmark tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBanServicePerformance:
    """Wall-clock performance assertions for the ban service."""

    @staticmethod
    async def _warm_cache_enricher(ip: str) -> GeoInfo | None:
        """Resolve *ip* from the pre-warmed in-memory geo cache only."""
        return geo_service._cache.get(ip)  # noqa: SLF001

    @staticmethod
    def _patched_db_path(path: str) -> Any:
        """Patch the fail2ban DB path resolver to point at the perf database."""
        return patch(
            "app.services.ban_service._get_fail2ban_db_path",
            new=AsyncMock(return_value=path),
        )

    async def test_list_bans_returns_within_time_limit(
        self, perf_db_path: str
    ) -> None:
        """``list_bans`` with 10 000 bans completes in under 2 seconds."""
        with self._patched_db_path(perf_db_path):
            start = time.perf_counter()
            result = await ban_service.list_bans(
                "/fake/sock",
                "365d",
                page=1,
                page_size=100,
                geo_enricher=self._warm_cache_enricher,
            )
            elapsed = time.perf_counter() - start

        assert result.total == _BAN_COUNT, (
            f"Expected {_BAN_COUNT} total bans, got {result.total}"
        )
        assert len(result.items) == 100
        assert elapsed < _WALL_CLOCK_LIMIT, (
            f"list_bans took {elapsed:.2f}s — must be < {_WALL_CLOCK_LIMIT}s"
        )

    async def test_bans_by_country_returns_within_time_limit(
        self, perf_db_path: str
    ) -> None:
        """``bans_by_country`` with 10 000 bans completes in under 2 seconds."""
        with self._patched_db_path(perf_db_path):
            start = time.perf_counter()
            result = await ban_service.bans_by_country(
                "/fake/sock",
                "365d",
                geo_enricher=self._warm_cache_enricher,
            )
            elapsed = time.perf_counter() - start

        assert result.total == _BAN_COUNT
        assert len(result.countries) > 0  # At least one country resolved
        assert elapsed < _WALL_CLOCK_LIMIT, (
            f"bans_by_country took {elapsed:.2f}s — must be < {_WALL_CLOCK_LIMIT}s"
        )

    async def test_list_bans_country_data_populated(
        self, perf_db_path: str
    ) -> None:
        """All returned items have geo data from the warm cache."""
        with self._patched_db_path(perf_db_path):
            result = await ban_service.list_bans(
                "/fake/sock",
                "365d",
                page=1,
                page_size=100,
                geo_enricher=self._warm_cache_enricher,
            )

        # Every item should have a country because the cache is warm.
        missing = [i for i in result.items if i.country_code is None]
        assert missing == [], f"{len(missing)} items missing country_code"

    async def test_bans_by_country_aggregation_correct(
        self, perf_db_path: str
    ) -> None:
        """Country aggregation sums across all 10 000 bans."""
        with self._patched_db_path(perf_db_path):
            result = await ban_service.bans_by_country(
                "/fake/sock",
                "365d",
                geo_enricher=self._warm_cache_enricher,
            )

        total_in_countries = sum(result.countries.values())
        # Total bans in country map should equal total bans (all IPs are cached).
        assert total_in_countries == _BAN_COUNT, (
            f"Country sum {total_in_countries} != total {_BAN_COUNT}"
        )
||||
@@ -166,8 +166,8 @@ class TestLookupCaching:
|
||||
|
||||
assert session.get.call_count == 2
|
||||
|
||||
async def test_negative_result_cached(self) -> None:
|
||||
"""A failed lookup result (status != success) is also cached."""
|
||||
async def test_negative_result_not_cached(self) -> None:
|
||||
"""A failed lookup (status != success) is NOT cached so it is retried."""
|
||||
session = _make_session(
|
||||
{"status": "fail", "message": "reserved range"}
|
||||
)
|
||||
@@ -175,7 +175,8 @@ class TestLookupCaching:
|
||||
await geo_service.lookup("192.168.1.1", session) # type: ignore[arg-type]
|
||||
await geo_service.lookup("192.168.1.1", session) # type: ignore[arg-type]
|
||||
|
||||
assert session.get.call_count == 1
|
||||
# Failed lookups must not be cached — both calls must reach the API.
|
||||
assert session.get.call_count == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -201,7 +202,7 @@ class TestLookupFailures:
|
||||
assert result is None
|
||||
|
||||
async def test_failed_status_returns_geo_info_with_nulls(self) -> None:
|
||||
"""When ip-api returns ``status=fail`` a GeoInfo with null fields is cached."""
|
||||
"""When ip-api returns ``status=fail`` a GeoInfo with null fields is returned (but not cached)."""
|
||||
session = _make_session({"status": "fail", "message": "private range"})
|
||||
result = await geo_service.lookup("10.0.0.1", session) # type: ignore[arg-type]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user