Fix geo cache write performance: batch commits, read-only GETs, dirty flush

- Remove per-IP db.commit() from _persist_entry() and _persist_neg_entry();
  add a single commit after the full lookup_batch() chunk loop instead.
  Reduces commits from ~5,200 to 1 per bans/by-country request.

- Remove db dependency from GET /api/dashboard/bans and
  GET /api/dashboard/bans/by-country; pass app_db=None so no SQLite
  writes occur during read-only requests.

- Add _dirty set to geo_service; _store() marks resolved IPs dirty.
  New flush_dirty(db) batch-upserts all dirty entries in one transaction.
  New geo_cache_flush APScheduler task flushes every 60 s so geo data
  is persisted without blocking requests.
This commit is contained in:
2026-03-10 18:45:58 +01:00
parent 0225f32901
commit 44a5a3d70e
6 changed files with 505 additions and 34 deletions

View File

@@ -118,6 +118,10 @@ _cache: dict[str, GeoInfo] = {}
#: Entries within :data:`_NEG_CACHE_TTL` seconds are not re-queried.
_neg_cache: dict[str, float] = {}
#: IPs added to :data:`_cache` but not yet persisted to the database.
#: Consumed and cleared atomically by :func:`flush_dirty`.
_dirty: set[str] = set()
#: Optional MaxMind GeoLite2 reader initialised by :func:`init_geoip`.
_geoip_reader: geoip2.database.Reader | None = None
@@ -125,10 +129,12 @@ _geoip_reader: geoip2.database.Reader | None = None
def clear_cache() -> None:
    """Reset every in-memory geo lookup structure to an empty state.

    Empties the positive cache, the negative cache, and the dirty set,
    so entries pending persistence are discarded along with cached data.
    Useful in tests and when the operator suspects stale data.
    """
    for store in (_cache, _neg_cache, _dirty):
        store.clear()
def clear_neg_cache() -> None:
@@ -256,7 +262,6 @@ async def _persist_entry(
""",
(ip, info.country_code, info.country_name, info.asn, info.org),
)
await db.commit()
async def _persist_neg_entry(db: aiosqlite.Connection, ip: str) -> None:
@@ -273,7 +278,6 @@ async def _persist_neg_entry(db: aiosqlite.Connection, ip: str) -> None:
"INSERT OR IGNORE INTO geo_cache (ip) VALUES (?)",
(ip,),
)
await db.commit()
# ---------------------------------------------------------------------------
@@ -330,6 +334,7 @@ async def lookup(
if result.country_code is not None and db is not None:
try:
await _persist_entry(db, ip, result)
await db.commit()
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_failed", ip=ip, error=str(exc))
log.debug("geo_lookup_success", ip=ip, country=result.country_code, asn=result.asn)
@@ -350,6 +355,7 @@ async def lookup(
if fallback.country_code is not None and db is not None:
try:
await _persist_entry(db, ip, fallback)
await db.commit()
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_failed", ip=ip, error=str(exc))
log.debug("geo_geoip_fallback_success", ip=ip, country=fallback.country_code)
@@ -360,6 +366,7 @@ async def lookup(
if db is not None:
try:
await _persist_neg_entry(db, ip)
await db.commit()
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_neg_failed", ip=ip, error=str(exc))
@@ -449,6 +456,12 @@ async def lookup_batch(
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_neg_failed", ip=ip, error=str(exc))
if db is not None:
try:
await db.commit()
except Exception as exc: # noqa: BLE001
log.warning("geo_batch_commit_failed", error=str(exc))
log.info(
"geo_batch_lookup_complete",
requested=len(uncached),
@@ -561,11 +574,77 @@ def _str_or_none(value: object) -> str | None:
def _store(ip: str, info: GeoInfo) -> None:
    """Place *info* into the module-level cache under the key *ip*.

    If the cache has already reached ``_MAX_CACHE_SIZE`` it is wiped
    first — together with the dirty set — so the new entry always fits.
    Entries that resolved successfully (``country_code`` is not ``None``)
    are recorded in :data:`_dirty` so :func:`flush_dirty` persists them
    on the next scheduled flush.

    Args:
        ip: The IP address key.
        info: The :class:`GeoInfo` to store.
    """
    if len(_cache) >= _MAX_CACHE_SIZE:
        _cache.clear()
        _dirty.clear()
        log.info("geo_cache_flushed", reason="capacity")
    _cache[ip] = info
    if info.country_code is None:
        return
    _dirty.add(ip)
async def flush_dirty(db: aiosqlite.Connection) -> int:
    """Write every pending in-memory geo entry to the ``geo_cache`` table.

    Snapshots and empties :data:`_dirty` without yielding to the event
    loop (no ``await`` between the copy and the clear, so no other
    coroutine can interleave), then upserts the snapshot's surviving
    cache entries with one ``executemany`` call and a single ``COMMIT``.
    After startup this is the only code path that writes the persistent
    cache during normal operation.

    On a database error the snapshot is merged back into :data:`_dirty`
    so the same entries are retried on the next flush cycle.

    Args:
        db: Open :class:`aiosqlite.Connection` to the BanGUI application
            database.

    Returns:
        The number of rows successfully upserted.
    """
    if not _dirty:
        return 0
    # Snapshot-then-clear is atomic from the perspective of other
    # coroutines on this single-threaded event loop.
    pending = set(_dirty)
    _dirty.clear()
    rows = []
    for ip in pending:
        info = _cache.get(ip)
        if info is None:
            # Evicted from the cache (capacity wipe) since being marked
            # dirty — nothing left to persist for this IP.
            continue
        rows.append((ip, info.country_code, info.country_name, info.asn, info.org))
    if not rows:
        return 0
    try:
        await db.executemany(
            """
            INSERT INTO geo_cache (ip, country_code, country_name, asn, org)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(ip) DO UPDATE SET
                country_code = excluded.country_code,
                country_name = excluded.country_name,
                asn = excluded.asn,
                org = excluded.org,
                cached_at = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
            """,
            rows,
        )
        await db.commit()
    except Exception as exc:  # noqa: BLE001
        log.warning("geo_flush_dirty_failed", error=str(exc))
        # Merge the snapshot back so the next cycle retries these IPs.
        _dirty.update(pending)
        return 0
    log.info("geo_flush_dirty_complete", count=len(rows))
    return len(rows)