Fix geo cache write performance: batch commits, read-only GETs, dirty flush
- Remove per-IP db.commit() from _persist_entry() and _persist_neg_entry(); add a single commit after the full lookup_batch() chunk loop instead. Reduces commits from ~5,200 to 1 per bans/by-country request. - Remove db dependency from GET /api/dashboard/bans and GET /api/dashboard/bans/by-country; pass app_db=None so no SQLite writes occur during read-only requests. - Add _dirty set to geo_service; _store() marks resolved IPs dirty. New flush_dirty(db) batch-upserts all dirty entries in one transaction. New geo_cache_flush APScheduler task flushes every 60 s so geo data is persisted without blocking requests.
This commit is contained in:
66
backend/app/tasks/geo_cache_flush.py
Normal file
66
backend/app/tasks/geo_cache_flush.py
Normal file
@@ -0,0 +1,66 @@
|
||||
"""Geo cache flush background task.
|
||||
|
||||
Registers an APScheduler job that periodically persists newly resolved IP
|
||||
geo entries from the in-memory ``_dirty`` set to the ``geo_cache`` table.
|
||||
|
||||
After Task 2 removed geo cache writes from GET requests, newly resolved IPs
|
||||
are only held in the in-memory cache until this task flushes them. With the
|
||||
default 60-second interval, at most one minute of new resolution results is
|
||||
at risk on an unexpected process restart.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
import structlog
|
||||
|
||||
from app.services import geo_service
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fastapi import FastAPI
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
|
||||
#: How often the flush job fires (seconds). Configurable tuning constant.
|
||||
GEO_FLUSH_INTERVAL: int = 60
|
||||
|
||||
#: Stable APScheduler job ID — ensures re-registration replaces, not duplicates.
|
||||
JOB_ID: str = "geo_cache_flush"
|
||||
|
||||
|
||||
async def _run_flush(app: Any) -> None:
    """Flush the geo service dirty set to the application database.

    Reads shared resources from ``app.state`` and delegates to
    :func:`~app.services.geo_service.flush_dirty`.

    Args:
        app: The :class:`fastapi.FastAPI` application instance passed via
            APScheduler ``kwargs``. Typed ``Any`` because APScheduler kwargs
            are untyped; only ``app.state.db`` is accessed.
    """
    db = app.state.db
    count = await geo_service.flush_dirty(db)
    # Log only when something was actually persisted to keep the debug log quiet.
    if count > 0:
        log.debug("geo_cache_flush_ran", flushed=count)
def register(app: FastAPI) -> None:
    """Add (or replace) the geo cache flush job in the application scheduler.

    Must be called after the scheduler has been started (i.e., inside the
    lifespan handler, after ``scheduler.start()``).

    Args:
        app: The :class:`fastapi.FastAPI` application instance whose
            ``app.state.scheduler`` will receive the job.
    """
    # replace_existing with a stable id makes registration idempotent across
    # repeated lifespan invocations (e.g. test re-entry, hot reload).
    app.state.scheduler.add_job(
        _run_flush,
        trigger="interval",
        seconds=GEO_FLUSH_INTERVAL,
        kwargs={"app": app},
        id=JOB_ID,
        replace_existing=True,
    )
    log.info("geo_cache_flush_scheduled", interval_seconds=GEO_FLUSH_INTERVAL)
Reference in New Issue
Block a user