Fix geo cache write performance: batch commits, read-only GETs, dirty flush

- Remove per-IP db.commit() from _persist_entry() and _persist_neg_entry();
  add a single commit after the full lookup_batch() chunk loop instead.
  Reduces commits from ~5,200 to 1 per bans/by-country request.

- Remove db dependency from GET /api/dashboard/bans and
  GET /api/dashboard/bans/by-country; pass app_db=None so no SQLite
  writes occur during read-only requests.

- Add _dirty set to geo_service; _store() marks resolved IPs dirty.
  New flush_dirty(db) batch-upserts all dirty entries in one transaction.
  New geo_cache_flush APScheduler task flushes every 60 s so geo data
  is persisted without blocking requests.
This commit is contained in:
2026-03-10 18:45:58 +01:00
parent 0225f32901
commit 44a5a3d70e
6 changed files with 505 additions and 34 deletions

View File

@@ -356,3 +356,212 @@ class TestGeoipFallback:
assert result is not None
assert result.country_code is None
# ---------------------------------------------------------------------------
# Batch single-commit behaviour (Task 1)
# ---------------------------------------------------------------------------
def _make_batch_session(batch_response: list[dict[str, object]]) -> MagicMock:
"""Build a mock aiohttp.ClientSession for batch POST calls.
Args:
batch_response: The list that the mock response's ``json()`` returns.
Returns:
A :class:`MagicMock` with a ``post`` method wired as an async context.
"""
mock_resp = AsyncMock()
mock_resp.status = 200
mock_resp.json = AsyncMock(return_value=batch_response)
mock_ctx = AsyncMock()
mock_ctx.__aenter__ = AsyncMock(return_value=mock_resp)
mock_ctx.__aexit__ = AsyncMock(return_value=False)
session = MagicMock()
session.post = MagicMock(return_value=mock_ctx)
return session
def _make_async_db() -> MagicMock:
"""Build a minimal mock :class:`aiosqlite.Connection`.
Returns:
MagicMock with ``execute``, ``executemany``, and ``commit`` wired as
async coroutines.
"""
db = MagicMock()
db.execute = AsyncMock()
db.executemany = AsyncMock()
db.commit = AsyncMock()
return db
class TestLookupBatchSingleCommit:
    """lookup_batch() issues exactly one commit per call, not one per IP."""

    async def test_single_commit_for_multiple_ips(self) -> None:
        """Resolving N IPs must produce a single db.commit(), not N."""
        addresses = ["1.1.1.1", "2.2.2.2", "3.3.3.3"]
        payload = [
            {
                "query": addr,
                "status": "success",
                "countryCode": "DE",
                "country": "Germany",
                "as": "AS1",
                "org": "Org",
            }
            for addr in addresses
        ]
        mock_session = _make_batch_session(payload)
        mock_db = _make_async_db()
        await geo_service.lookup_batch(addresses, mock_session, db=mock_db)  # type: ignore[arg-type]
        mock_db.commit.assert_awaited_once()

    async def test_commit_called_even_on_failed_lookups(self) -> None:
        """Even when every lookup fails, exactly one commit happens."""
        addresses = ["10.0.0.1", "10.0.0.2"]
        payload = [
            {"query": addr, "status": "fail", "message": "private range"}
            for addr in addresses
        ]
        mock_session = _make_batch_session(payload)
        mock_db = _make_async_db()
        await geo_service.lookup_batch(addresses, mock_session, db=mock_db)  # type: ignore[arg-type]
        mock_db.commit.assert_awaited_once()

    async def test_no_commit_when_db_is_none(self) -> None:
        """With db=None there is nothing to commit and no error is raised."""
        payload = [
            {
                "query": "1.1.1.1",
                "status": "success",
                "countryCode": "US",
                "country": "United States",
                "as": "AS15169",
                "org": "Google LLC",
            },
        ]
        mock_session = _make_batch_session(payload)
        # Must complete cleanly even though no database was supplied.
        resolved = await geo_service.lookup_batch(["1.1.1.1"], mock_session, db=None)
        assert resolved["1.1.1.1"].country_code == "US"

    async def test_no_commit_for_all_cached_ips(self) -> None:
        """Fully-cached input triggers neither an HTTP call nor a commit."""
        geo_service._cache["5.5.5.5"] = GeoInfo(  # type: ignore[attr-defined]
            country_code="FR", country_name="France", asn="AS1", org="ISP"
        )
        mock_session = _make_batch_session([])
        mock_db = _make_async_db()
        resolved = await geo_service.lookup_batch(["5.5.5.5"], mock_session, db=mock_db)  # type: ignore[arg-type]
        assert resolved["5.5.5.5"].country_code == "FR"
        mock_db.commit.assert_not_awaited()
        mock_session.post.assert_not_called()
# ---------------------------------------------------------------------------
# Dirty-set tracking and flush_dirty (Task 3)
# ---------------------------------------------------------------------------
class TestDirtySetTracking:
    """_store() marks successfully resolved IPs as dirty."""

    def test_successful_resolution_adds_to_dirty(self) -> None:
        """An entry with a country_code lands in the _dirty set."""
        resolved = GeoInfo(country_code="DE", country_name="Germany", asn="AS1", org="ISP")
        geo_service._store("1.2.3.4", resolved)  # type: ignore[attr-defined]
        assert "1.2.3.4" in geo_service._dirty  # type: ignore[attr-defined]

    def test_null_country_does_not_add_to_dirty(self) -> None:
        """An entry with country_code=None must stay out of _dirty."""
        unresolved = GeoInfo(country_code=None, country_name=None, asn=None, org=None)
        geo_service._store("10.0.0.1", unresolved)  # type: ignore[attr-defined]
        assert "10.0.0.1" not in geo_service._dirty  # type: ignore[attr-defined]

    def test_clear_cache_also_clears_dirty(self) -> None:
        """clear_cache() must discard any pending dirty entries."""
        resolved = GeoInfo(country_code="US", country_name="United States", asn="AS1", org="ISP")
        geo_service._store("8.8.8.8", resolved)  # type: ignore[attr-defined]
        assert geo_service._dirty  # type: ignore[attr-defined]
        geo_service.clear_cache()
        assert not geo_service._dirty  # type: ignore[attr-defined]

    async def test_lookup_batch_populates_dirty(self) -> None:
        """lookup_batch() with db=None still records resolved IPs as dirty."""
        addresses = ["1.1.1.1", "2.2.2.2"]
        payload = [
            {
                "query": addr,
                "status": "success",
                "countryCode": "JP",
                "country": "Japan",
                "as": "AS7500",
                "org": "IIJ",
            }
            for addr in addresses
        ]
        mock_session = _make_batch_session(payload)
        await geo_service.lookup_batch(addresses, mock_session, db=None)
        for addr in addresses:
            assert addr in geo_service._dirty  # type: ignore[attr-defined]
class TestFlushDirty:
    """flush_dirty() persists dirty entries and clears the set."""

    async def test_flush_writes_and_clears_dirty(self) -> None:
        """A successful flush upserts every dirty IP and empties _dirty."""
        entry = GeoInfo(country_code="GB", country_name="United Kingdom", asn="AS2856", org="BT")
        geo_service._store("100.0.0.1", entry)  # type: ignore[attr-defined]
        assert "100.0.0.1" in geo_service._dirty  # type: ignore[attr-defined]
        mock_db = _make_async_db()
        flushed = await geo_service.flush_dirty(mock_db)
        assert flushed == 1
        mock_db.executemany.assert_awaited_once()
        mock_db.commit.assert_awaited_once()
        assert "100.0.0.1" not in geo_service._dirty  # type: ignore[attr-defined]

    async def test_flush_returns_zero_when_nothing_dirty(self) -> None:
        """An empty _dirty set means no DB traffic and a zero return."""
        mock_db = _make_async_db()
        flushed = await geo_service.flush_dirty(mock_db)
        assert flushed == 0
        mock_db.executemany.assert_not_awaited()
        mock_db.commit.assert_not_awaited()

    async def test_flush_re_adds_to_dirty_on_db_error(self) -> None:
        """A failed write must requeue the entries so a later flush retries."""
        entry = GeoInfo(country_code="AU", country_name="Australia", asn="AS1", org="ISP")
        geo_service._store("200.0.0.1", entry)  # type: ignore[attr-defined]
        mock_db = _make_async_db()
        mock_db.executemany = AsyncMock(side_effect=OSError("disk full"))
        flushed = await geo_service.flush_dirty(mock_db)
        assert flushed == 0
        assert "200.0.0.1" in geo_service._dirty  # type: ignore[attr-defined]

    async def test_flush_batch_and_lookup_batch_integration(self) -> None:
        """End to end: lookup_batch() dirties entries, flush_dirty() persists them."""
        addresses = ["10.1.2.3", "10.1.2.4"]
        payload = [
            {
                "query": addr,
                "status": "success",
                "countryCode": "CA",
                "country": "Canada",
                "as": "AS812",
                "org": "Bell",
            }
            for addr in addresses
        ]
        mock_session = _make_batch_session(payload)
        # Resolve without DB to populate only in-memory cache and _dirty.
        await geo_service.lookup_batch(addresses, mock_session, db=None)
        assert geo_service._dirty == set(addresses)  # type: ignore[attr-defined]
        # Now flush to the DB.
        mock_db = _make_async_db()
        flushed = await geo_service.flush_dirty(mock_db)
        assert flushed == 2
        assert not geo_service._dirty  # type: ignore[attr-defined]
        mock_db.commit.assert_awaited_once()