Fix geo cache write performance: batch commits, read-only GETs, dirty flush
- Remove per-IP db.commit() from _persist_entry() and _persist_neg_entry(); add a single commit after the full lookup_batch() chunk loop instead. Reduces commits from ~5,200 to 1 per bans/by-country request.
- Remove the db dependency from GET /api/dashboard/bans and GET /api/dashboard/bans/by-country; pass app_db=None so no SQLite writes occur during read-only requests.
- Add a _dirty set to geo_service; _store() marks resolved IPs dirty. New flush_dirty(db) batch-upserts all dirty entries in one transaction. New geo_cache_flush APScheduler task flushes every 60 s so geo data is persisted without blocking requests.
This commit is contained in:
@@ -356,3 +356,212 @@ class TestGeoipFallback:
|
||||
|
||||
assert result is not None
|
||||
assert result.country_code is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Batch single-commit behaviour (Task 1)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_batch_session(batch_response: list[dict[str, object]]) -> MagicMock:
    """Create a fake ``aiohttp.ClientSession`` whose ``post`` yields a canned reply.

    Args:
        batch_response: Payload handed back by the fake response's ``json()``
            coroutine.

    Returns:
        A :class:`MagicMock` whose ``post(...)`` call returns an async context
        manager; entering it gives a response with ``status`` 200.
    """
    # Fake response: HTTP 200 plus an awaitable ``json()``.
    response = AsyncMock(status=200, json=AsyncMock(return_value=batch_response))

    # Object returned by ``post``; supports ``async with ... as resp``.
    post_ctx = AsyncMock()
    post_ctx.__aenter__ = AsyncMock(return_value=response)
    post_ctx.__aexit__ = AsyncMock(return_value=False)

    # ``post`` is called synchronously, so it is a plain (non-async) mock.
    return MagicMock(post=MagicMock(return_value=post_ctx))
|
||||
|
||||
|
||||
def _make_async_db() -> MagicMock:
    """Create a lightweight stand-in for an :class:`aiosqlite.Connection`.

    Returns:
        A :class:`MagicMock` on which ``execute``, ``executemany`` and
        ``commit`` are awaitable :class:`AsyncMock` attributes.
    """
    return MagicMock(
        execute=AsyncMock(),
        executemany=AsyncMock(),
        commit=AsyncMock(),
    )
|
||||
|
||||
|
||||
class TestLookupBatchSingleCommit:
    """A ``lookup_batch()`` call commits once in total rather than once per IP."""

    async def test_single_commit_for_multiple_ips(self) -> None:
        """Resolving several IPs in one batch yields a single ``db.commit()``."""
        addresses = ["1.1.1.1", "2.2.2.2", "3.3.3.3"]
        api_rows: list[dict[str, object]] = []
        for address in addresses:
            api_rows.append(
                {"query": address, "status": "success", "countryCode": "DE", "country": "Germany", "as": "AS1", "org": "Org"}
            )
        fake_db = _make_async_db()
        http = _make_batch_session(api_rows)

        await geo_service.lookup_batch(addresses, http, db=fake_db)  # type: ignore[arg-type]

        assert fake_db.commit.await_count == 1

    async def test_commit_called_even_on_failed_lookups(self) -> None:
        """Even when every lookup in the batch fails, one commit still happens."""
        addresses = ["10.0.0.1", "10.0.0.2"]
        api_rows: list[dict[str, object]] = []
        for address in addresses:
            api_rows.append({"query": address, "status": "fail", "message": "private range"})
        fake_db = _make_async_db()
        http = _make_batch_session(api_rows)

        await geo_service.lookup_batch(addresses, http, db=fake_db)  # type: ignore[arg-type]

        assert fake_db.commit.await_count == 1

    async def test_no_commit_when_db_is_none(self) -> None:
        """Passing ``db=None`` still resolves the IP; there is no DB to commit to."""
        http = _make_batch_session(
            [
                {"query": "1.1.1.1", "status": "success", "countryCode": "US", "country": "United States", "as": "AS15169", "org": "Google LLC"},
            ]
        )

        # Must complete without raising even though no connection is supplied.
        resolved = await geo_service.lookup_batch(["1.1.1.1"], http, db=None)

        assert resolved["1.1.1.1"].country_code == "US"

    async def test_no_commit_for_all_cached_ips(self) -> None:
        """A fully cached batch triggers neither an HTTP request nor a commit."""
        cached = GeoInfo(country_code="FR", country_name="France", asn="AS1", org="ISP")
        geo_service._cache["5.5.5.5"] = cached  # type: ignore[attr-defined]
        http = _make_batch_session([])
        fake_db = _make_async_db()

        resolved = await geo_service.lookup_batch(["5.5.5.5"], http, db=fake_db)  # type: ignore[arg-type]

        assert resolved["5.5.5.5"].country_code == "FR"
        assert fake_db.commit.await_count == 0
        assert http.post.call_count == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dirty-set tracking and flush_dirty (Task 3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDirtySetTracking:
    """``_store()`` records successfully resolved IPs in the dirty set."""

    def test_successful_resolution_adds_to_dirty(self) -> None:
        """An entry that carries a country code is marked dirty when stored."""
        resolved = GeoInfo(country_code="DE", country_name="Germany", asn="AS1", org="ISP")

        geo_service._store("1.2.3.4", resolved)  # type: ignore[attr-defined]

        assert "1.2.3.4" in geo_service._dirty  # type: ignore[attr-defined]

    def test_null_country_does_not_add_to_dirty(self) -> None:
        """An entry whose ``country_code`` is ``None`` is kept out of ``_dirty``."""
        unresolved = GeoInfo(country_code=None, country_name=None, asn=None, org=None)

        geo_service._store("10.0.0.1", unresolved)  # type: ignore[attr-defined]

        assert "10.0.0.1" not in geo_service._dirty  # type: ignore[attr-defined]

    def test_clear_cache_also_clears_dirty(self) -> None:
        """``clear_cache()`` drops pending dirty entries as well."""
        resolved = GeoInfo(country_code="US", country_name="United States", asn="AS1", org="ISP")
        geo_service._store("8.8.8.8", resolved)  # type: ignore[attr-defined]
        # Precondition: storing left something pending.
        assert geo_service._dirty  # type: ignore[attr-defined]

        geo_service.clear_cache()

        assert not geo_service._dirty  # type: ignore[attr-defined]

    async def test_lookup_batch_populates_dirty(self) -> None:
        """IPs resolved by ``lookup_batch()`` with ``db=None`` end up in ``_dirty``."""
        addresses = ["1.1.1.1", "2.2.2.2"]
        api_rows: list[dict[str, object]] = []
        for address in addresses:
            api_rows.append(
                {"query": address, "status": "success", "countryCode": "JP", "country": "Japan", "as": "AS7500", "org": "IIJ"}
            )
        http = _make_batch_session(api_rows)

        await geo_service.lookup_batch(addresses, http, db=None)

        not_marked = [
            address
            for address in addresses
            if address not in geo_service._dirty  # type: ignore[attr-defined]
        ]
        assert not not_marked
|
||||
|
||||
|
||||
class TestFlushDirty:
    """``flush_dirty()`` writes pending entries to the DB and empties the set."""

    async def test_flush_writes_and_clears_dirty(self) -> None:
        """A flush with one pending IP writes it, commits once and clears it."""
        entry = GeoInfo(country_code="GB", country_name="United Kingdom", asn="AS2856", org="BT")
        geo_service._store("100.0.0.1", entry)  # type: ignore[attr-defined]
        # Precondition: the stored entry is pending.
        assert "100.0.0.1" in geo_service._dirty  # type: ignore[attr-defined]
        fake_db = _make_async_db()

        flushed = await geo_service.flush_dirty(fake_db)

        assert flushed == 1
        assert fake_db.executemany.await_count == 1
        assert fake_db.commit.await_count == 1
        assert "100.0.0.1" not in geo_service._dirty  # type: ignore[attr-defined]

    async def test_flush_returns_zero_when_nothing_dirty(self) -> None:
        """With nothing pending, the flush reports 0 and does not touch the DB."""
        fake_db = _make_async_db()

        flushed = await geo_service.flush_dirty(fake_db)

        assert flushed == 0
        assert fake_db.executemany.await_count == 0
        assert fake_db.commit.await_count == 0

    async def test_flush_re_adds_to_dirty_on_db_error(self) -> None:
        """If the DB write raises, the entries stay in ``_dirty`` for a retry."""
        entry = GeoInfo(country_code="AU", country_name="Australia", asn="AS1", org="ISP")
        geo_service._store("200.0.0.1", entry)  # type: ignore[attr-defined]
        fake_db = _make_async_db()
        # Simulate a failing bulk write.
        fake_db.executemany = AsyncMock(side_effect=OSError("disk full"))

        flushed = await geo_service.flush_dirty(fake_db)

        assert flushed == 0
        assert "200.0.0.1" in geo_service._dirty  # type: ignore[attr-defined]

    async def test_flush_batch_and_lookup_batch_integration(self) -> None:
        """``lookup_batch()`` fills ``_dirty`` and ``flush_dirty()`` then drains it."""
        addresses = ["10.1.2.3", "10.1.2.4"]
        api_rows: list[dict[str, object]] = []
        for address in addresses:
            api_rows.append(
                {"query": address, "status": "success", "countryCode": "CA", "country": "Canada", "as": "AS812", "org": "Bell"}
            )
        http = _make_batch_session(api_rows)

        # Step 1: resolve with no DB, so only the in-memory cache and _dirty change.
        await geo_service.lookup_batch(addresses, http, db=None)
        assert geo_service._dirty == set(addresses)  # type: ignore[attr-defined]

        # Step 2: persist the pending entries.
        fake_db = _make_async_db()
        flushed = await geo_service.flush_dirty(fake_db)

        assert flushed == 2
        assert not geo_service._dirty  # type: ignore[attr-defined]
        assert fake_db.commit.await_count == 1
|
||||
|
||||
|
||||
Reference in New Issue
Block a user