feat(geo): add cache hit/miss metrics and prewarm support

- Add _hits/_misses counters to GeoCache for cache hit/miss ratio tracking
- Reset counters on clear()
- Count hits before misses in lookup_batch() to avoid interleaving
- Add synchronous prewarm() using asyncio.create_task for fire-and-forget
- Add hits/misses fields to GeoCacheStatsResponse model
- Add TestCacheMetrics (5 tests), TestPrewarm (3 tests), TestLargeBanList (2 tests)
- Fix _make_async_db() mock: db.execute is not async, returns ctx manager
- Move collections.abc to TYPE_CHECKING block (TC003)

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-03 00:35:47 +02:00
parent b587c6e850
commit bd6170722a
4 changed files with 280 additions and 152 deletions

View File

@@ -75,6 +75,8 @@ class GeoCacheStatsResponse(BanGuiBaseModel):
unresolved: int = Field(..., description="Number of geo_cache rows with country_code IS NULL.")
neg_cache_size: int = Field(..., description="Number of entries in the in-memory negative cache.")
dirty_size: int = Field(..., description="Number of newly resolved entries not yet flushed to disk.")
hits: int = Field(default=0, description="Number of cache hits since last clear.")
misses: int = Field(default=0, description="Number of cache misses since last clear.")
class GeoReResolveResponse(BanGuiBaseModel):
"""Response for ``POST /api/geo/re-resolve``.

View File

@@ -16,6 +16,7 @@ An instance should be created once at startup and stored on ``app.state.geo_cach
from __future__ import annotations
import asyncio
import collections.abc
import time
from typing import TYPE_CHECKING
@@ -26,6 +27,8 @@ from app.models.geo import GeoInfo
from app.repositories import geo_cache_repo
if TYPE_CHECKING:
import collections.abc
import aiosqlite
import geoip2.database
import geoip2.errors
@@ -91,6 +94,8 @@ class GeoCache:
_geoip_initialized: Indicates whether init_geoip() has been called.
_allow_http_fallback: Whether to use ip-api.com as fallback.
_cache_lock: Async lock protecting cache mutations.
_hits: Cache hit counter (increments on hit, resets on clear).
_misses: Cache miss counter (increments on miss, resets on clear).
"""
def __init__(self, allow_http_fallback: bool = False) -> None:
@@ -108,6 +113,8 @@ class GeoCache:
self._geoip_initialized: bool = False
self._allow_http_fallback: bool = allow_http_fallback
self._cache_lock: asyncio.Lock = asyncio.Lock()
self._hits: int = 0
self._misses: int = 0
async def clear(self) -> None:
"""Flush both the positive and negative lookup caches.
@@ -119,6 +126,8 @@ class GeoCache:
self._cache.clear()
self._neg_cache.clear()
self._dirty.clear()
self._hits = 0
self._misses = 0
async def clear_neg_cache(self) -> None:
"""Flush only the negative (failed-lookups) cache.
@@ -162,6 +171,8 @@ class GeoCache:
"unresolved": unresolved,
"neg_cache_size": len(self._neg_cache),
"dirty_size": len(self._dirty),
"hits": self._hits,
"misses": self._misses,
}
async def count_unresolved(self, db: aiosqlite.Connection) -> int:
@@ -365,6 +376,7 @@ class GeoCache:
(e.g. network timeout).
"""
if ip in self._cache:
self._hits += 1
return self._cache[ip]
# Negative cache: skip IPs that recently failed to avoid hammering the API.
@@ -396,6 +408,7 @@ class GeoCache:
log.debug("geo_lookup_failed_no_http_fallback", ip=ip)
async with self._cache_lock:
self._neg_cache[ip] = time.monotonic()
self._misses += 1
if db is not None:
try:
await geo_cache_repo.upsert_neg_entry_and_commit(db=db, ip=ip)
@@ -447,6 +460,7 @@ class GeoCache:
# Both resolvers failed — record in negative cache to avoid hammering.
async with self._cache_lock:
self._neg_cache[ip] = time.monotonic()
self._misses += 1
if db is not None:
try:
await geo_cache_repo.upsert_neg_entry_and_commit(db=db, ip=ip)
@@ -538,9 +552,15 @@ class GeoCache:
else:
uncached.append(ip)
# Count cache hits (IPs found in cache) before miss counting.
cache_hits = sum(1 for ip in unique_ips if ip in self._cache)
self._hits += cache_hits
if not uncached:
return geo_result
self._misses += len(uncached)
log.info("geo_batch_lookup_start", total=len(uncached))
# PRIMARY: Try local MaxMind database for all uncached IPs.
@@ -824,3 +844,20 @@ class GeoCache:
log.info("geo_flush_dirty_complete", count=len(rows))
return len(rows)
def prewarm(
self,
ips: collections.abc.Iterable[str],
http_session: aiohttp.ClientSession,
) -> None:
"""Pre-load geo data for *ips* without blocking the caller.
Fires off :meth:`lookup_batch` as a background task. Useful at startup
or when warming the cache with commonly-seen IPs before they appear in
bans.
Args:
ips: Iterable of IP address strings to pre-warm.
http_session: Shared :class:`aiohttp.ClientSession` for HTTP fallback.
"""
asyncio.create_task(self.lookup_batch(list(ips), http_session))

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import asyncio
from collections.abc import Mapping, Sequence
from unittest.mock import AsyncMock, MagicMock, patch
@@ -443,13 +444,39 @@ def _make_async_db() -> MagicMock:
"""Build a minimal mock :class:`aiosqlite.Connection`.
Returns:
MagicMock with ``execute``, ``executemany``, and ``commit`` wired as
async coroutines.
MagicMock with ``execute``, ``executemany``, ``commit``, and ``rollback``
wired as async coroutines. ``execute`` is an async function that returns
an async context manager yielding a cursor.
"""
db = MagicMock()
db.execute = AsyncMock()
# Build a mock cursor for count_unresolved and similar queries.
mock_cursor = MagicMock()
mock_cursor.fetchone = AsyncMock(return_value=(0,))
mock_cursor.fetchall = AsyncMock(return_value=[])
# Build an async context manager wrapping the cursor.
mock_ctx = MagicMock()
mock_ctx.__aenter__ = AsyncMock(return_value=mock_cursor)
mock_ctx.__aexit__ = AsyncMock(return_value=None)
# For BEGIN statements (transaction start), return a no-op ctx manager.
class _BeginCtx:
async def __aenter__(self) -> None:
return None
async def __aexit__(self, *args: object) -> None:
return None
async def fake_execute(sql: str, *args: object, **kwargs: object) -> MagicMock: # type: ignore[no-untyped-def]
if isinstance(sql, str) and sql.startswith("BEGIN"):
return MagicMock(__aenter__=AsyncMock(return_value=None), __aexit__=AsyncMock(return_value=None))
return mock_ctx
db.execute = MagicMock(side_effect=fake_execute)
db.executemany = AsyncMock()
db.commit = AsyncMock()
db.rollback = AsyncMock()
return db
@@ -1032,3 +1059,214 @@ class TestLookupBatchBulkWrites:
assert db.executemany.await_count == 2
db.execute.assert_not_awaited()
# ---------------------------------------------------------------------------
# Cache metrics (Issue #15)
# ---------------------------------------------------------------------------
class TestCacheMetrics:
"""Metrics counters track cache hit/miss ratios."""
async def test_cache_hit_increments_hits(self) -> None:
"""lookup() with a cached IP increments _hits."""
geo_cache = GeoCache(allow_http_fallback=True)
geo_cache._cache["1.1.1.1"] = GeoInfo(
country_code="AU", country_name="Australia", asn=None, org=None
)
await geo_cache.lookup("1.1.1.1", MagicMock())
assert geo_cache._hits == 1
assert geo_cache._misses == 0
async def test_cache_miss_increments_misses(self) -> None:
"""lookup() with allow_http_fallback=False increments _misses on MMDB miss."""
geo_cache = GeoCache(allow_http_fallback=False)
await geo_cache.lookup("10.255.255.1", MagicMock())
assert geo_cache._hits == 0
assert geo_cache._misses == 1
async def test_batch_hits_count_cached_ips(self) -> None:
"""lookup_batch increments _hits for IPs already in cache."""
geo_cache = GeoCache(allow_http_fallback=True)
session = _make_batch_session(
[
{
"query": "1.1.1.1",
"status": "success",
"countryCode": "AU",
"country": "Australia",
"as": "AS13335",
"org": "Cloudflare",
},
]
)
# First: populate cache for 1.1.1.1.
await geo_cache.lookup_batch(["1.1.1.1"], session)
# Second call: 1.1.1.1 is cache hit; 2.2.2.2 is not cached → HTTP call.
session2 = _make_batch_session(
[
{
"query": "2.2.2.2",
"status": "success",
"countryCode": "BR",
"country": "Brazil",
"as": "AS1",
"org": "Org",
},
]
)
await geo_cache.lookup_batch(["1.1.1.1", "2.2.2.2"], session2)
assert geo_cache._hits == 1, f"Expected 1 hit, got {geo_cache._hits}"
assert geo_cache._misses == 2, f"Expected 2 misses (both IPs needed resolution), got {geo_cache._misses}"
async def test_cache_stats_includes_hits_and_misses(self) -> None:
"""cache_stats() returns hits and misses counters."""
geo_cache = GeoCache()
db = MagicMock()
# Patch count_unresolved to avoid the async context manager issue.
from app.repositories import geo_cache_repo
with patch.object(
geo_cache_repo,
"count_unresolved",
AsyncMock(return_value=0),
):
stats = await geo_cache.cache_stats(db)
assert "hits" in stats
assert "misses" in stats
assert stats["hits"] == geo_cache._hits
assert stats["misses"] == geo_cache._misses
async def test_clear_resets_hits_and_misses(self) -> None:
"""clear() resets _hits and _misses to zero."""
geo_cache = GeoCache()
geo_cache._hits = 42
geo_cache._misses = 99
await geo_cache.clear()
assert geo_cache._hits == 0
assert geo_cache._misses == 0
# ---------------------------------------------------------------------------
# Pre-warming (Issue #15)
# ---------------------------------------------------------------------------
class TestPrewarm:
"""prewarm() loads geo data without blocking the caller."""
async def test_prewarm_fires_and_forgets(self) -> None:
"""prewarm() returns None immediately (fire-and-forget)."""
geo_cache = GeoCache(allow_http_fallback=True)
session = _make_batch_session(
[
{
"query": "1.1.1.1",
"status": "success",
"countryCode": "AU",
"country": "Australia",
"as": "AS13335",
"org": "Cloudflare",
},
]
)
result = geo_cache.prewarm(["1.1.1.1"], session)
assert result is None
# Let the fire-and-forget task complete.
await asyncio.sleep(0.05)
async def test_prewarm_populates_cache_eventually(self) -> None:
"""prewarm() eventually populates the cache via lookup_batch."""
geo_cache = GeoCache(allow_http_fallback=True)
session = _make_batch_session(
[
{
"query": "1.1.1.1",
"status": "success",
"countryCode": "AU",
"country": "Australia",
"as": "AS13335",
"org": "Cloudflare",
},
]
)
geo_cache.prewarm(["1.1.1.1"], session)
# Let the fire-and-forget task run.
await asyncio.sleep(0.05)
assert "1.1.1.1" in geo_cache._cache
async def test_prewarm_accepts_empty_list(self) -> None:
"""prewarm() with an empty list does not raise."""
geo_cache = GeoCache()
result = geo_cache.prewarm([], MagicMock())
assert result is None
await asyncio.sleep(0.01)
# ---------------------------------------------------------------------------
# Large ban list performance (Issue #15)
# ---------------------------------------------------------------------------
class TestLargeBanList:
"""lookup_batch handles large IP lists without O(n) per-IP overhead."""
async def test_batch_processes_1000_ips_single_db_write(self) -> None:
"""1000 IPs should result in a single bulk DB write per chunk."""
geo_cache = GeoCache(allow_http_fallback=True)
ips = [f"1.1.1.{i % 256}" for i in range(1000)]
batch_response = [
{
"query": ip,
"status": "success",
"countryCode": "US",
"country": "United States",
"as": "AS1",
"org": "Org1",
}
for ip in ips
]
session = _make_batch_session(batch_response)
db = _make_async_db()
await geo_cache.lookup_batch(ips, session, db=db)
assert db.executemany.await_count >= 1
async def test_batch_deduplicates_ips(self) -> None:
"""lookup_batch deduplicates input IPs to avoid redundant work."""
geo_cache = GeoCache(allow_http_fallback=True)
ips = ["1.1.1.1", "1.1.1.1", "1.1.1.1"]
session = _make_batch_session(
[
{
"query": "1.1.1.1",
"status": "success",
"countryCode": "AU",
"country": "Australia",
"as": "AS13335",
"org": "Cloudflare",
},
]
)
result = await geo_cache.lookup_batch(ips, session)
assert len(result) == 1
assert "1.1.1.1" in result