Create GeoCache class with all mutable state as instance attributes: - _cache, _neg_cache, _dirty, _geoip_reader, _geoip_initialized, _cache_lock - All public methods: lookup(), lookup_batch(), lookup_cached_only(), flush_dirty(), load_from_db(), clear(), etc. Initialization & Dependency Injection: - Instantiate GeoCache in startup.py and store on app.state.geo_cache - Add get_geo_cache() dependency function in dependencies.py - Inject into routes and tasks via FastAPI's dependency system Backward Compatibility: - Maintain module-level functions in geo_service.py as deprecated wrappers - All old callers continue to work through _default_geo_cache instance - Remove test-escape-hatch functions (clear_cache, clear_neg_cache moved to methods) Background Tasks: - Update geo_cache_flush.py and geo_re_resolve.py to receive GeoCache instance - Tasks now operate on injected instance rather than module globals Tests: - Refactor test_geo_service.py with geo_cache fixture providing fresh instances - Update patch paths to target GeoCache methods correctly - Fix internal state assertions to access instance attributes Documentation: - Update Architekture.md to document GeoCache as managed stateful service - Describe cache lifecycle (load on startup, flush periodically, re-resolve stale) - Note process-local limitations for multi-worker deployments Fixes violation of Single Responsibility Principle: module no longer owns both lookup logic and cache lifecycle management. Cache is now a first-class injectable service with transparent lifecycle. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
738 lines
28 KiB
Python
738 lines
28 KiB
Python
"""GeoCache service — encapsulates geo service mutable state.
|
|
|
|
This module defines the :class:`GeoCache` class which encapsulates all
module-level mutable state from the original geo_service into a single
injectable instance. The cache manages:

- In-memory positive results cache (``ip → GeoInfo``)
- Negative cache (failed lookups with TTL)
- Dirty set (entries pending persistence)
- Lock protecting cache mutations
- MaxMind GeoLite2 reader initialization

An instance should be created once at startup and stored on ``app.state.geo_cache``.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import time
|
|
from typing import TYPE_CHECKING
|
|
|
|
import aiohttp
|
|
import structlog
|
|
|
|
from app.models.geo import GeoInfo
|
|
from app.repositories import geo_cache_repo
|
|
|
|
if TYPE_CHECKING:
|
|
import aiosqlite
|
|
import geoip2.database
|
|
import geoip2.errors
|
|
|
|
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
|
|
|
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

#: Single-IP lookup endpoint of ip-api.com (the free tier is HTTP only).
_API_URL: str = "http://ip-api.com/json/{ip}?fields=status,message,country,countryCode,org,as"

#: Batch endpoint of ip-api.com — takes up to 100 IPs per POST.
_BATCH_API_URL: str = "http://ip-api.com/batch?fields=status,message,country,countryCode,org,as,query"

#: Number of IPs sent per batch request (ip-api.com caps a batch at 100).
_BATCH_SIZE: int = 100

#: Entry count at which the in-process cache is emptied completely.  This is
#: a deliberately simple eviction strategy: the cache is cheap to rebuild
#: from the persistent store.
_MAX_CACHE_SIZE: int = 50_000

#: Timeout, in seconds, applied to outgoing geo API requests.
_REQUEST_TIMEOUT: float = 5.0

#: Seconds for which a failed lookup is suppressed before the IP becomes
#: eligible for another API attempt (5 minutes).
_NEG_CACHE_TTL: float = 300.0

#: Minimum pause, in seconds, between consecutive batch HTTP requests to
#: ip-api.com.  The free tier allows 45 requests/min; 1.5 s ≈ 40 req/min.
_BATCH_DELAY: float = 1.5

#: Number of extra attempts for a batch chunk that fails with a transient
#: error (e.g. connection reset due to rate limiting).
_BATCH_MAX_RETRIES: int = 2
|
|
|
|
|
|
class GeoCache:
    """Manage IP geolocation lookups backed by positive and negative caches.

    All mutable state needed for geo-IP resolution lives on the instance, so
    a single object can be created at startup and shared.  The class offers
    single and batch lookups, persistence helpers, and cache maintenance.

    State:
        _cache: In-memory positive results cache (``ip → GeoInfo``).
        _neg_cache: Failed lookup timestamps (``ip → time.monotonic()``
            value recorded when the failure happened).
        _dirty: IPs stored in ``_cache`` with a known country code that
            :meth:`flush_dirty` has not yet written out.
        _geoip_reader: Optional MaxMind GeoLite2 reader.
        _geoip_initialized: ``True`` once :meth:`init_geoip` has loaded a
            reader successfully.
        _cache_lock: Async lock taken around cache mutations.
    """

    def __init__(self) -> None:
        """Initialise an empty GeoCache with no GeoIP reader attached."""
        self._cache: dict[str, GeoInfo] = {}
        self._neg_cache: dict[str, float] = {}
        self._dirty: set[str] = set()
        self._geoip_reader: geoip2.database.Reader | None = None
        self._geoip_initialized: bool = False
        self._cache_lock: asyncio.Lock = asyncio.Lock()

    # ------------------------------------------------------------------
    # Small internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _unresolved_info() -> GeoInfo:
        """Return a new :class:`GeoInfo` whose fields are all ``None``."""
        return GeoInfo(country_code=None, country_name=None, asn=None, org=None)

    def _is_neg_cached(self, ip: str, now: float) -> bool:
        """Return ``True`` while *ip* is still inside its negative-cache window.

        Args:
            ip: IP address string.
            now: Current ``time.monotonic()`` reading to compare against.
        """
        failed_at = self._neg_cache.get(ip)
        return failed_at is not None and (now - failed_at) < _NEG_CACHE_TTL

    async def _persist_positive(
        self,
        db: aiosqlite.Connection,
        ip: str,
        info: GeoInfo,
    ) -> None:
        """Write one resolved entry to the persistent cache, best effort.

        Any exception raised by the repository call is logged as
        ``geo_persist_failed`` and swallowed so a database problem never
        fails the lookup itself.

        Args:
            db: BanGUI application database connection.
            ip: The IP address key.
            info: The resolved :class:`GeoInfo` to persist.
        """
        try:
            await geo_cache_repo.upsert_entry_and_commit(
                db=db,
                ip=ip,
                country_code=info.country_code,
                country_name=info.country_name,
                asn=info.asn,
                org=info.org,
            )
        except Exception as exc:  # noqa: BLE001
            log.warning("geo_persist_failed", ip=ip, error=str(exc))

    # ------------------------------------------------------------------
    # Cache maintenance
    # ------------------------------------------------------------------

    async def clear(self) -> None:
        """Flush both the positive and negative lookup caches.

        The dirty set is cleared as well, so entries that were pending but
        not yet persisted are discarded.  Useful in tests and when the
        operator suspects stale data.
        """
        async with self._cache_lock:
            self._cache.clear()
            self._neg_cache.clear()
            self._dirty.clear()

    async def clear_neg_cache(self) -> None:
        """Flush only the negative (failed-lookups) cache.

        Useful when triggering a manual re-resolve so that previously failed
        IPs are immediately eligible for a new API attempt.
        """
        async with self._cache_lock:
            self._neg_cache.clear()

    def is_cached(self, ip: str) -> bool:
        """Return ``True`` if *ip* has a positive entry in the in-memory cache.

        A positive entry is one with a non-``None`` ``country_code``.

        Args:
            ip: IPv4 or IPv6 address string.

        Returns:
            ``True`` when *ip* is in the cache with a known country code.
        """
        entry = self._cache.get(ip)
        return entry is not None and entry.country_code is not None

    async def cache_stats(self, db: aiosqlite.Connection) -> dict[str, int]:
        """Return diagnostic counters for the geo cache subsystem.

        Asks the repository for the number of unresolved entries and
        combines it with the sizes of the in-memory structures.

        Args:
            db: Open BanGUI application database connection.

        Returns:
            Dict with keys ``cache_size``, ``unresolved``, ``neg_cache_size``,
            and ``dirty_size``.
        """
        unresolved = await geo_cache_repo.count_unresolved(db)
        return {
            "cache_size": len(self._cache),
            "unresolved": unresolved,
            "neg_cache_size": len(self._neg_cache),
            "dirty_size": len(self._dirty),
        }

    async def count_unresolved(self, db: aiosqlite.Connection) -> int:
        """Return the number of unresolved entries in the persistent geo cache."""
        return await geo_cache_repo.count_unresolved(db)

    async def get_unresolved_ips(self, db: aiosqlite.Connection) -> list[str]:
        """Return geo cache IPs where the country code has not yet been resolved.

        Args:
            db: Open BanGUI application database connection.

        Returns:
            List of IP addresses that are candidates for re-resolution.
        """
        return await geo_cache_repo.get_unresolved_ips(db)

    async def re_resolve_all(
        self,
        db: aiosqlite.Connection,
        http_session: aiohttp.ClientSession,
    ) -> dict[str, int]:
        """Retry geo resolution for all unresolved cache entries.

        The in-memory negative cache is cleared before the batch lookup so
        that every unresolved IP is attempted again.

        Args:
            db: BanGUI application database connection.
            http_session: Shared aiohttp client session.

        Returns:
            A dict with ``resolved`` (IPs that gained a country code) and
            ``total`` (IPs retried) counts.
        """
        unresolved = await self.get_unresolved_ips(db)
        if not unresolved:
            return {"resolved": 0, "total": 0}

        await self.clear_neg_cache()
        geo_map = await self.lookup_batch(unresolved, http_session, db=db)
        resolved_count = sum(
            1 for info in geo_map.values() if info.country_code is not None
        )

        log.info(
            "geo_re_resolve_complete",
            total=len(unresolved),
            resolved=resolved_count,
        )
        return {"resolved": resolved_count, "total": len(unresolved)}

    # ------------------------------------------------------------------
    # MaxMind GeoLite2 fallback
    # ------------------------------------------------------------------

    def init_geoip(self, mmdb_path: str | None) -> None:
        """Initialise the MaxMind GeoLite2-Country database reader.

        Intended to be called during startup, before request handling
        begins.  If *mmdb_path* is ``None``, empty, or does not point to an
        existing file, the method returns without loading a reader and
        ip-api.com remains the sole resolver.  In those cases the
        initialised flag is left unset, so a later call may still load a
        reader.

        Args:
            mmdb_path: Path to a ``GeoLite2-Country.mmdb`` file.

        Raises:
            RuntimeError: If a reader was already loaded by an earlier call.
        """
        if self._geoip_initialized:
            raise RuntimeError("GeoIP reader already initialised")
        if not mmdb_path:
            return

        # Imported lazily so the dependency is only needed when a path is set.
        from pathlib import Path  # noqa: PLC0415

        import geoip2.database  # noqa: PLC0415

        if not Path(mmdb_path).is_file():
            log.warning("geoip_mmdb_not_found", path=mmdb_path)
            return

        self._geoip_reader = geoip2.database.Reader(mmdb_path)
        self._geoip_initialized = True
        log.info("geoip_mmdb_loaded", path=mmdb_path)

    def _geoip_lookup(self, ip: str) -> GeoInfo | None:
        """Attempt a local MaxMind GeoLite2 lookup for *ip*.

        Args:
            ip: IPv4 or IPv6 address string.

        Returns:
            A :class:`GeoInfo` with ``country_code`` populated (``asn`` and
            ``org`` are always ``None``), or ``None`` when no reader is
            loaded, the address is not in the database, the record has no
            ISO code, or any other error occurs.
        """
        if self._geoip_reader is None:
            return None

        import geoip2.errors  # noqa: PLC0415

        try:
            country = self._geoip_reader.country(ip).country
            code: str | None = country.iso_code or None
            name: str | None = country.name or None
        except geoip2.errors.AddressNotFoundError:
            return None
        except Exception as exc:  # noqa: BLE001
            log.warning("geoip_lookup_failed", ip=ip, error=str(exc))
            return None

        if code is None:
            return None
        return GeoInfo(country_code=code, country_name=name, asn=None, org=None)

    # ------------------------------------------------------------------
    # Cache population
    # ------------------------------------------------------------------

    async def load_cache_from_db(self, db: aiosqlite.Connection) -> None:
        """Pre-populate the in-memory cache from the persistent geo cache.

        Rows whose ``country_code`` is ``None`` are skipped.  Meant to be
        called during application startup so the service begins with a warm
        cache instead of issuing API calls for already-known IPs.

        Args:
            db: Open :class:`aiosqlite.Connection` to the BanGUI application
                database (not the fail2ban database).
        """
        rows = await geo_cache_repo.load_all(db)
        entries: list[tuple[str, GeoInfo]] = [
            (
                row["ip"],
                GeoInfo(
                    country_code=row["country_code"],
                    country_name=row["country_name"],
                    asn=row["asn"],
                    org=row["org"],
                ),
            )
            for row in rows
            if row["country_code"] is not None
        ]

        async with self._cache_lock:
            self._cache.update(entries)
        log.info("geo_cache_loaded_from_db", entries=len(entries))

    async def _store(self, ip: str, info: GeoInfo) -> None:
        """Insert *info* into the cache, emptying it first if at capacity.

        When the cache already holds ``_MAX_CACHE_SIZE`` entries, both the
        cache and the dirty set are cleared before the new entry is added.
        An entry with a known country code is also added to the dirty set so
        :meth:`flush_dirty` can persist it later.

        Args:
            ip: The IP address key.
            info: The :class:`GeoInfo` to store.
        """
        async with self._cache_lock:
            if len(self._cache) >= _MAX_CACHE_SIZE:
                self._cache.clear()
                self._dirty.clear()
                log.info("geo_cache_flushed", reason="capacity")
            self._cache[ip] = info
            if info.country_code is not None:
                self._dirty.add(ip)

    # ------------------------------------------------------------------
    # Lookups
    # ------------------------------------------------------------------

    async def lookup(
        self,
        ip: str,
        http_session: aiohttp.ClientSession,
        db: aiosqlite.Connection | None = None,
    ) -> GeoInfo | None:
        """Resolve an IP address to country, ASN, and organisation metadata.

        Resolution order: in-memory cache, negative cache (recent failures
        are answered with an all-``None`` result without an API call),
        ip-api.com, then the local GeoIP reader.  When both resolvers fail,
        the IP is recorded in the negative cache for ``_NEG_CACHE_TTL``
        seconds and, if *db* is given, a negative entry is persisted.

        Successful resolutions (``country_code is not None``) are written to
        the persistent cache when *db* is provided; persistence errors are
        logged and ignored.

        Args:
            ip: IPv4 or IPv6 address string.
            http_session: Shared :class:`aiohttp.ClientSession`.
            db: Optional BanGUI application database used for persistence.

        Returns:
            A :class:`GeoInfo` instance.  Failed lookups yield a
            :class:`GeoInfo` with all fields ``None``; the ``None`` in the
            annotation is kept for interface compatibility and is not
            produced by this implementation.
        """
        if ip in self._cache:
            return self._cache[ip]

        # Negative cache: skip IPs that recently failed to avoid hammering the API.
        if self._is_neg_cached(ip, time.monotonic()):
            return self._unresolved_info()

        url: str = _API_URL.format(ip=ip)
        # Stays False unless the API reported success; guards the fallback below
        # for the case where an exception is raised after a successful response.
        api_ok = False
        try:
            timeout = aiohttp.ClientTimeout(total=_REQUEST_TIMEOUT)
            async with http_session.get(url, timeout=timeout) as resp:
                if resp.status != 200:
                    log.warning("geo_lookup_non_200", ip=ip, status=resp.status)
                else:
                    data: dict[str, object] = await resp.json(content_type=None)
                    if data.get("status") == "success":
                        api_ok = True
                        result = self._parse_single_response(data)
                        await self._store(ip, result)
                        if result.country_code is not None and db is not None:
                            await self._persist_positive(db, ip, result)
                        log.debug(
                            "geo_lookup_success",
                            ip=ip,
                            country=result.country_code,
                            asn=result.asn,
                        )
                        return result
                    log.debug(
                        "geo_lookup_failed",
                        ip=ip,
                        message=data.get("message", "unknown"),
                    )
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "geo_lookup_request_failed",
                ip=ip,
                exc_type=type(exc).__name__,
                error=repr(exc),
            )

        if not api_ok:
            # Try local MaxMind database as fallback.
            fallback = self._geoip_lookup(ip)
            if fallback is not None:
                await self._store(ip, fallback)
                if fallback.country_code is not None and db is not None:
                    await self._persist_positive(db, ip, fallback)
                log.debug(
                    "geo_geoip_fallback_success",
                    ip=ip,
                    country=fallback.country_code,
                )
                return fallback

        # Both resolvers failed — record in negative cache to avoid hammering.
        async with self._cache_lock:
            self._neg_cache[ip] = time.monotonic()
        if db is not None:
            try:
                await geo_cache_repo.upsert_neg_entry_and_commit(db=db, ip=ip)
            except Exception as exc:  # noqa: BLE001
                log.warning("geo_persist_neg_failed", ip=ip, error=str(exc))

        return self._unresolved_info()

    def lookup_cached_only(
        self,
        ips: list[str],
    ) -> tuple[dict[str, GeoInfo], list[str]]:
        """Return cached geo data for *ips* without making any external API calls.

        Lets callers answer quickly from memory and defer resolution of the
        remaining IPs to a background task.

        Args:
            ips: IP address strings to look up; duplicates are collapsed.

        Returns:
            A ``(geo_map, uncached)`` tuple.  *geo_map* maps every IP found
            in the in-memory cache to its :class:`GeoInfo`.  *uncached*
            lists, in first-seen order, the IPs that are neither cached nor
            inside their negative-cache window (recent failures are left
            out so they are not re-queued immediately).
        """
        geo_map: dict[str, GeoInfo] = {}
        uncached: list[str] = []
        now = time.monotonic()

        for ip in dict.fromkeys(ips):  # deduplicate, preserve order
            if ip in self._cache:
                geo_map[ip] = self._cache[ip]
            elif not self._is_neg_cached(ip, now):
                uncached.append(ip)
            # else: still within the cool-down window — do not re-queue.

        return geo_map, uncached

    async def lookup_batch(
        self,
        ips: list[str],
        http_session: aiohttp.ClientSession,
        db: aiosqlite.Connection | None = None,
    ) -> dict[str, GeoInfo]:
        """Resolve multiple IP addresses in bulk using the ip-api.com batch endpoint.

        IPs already in the in-memory cache, or inside their negative-cache
        window, are answered without an HTTP request.  The rest are sent in
        chunks of up to ``_BATCH_SIZE`` with a ``_BATCH_DELAY`` pause between
        chunks.  IPs the API could not resolve are tried against the local
        GeoIP reader; IPs that fail both are recorded in the negative cache.

        When *db* is provided, the resolved rows and the failed IPs of each
        chunk are handed to the repository in one bulk call; a failure of
        that call is logged and ignored.

        Args:
            ips: List of IP address strings to resolve.  Duplicates are ignored.
            http_session: Shared :class:`aiohttp.ClientSession`.
            db: Optional BanGUI application database for persistent cache writes.

        Returns:
            Dict mapping ``ip → GeoInfo`` for every input IP.  IPs whose
            resolution failed will have a ``GeoInfo`` with all-``None`` fields.
        """
        geo_result: dict[str, GeoInfo] = {}
        uncached: list[str] = []
        unresolved = self._unresolved_info()

        now = time.monotonic()
        for ip in dict.fromkeys(ips):  # deduplicate, preserve order
            if ip in self._cache:
                geo_result[ip] = self._cache[ip]
            elif self._is_neg_cached(ip, now):
                # Recently failed — skip API call, return empty result.
                geo_result[ip] = unresolved
            else:
                uncached.append(ip)

        if not uncached:
            return geo_result

        log.info("geo_batch_lookup_start", total=len(uncached))

        for batch_idx, chunk_start in enumerate(range(0, len(uncached), _BATCH_SIZE)):
            chunk = uncached[chunk_start : chunk_start + _BATCH_SIZE]

            # Throttle: pause between consecutive HTTP calls to stay within the
            # ip-api.com free-tier rate limit (45 req/min).
            if batch_idx > 0:
                await asyncio.sleep(_BATCH_DELAY)

            # Retry transient failures (e.g. connection-reset from rate limit).
            chunk_result: dict[str, GeoInfo] | None = None
            for attempt in range(_BATCH_MAX_RETRIES + 1):
                chunk_result = await self._batch_api_call(chunk, http_session)
                # A chunk in which every IP came back unresolved is treated as
                # a rejected request (connection reset / 429) and retried after
                # an exponential back-off.
                # NOTE(review): there is no minimum chunk size here, so a chunk
                # made up only of genuinely unresolvable IPs is retried too.
                all_failed = all(
                    info.country_code is None for info in chunk_result.values()
                )
                if not all_failed or attempt >= _BATCH_MAX_RETRIES:
                    break
                backoff = _BATCH_DELAY * (2 ** (attempt + 1))
                log.warning(
                    "geo_batch_retry",
                    attempt=attempt + 1,
                    chunk_size=len(chunk),
                    backoff=backoff,
                )
                await asyncio.sleep(backoff)

            assert chunk_result is not None  # noqa: S101

            # Collect bulk-write rows instead of one execute per IP.
            pos_rows: list[tuple[str, str | None, str | None, str | None, str | None]] = []
            neg_ips: list[str] = []

            for ip, info in chunk_result.items():
                # Use the API result when it resolved, else the local GeoIP fallback.
                resolved = info if info.country_code is not None else self._geoip_lookup(ip)
                if resolved is not None:
                    await self._store(ip, resolved)
                    geo_result[ip] = resolved
                    if db is not None:
                        pos_rows.append(
                            (
                                ip,
                                resolved.country_code,
                                resolved.country_name,
                                resolved.asn,
                                resolved.org,
                            )
                        )
                    continue

                # Both resolvers failed — record in negative cache.
                async with self._cache_lock:
                    self._neg_cache[ip] = time.monotonic()
                geo_result[ip] = unresolved
                if db is not None:
                    neg_ips.append(ip)

            if db is not None and (pos_rows or neg_ips):
                try:
                    await geo_cache_repo.bulk_upsert_entries_and_neg_entries_and_commit(
                        db,
                        pos_rows,
                        neg_ips,
                    )
                except Exception as exc:  # noqa: BLE001
                    log.warning(
                        "geo_batch_persist_failed",
                        positive_count=len(pos_rows),
                        negative_count=len(neg_ips),
                        error=str(exc),
                    )

        log.info(
            "geo_batch_lookup_complete",
            requested=len(uncached),
            resolved=sum(1 for g in geo_result.values() if g.country_code is not None),
        )
        return geo_result

    async def _batch_api_call(
        self,
        ips: list[str],
        http_session: aiohttp.ClientSession,
    ) -> dict[str, GeoInfo]:
        """Send one batch request to the ip-api.com batch endpoint.

        Args:
            ips: Up to ``_BATCH_SIZE`` IP address strings.
            http_session: Shared HTTP session.

        Returns:
            Dict containing an entry for every IP in *ips*.  IPs for which
            the API returned a failure record, that are missing from the
            response, or whose request failed (exception, non-200 status,
            or a payload that is not a JSON list) map to an all-``None``
            :class:`GeoInfo`.
        """
        empty = self._unresolved_info()
        fallback: dict[str, GeoInfo] = dict.fromkeys(ips, empty)

        payload = [{"query": ip} for ip in ips]
        try:
            async with http_session.post(
                _BATCH_API_URL,
                json=payload,
                timeout=aiohttp.ClientTimeout(total=_REQUEST_TIMEOUT * 2),
            ) as resp:
                if resp.status != 200:
                    log.warning("geo_batch_non_200", status=resp.status, count=len(ips))
                    return fallback
                data: object = await resp.json(content_type=None)
        except Exception as exc:  # noqa: BLE001
            log.warning(
                "geo_batch_request_failed",
                count=len(ips),
                exc_type=type(exc).__name__,
                error=repr(exc),
            )
            return fallback

        # The batch endpoint is expected to answer with a JSON list; anything
        # else (e.g. an error object) is treated as a failed request rather
        # than letting the parsing loop below raise.
        if not isinstance(data, list):
            log.warning(
                "geo_batch_unexpected_payload",
                count=len(ips),
                payload_type=type(data).__name__,
            )
            return fallback

        out: dict[str, GeoInfo] = {}
        for entry in data:
            if not isinstance(entry, dict):
                # Malformed record; the affected IP is filled in as empty below.
                continue
            ip_str: str = str(entry.get("query", ""))
            if not ip_str:
                continue
            if entry.get("status") != "success":
                out[ip_str] = empty
                log.debug(
                    "geo_batch_entry_failed",
                    ip=ip_str,
                    message=entry.get("message", "unknown"),
                )
                continue
            out[ip_str] = self._parse_single_response(entry)

        # Fill any IPs missing from the response.
        for ip in ips:
            out.setdefault(ip, empty)

        return out

    def _parse_single_response(self, data: dict[str, object]) -> GeoInfo:
        """Build a :class:`GeoInfo` from a single ip-api.com response dict.

        Args:
            data: A ``status == "success"`` JSON response from ip-api.com.

        Returns:
            :class:`GeoInfo` built from the ``countryCode``, ``country``,
            ``as`` and ``org`` fields; blank or missing fields become
            ``None``.
        """
        asn_raw = self._str_or_none(data.get("as"))
        # Only the first whitespace-separated token of "as" (e.g. "AS12345"
        # out of "AS12345 Some Org") is kept as the ASN.
        asn: str | None = asn_raw.split()[0] if asn_raw else None

        return GeoInfo(
            country_code=self._str_or_none(data.get("countryCode")),
            country_name=self._str_or_none(data.get("country")),
            asn=asn,
            org=self._str_or_none(data.get("org")),
        )

    def _str_or_none(self, value: object) -> str | None:
        """Return *value* as a non-empty string, or ``None``.

        Args:
            value: Raw JSON value which may be ``None``, empty, or a string.

        Returns:
            ``str(value)`` with surrounding whitespace stripped, or ``None``
            when *value* is ``None`` or the stripped text is empty.
        """
        if value is None:
            return None
        text = str(value).strip()
        return text or None

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    async def flush_dirty(self, db: aiosqlite.Connection) -> int:
        """Persist pending in-memory geo entries to the persistent cache.

        Under the cache lock, the dirty set is snapshotted and cleared and
        the rows for entries still present in the cache are collected.  The
        rows are then written with one bulk repository call.

        If the database write fails, the snapshotted IPs are put back into
        the dirty set so they are retried on the next flush cycle.

        Args:
            db: Open :class:`aiosqlite.Connection` to the BanGUI application
                database.

        Returns:
            The number of rows handed to a successful bulk upsert, or ``0``
            when nothing was pending or the write failed.
        """
        async with self._cache_lock:
            if not self._dirty:
                return 0

            # Snapshot and clear while holding the cache lock.
            to_flush = self._dirty.copy()
            self._dirty.clear()
            pending = [(ip, self._cache[ip]) for ip in to_flush if ip in self._cache]

        rows = [
            (ip, info.country_code, info.country_name, info.asn, info.org)
            for ip, info in pending
        ]
        if not rows:
            return 0

        try:
            await geo_cache_repo.bulk_upsert_entries_and_commit(db, rows)
        except Exception as exc:  # noqa: BLE001
            log.warning("geo_flush_dirty_failed", error=str(exc))
            # Re-add to dirty so they are retried on the next flush cycle.
            async with self._cache_lock:
                self._dirty.update(to_flush)
            return 0

        log.info("geo_flush_dirty_complete", count=len(rows))
        return len(rows)
|