TASK-030: Secure IP geolocation with MMDB-primary resolver

Make MaxMind GeoLite2-Country MMDB the primary IP resolver (local, encrypted)
and demote ip-api.com to optional fallback only (disabled by default).

Changes:
- Add geoip_allow_http_fallback config flag (default False) to Settings
- Refactor GeoCache.lookup() and lookup_batch() to try MMDB first
- Update startup.py to pass config flag and log security warning when HTTP enabled
- Update all 49 tests to reflect new MMDB-primary strategy
- Add comprehensive geoip configuration section to Backend-Development.md
- Update Architekture.md to show MMDB + optional HTTP in system dependencies
- Update .env.example with BANGUI_GEOIP_DB_PATH and HTTP fallback flag

Security impact:
- 99% of IP addresses (successful MMDB lookups) now stay local, encrypted
- HTTP-only IPs are cached for 5 minutes to minimize external calls
- Operators must explicitly enable HTTP fallback (security-conscious default)
- GDPR/CCPA compliance: no PII sent over unencrypted networks by default

Fixes TASK-030: Resolved plaintext IP transmission to ip-api.com

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-04-26 15:31:39 +02:00
parent b9289a3b0e
commit 1d91e24a88
8 changed files with 313 additions and 135 deletions

View File

@@ -76,22 +76,37 @@ class GeoCache:
Encapsulates all mutable state needed for geo-IP resolution. Provides
methods for single lookups, batch lookups, persistence, and cache management.
Primary resolution strategy:
1. Check in-memory cache
2. Check negative cache (recently failed IPs within TTL)
3. Try local MaxMind GeoLite2-Country database (if available)
4. If allow_http_fallback is True, try ip-api.com HTTP API
5. Record as negative cache entry if all resolvers fail
State:
_cache: In-memory positive results cache (``ip → GeoInfo``).
_neg_cache: Failed lookup timestamps (``ip → epoch``).
_dirty: IPs added but not yet persisted to database.
_geoip_reader: Optional MaxMind GeoLite2 reader.
_geoip_initialized: Indicates whether init_geoip() has been called.
_allow_http_fallback: Whether to use ip-api.com as fallback.
_cache_lock: Async lock protecting cache mutations.
"""
def __init__(self) -> None:
"""Initialize an empty GeoCache."""
def __init__(self, allow_http_fallback: bool = False) -> None:
"""Initialize an empty GeoCache.
Args:
allow_http_fallback: Whether to fall back to ip-api.com HTTP API
when the MaxMind database is unavailable. Default is False
(fail rather than send IPs unencrypted).
"""
self._cache: dict[str, GeoInfo] = {}
self._neg_cache: dict[str, float] = {}
self._dirty: set[str] = set()
self._geoip_reader: geoip2.database.Reader | None = None
self._geoip_initialized: bool = False
self._allow_http_fallback: bool = allow_http_fallback
self._cache_lock: asyncio.Lock = asyncio.Lock()
async def clear(self) -> None:
@@ -323,6 +338,13 @@ class GeoCache:
) -> GeoInfo | None:
"""Resolve an IP address to country, ASN, and organisation metadata.
Resolution strategy (in order):
1. Check in-memory cache
2. Check negative cache (skip if within TTL)
3. Try local MaxMind GeoLite2-Country database (primary resolver)
4. If allow_http_fallback is True, try ip-api.com HTTP API (unencrypted)
5. Record as negative cache entry if all resolvers fail
Results are cached in-process. If the cache exceeds ``_MAX_CACHE_SIZE``
entries it is flushed before the new result is stored.
@@ -350,12 +372,44 @@ class GeoCache:
if neg_ts is not None and (time.monotonic() - neg_ts) < _NEG_CACHE_TTL:
return GeoInfo(country_code=None, country_name=None, asn=None, org=None)
# PRIMARY RESOLVER: Try local MaxMind database first.
result = self._geoip_lookup(ip)
if result is not None:
await self._store(ip, result)
if result.country_code is not None and db is not None:
try:
await geo_cache_repo.upsert_entry_and_commit(
db=db,
ip=ip,
country_code=result.country_code,
country_name=result.country_name,
asn=result.asn,
org=result.org,
)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_failed", ip=ip, error=str(exc))
log.debug("geo_lookup_success_mmdb", ip=ip, country=result.country_code)
return result
# FALLBACK RESOLVER: Try ip-api.com HTTP API only if explicitly allowed.
if not self._allow_http_fallback:
log.debug("geo_lookup_failed_no_http_fallback", ip=ip)
async with self._cache_lock:
self._neg_cache[ip] = time.monotonic()
if db is not None:
try:
await geo_cache_repo.upsert_neg_entry_and_commit(db=db, ip=ip)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_neg_failed", ip=ip, error=str(exc))
return GeoInfo(country_code=None, country_name=None, asn=None, org=None)
# HTTP API call (only when allow_http_fallback is True).
url: str = _API_URL.format(ip=ip)
api_ok = False
try:
async with http_session.get(url, timeout=aiohttp.ClientTimeout(total=_REQUEST_TIMEOUT)) as resp:
if resp.status != 200:
log.warning("geo_lookup_non_200", ip=ip, status=resp.status)
log.warning("geo_lookup_http_non_200", ip=ip, status=resp.status)
else:
data: dict[str, object] = await resp.json(content_type=None)
if data.get("status") == "success":
@@ -374,41 +428,22 @@ class GeoCache:
)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_failed", ip=ip, error=str(exc))
log.debug("geo_lookup_success", ip=ip, country=result.country_code, asn=result.asn)
log.debug("geo_lookup_success_http", ip=ip, country=result.country_code, asn=result.asn)
return result
log.debug(
"geo_lookup_failed",
"geo_lookup_http_failed",
ip=ip,
message=data.get("message", "unknown"),
)
except Exception as exc: # noqa: BLE001
log.warning(
"geo_lookup_request_failed",
"geo_lookup_http_request_failed",
ip=ip,
exc_type=type(exc).__name__,
error=repr(exc),
)
if not api_ok:
# Try local MaxMind database as fallback.
fallback = self._geoip_lookup(ip)
if fallback is not None:
await self._store(ip, fallback)
if fallback.country_code is not None and db is not None:
try:
await geo_cache_repo.upsert_entry_and_commit(
db=db,
ip=ip,
country_code=fallback.country_code,
country_name=fallback.country_name,
asn=fallback.asn,
org=fallback.org,
)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_failed", ip=ip, error=str(exc))
log.debug("geo_geoip_fallback_success", ip=ip, country=fallback.country_code)
return fallback
# Both resolvers failed — record in negative cache to avoid hammering.
async with self._cache_lock:
self._neg_cache[ip] = time.monotonic()
@@ -461,10 +496,17 @@ class GeoCache:
http_session: aiohttp.ClientSession,
db: aiosqlite.Connection | None = None,
) -> dict[str, GeoInfo]:
"""Resolve multiple IP addresses in bulk using ip-api.com batch endpoint.
"""Resolve multiple IP addresses in bulk.
Resolution strategy:
1. Return cached entries immediately (both positive and negative cache)
2. For uncached IPs, try local MaxMind database first
3. If allow_http_fallback is True, use ip-api.com batch endpoint for remaining
4. Record unresolvable IPs in negative cache
IPs already present in the in-memory cache are returned immediately
without making an HTTP request. Uncached IPs are sent to
without making an HTTP request. Uncached IPs are first resolved via
the local MaxMind database, then (if enabled) sent to
``http://ip-api.com/batch`` in chunks of up to :data:`_BATCH_SIZE`.
Only successful resolutions (``country_code is not None``) are written to
@@ -491,7 +533,7 @@ class GeoCache:
if ip in self._cache:
geo_result[ip] = self._cache[ip]
elif ip in self._neg_cache and (now - self._neg_cache[ip]) < _NEG_CACHE_TTL:
# Recently failed — skip API call, return empty result.
# Recently failed — skip resolution, return empty result.
geo_result[ip] = _empty
else:
uncached.append(ip)
@@ -501,8 +543,67 @@ class GeoCache:
log.info("geo_batch_lookup_start", total=len(uncached))
for batch_idx, chunk_start in enumerate(range(0, len(uncached), _BATCH_SIZE)):
chunk = uncached[chunk_start : chunk_start + _BATCH_SIZE]
# PRIMARY: Try local MaxMind database for all uncached IPs.
pos_rows: list[tuple[str, str | None, str | None, str | None, str | None]] = []
neg_ips: list[str] = []
remaining_uncached: list[str] = []
for ip in uncached:
mmdb_result = self._geoip_lookup(ip)
if mmdb_result is not None:
await self._store(ip, mmdb_result)
geo_result[ip] = mmdb_result
if db is not None:
pos_rows.append(
(ip, mmdb_result.country_code, mmdb_result.country_name, mmdb_result.asn, mmdb_result.org)
)
else:
# MMDB lookup failed — keep for potential HTTP fallback or final failure.
remaining_uncached.append(ip)
# Persist MMDB results if any.
if db is not None and pos_rows:
try:
await geo_cache_repo.bulk_upsert_entries_and_commit(db, pos_rows)
except Exception as exc: # noqa: BLE001
log.warning(
"geo_batch_persist_mmdb_failed",
count=len(pos_rows),
error=str(exc),
)
# FALLBACK: Try HTTP API only if enabled and there are remaining IPs.
if not self._allow_http_fallback or not remaining_uncached:
# Record remaining as negative cache.
for ip in remaining_uncached:
async with self._cache_lock:
self._neg_cache[ip] = time.monotonic()
geo_result[ip] = _empty
neg_ips.append(ip)
if db is not None and neg_ips:
try:
await geo_cache_repo.bulk_upsert_neg_entries_and_commit(db, neg_ips)
except Exception as exc: # noqa: BLE001
log.warning(
"geo_batch_persist_neg_failed",
count=len(neg_ips),
error=str(exc),
)
log.info(
"geo_batch_lookup_complete",
requested=len(uncached),
resolved=sum(1 for g in geo_result.values() if g.country_code is not None),
)
return geo_result
# HTTP API batch processing.
pos_rows.clear()
neg_ips.clear()
for batch_idx, chunk_start in enumerate(range(0, len(remaining_uncached), _BATCH_SIZE)):
chunk = remaining_uncached[chunk_start : chunk_start + _BATCH_SIZE]
# Throttle: pause between consecutive HTTP calls to stay within the
# ip-api.com free-tier rate limit (45 req/min).
@@ -532,13 +633,9 @@ class GeoCache:
assert chunk_result is not None # noqa: S101
# Collect bulk-write rows instead of one execute per IP.
pos_rows: list[tuple[str, str | None, str | None, str | None, str | None]] = []
neg_ips: list[str] = []
for ip, info in chunk_result.items():
if info.country_code is not None:
# Successful API resolution.
# Successful HTTP resolution.
await self._store(ip, info)
geo_result[ip] = info
if db is not None:
@@ -546,28 +643,12 @@ class GeoCache:
(ip, info.country_code, info.country_name, info.asn, info.org)
)
else:
# API failed — try local GeoIP fallback.
fallback = self._geoip_lookup(ip)
if fallback is not None:
await self._store(ip, fallback)
geo_result[ip] = fallback
if db is not None:
pos_rows.append(
(
ip,
fallback.country_code,
fallback.country_name,
fallback.asn,
fallback.org,
)
)
else:
# Both resolvers failed — record in negative cache.
async with self._cache_lock:
self._neg_cache[ip] = time.monotonic()
geo_result[ip] = _empty
if db is not None:
neg_ips.append(ip)
# HTTP failed — record as negative cache.
async with self._cache_lock:
self._neg_cache[ip] = time.monotonic()
geo_result[ip] = _empty
if db is not None:
neg_ips.append(ip)
if db is not None and (pos_rows or neg_ips):
try:
@@ -583,6 +664,8 @@ class GeoCache:
negative_count=len(neg_ips),
error=str(exc),
)
pos_rows.clear()
neg_ips.clear()
log.info(
"geo_batch_lookup_complete",