Files
BanGUI/backend/app/services/geo_service.py
Lukas 6877637507 Fix startup crash caused by top-level geoip2 import
geoip2 is an optional dependency used only when a MaxMind mmdb path is
configured. Importing it unconditionally at module level caused the server
to crash on startup with ModuleNotFoundError when the package was absent
from the environment.

Move the imports under TYPE_CHECKING (for static analysis) and add lazy
local imports inside init_geoip() and _geoip_lookup() where geoip2 is
actually needed at runtime. The server now starts normally without a
MaxMind database, and geoip2 is loaded on demand if the feature is used.
2026-03-10 15:48:00 +01:00

572 lines
20 KiB
Python

"""Geo service.
Resolves IP addresses to their country, ASN, and organisation using the
`ip-api.com <http://ip-api.com>`_ JSON API. Results are cached in two tiers:
1. **In-memory dict** — fastest; survives for the life of the process.
2. **Persistent SQLite table** (``geo_cache``) — survives restarts; loaded
into the in-memory dict during application startup via
:func:`load_cache_from_db`.
Only *successful* lookups (those returning a non-``None`` ``country_code``)
are written to the persistent cache. Failed lookups are **not** cached so
they will be retried on the next request.
For bulk operations the batch endpoint ``http://ip-api.com/batch`` is used
(up to 100 IPs per HTTP call) which is far more efficient than one-at-a-time
requests. Use :func:`lookup_batch` from the ban or blocklist services.
Usage::
import aiohttp
import aiosqlite
from app.services import geo_service
# warm the cache from the persistent store at startup
async with aiosqlite.connect("bangui.db") as db:
await geo_service.load_cache_from_db(db)
async with aiohttp.ClientSession() as session:
# single lookup
info = await geo_service.lookup("1.2.3.4", session)
if info:
print(info.country_code) # "DE"
# bulk lookup (more efficient for large sets)
geo_map = await geo_service.lookup_batch(["1.2.3.4", "5.6.7.8"], session)
"""
from __future__ import annotations
import time
from dataclasses import dataclass
from typing import TYPE_CHECKING
import structlog
if TYPE_CHECKING:
import aiohttp
import aiosqlite
import geoip2.database
import geoip2.errors
log: structlog.stdlib.BoundLogger = structlog.get_logger()
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
#: ip-api.com single-IP lookup endpoint (HTTP only on the free tier).
_API_URL: str = (
"http://ip-api.com/json/{ip}?fields=status,message,country,countryCode,org,as"
)
#: ip-api.com batch endpoint — accepts up to 100 IPs per POST.
_BATCH_API_URL: str = (
"http://ip-api.com/batch?fields=status,message,country,countryCode,org,as,query"
)
#: Maximum IPs per batch request (ip-api.com hard limit is 100).
_BATCH_SIZE: int = 100
#: Maximum number of entries kept in the in-process cache before it is
#: flushed completely. A simple eviction strategy — the cache is cheap to
#: rebuild from the persistent store.
_MAX_CACHE_SIZE: int = 50_000
#: Timeout for outgoing geo API requests in seconds.
_REQUEST_TIMEOUT: float = 5.0
#: How many seconds a failed lookup result is suppressed before the IP is
#: eligible for a new API attempt. Default: 5 minutes.
_NEG_CACHE_TTL: float = 300.0
# ---------------------------------------------------------------------------
# Domain model
# ---------------------------------------------------------------------------
@dataclass
class GeoInfo:
"""Geographical and network metadata for a single IP address.
All fields default to ``None`` when the information is unavailable or
the lookup fails gracefully.
"""
country_code: str | None
"""ISO 3166-1 alpha-2 country code, e.g. ``"DE"``."""
country_name: str | None
"""Human-readable country name, e.g. ``"Germany"``."""
asn: str | None
"""Autonomous System Number string, e.g. ``"AS3320"``."""
org: str | None
"""Organisation name associated with the IP, e.g. ``"Deutsche Telekom"``."""
# ---------------------------------------------------------------------------
# Internal cache
# ---------------------------------------------------------------------------
#: Module-level in-memory cache: ``ip → GeoInfo`` (positive results only).
_cache: dict[str, GeoInfo] = {}
#: Negative cache: ``ip → epoch timestamp`` of last failed lookup attempt.
#: Entries within :data:`_NEG_CACHE_TTL` seconds are not re-queried.
_neg_cache: dict[str, float] = {}
#: Optional MaxMind GeoLite2 reader initialised by :func:`init_geoip`.
_geoip_reader: geoip2.database.Reader | None = None
def clear_cache() -> None:
"""Flush both the positive and negative lookup caches.
Useful in tests and when the operator suspects stale data.
"""
_cache.clear()
_neg_cache.clear()
def clear_neg_cache() -> None:
"""Flush only the negative (failed-lookups) cache.
Useful when triggering a manual re-resolve so that previously failed
IPs are immediately eligible for a new API attempt.
"""
_neg_cache.clear()
def init_geoip(mmdb_path: str | None) -> None:
"""Initialise the MaxMind GeoLite2-Country database reader.
If *mmdb_path* is ``None``, empty, or the file does not exist the
fallback is silently disabled — ip-api.com remains the sole resolver.
Args:
mmdb_path: Absolute path to a ``GeoLite2-Country.mmdb`` file.
"""
global _geoip_reader # noqa: PLW0603
if not mmdb_path:
return
from pathlib import Path # noqa: PLC0415
import geoip2.database # noqa: PLC0415
if not Path(mmdb_path).is_file():
log.warning("geoip_mmdb_not_found", path=mmdb_path)
return
_geoip_reader = geoip2.database.Reader(mmdb_path)
log.info("geoip_mmdb_loaded", path=mmdb_path)
def _geoip_lookup(ip: str) -> GeoInfo | None:
"""Attempt a local MaxMind GeoLite2 lookup for *ip*.
Returns ``None`` when the reader is not initialised, the IP is not in
the database, or any other error occurs.
Args:
ip: IPv4 or IPv6 address string.
Returns:
A :class:`GeoInfo` with at least ``country_code`` populated, or
``None`` when resolution is impossible.
"""
if _geoip_reader is None:
return None
import geoip2.errors # noqa: PLC0415
try:
response = _geoip_reader.country(ip)
code: str | None = response.country.iso_code or None
name: str | None = response.country.name or None
if code is None:
return None
return GeoInfo(country_code=code, country_name=name, asn=None, org=None)
except geoip2.errors.AddressNotFoundError:
return None
except Exception as exc: # noqa: BLE001
log.warning("geoip_lookup_failed", ip=ip, error=str(exc))
return None
# ---------------------------------------------------------------------------
# Persistent cache I/O
# ---------------------------------------------------------------------------
async def load_cache_from_db(db: aiosqlite.Connection) -> None:
"""Pre-populate the in-memory cache from the ``geo_cache`` table.
Should be called once during application startup so the service starts
with a warm cache instead of making cold API calls on the first request.
Args:
db: Open :class:`aiosqlite.Connection` to the BanGUI application
database (not the fail2ban database).
"""
count = 0
async with db.execute(
"SELECT ip, country_code, country_name, asn, org FROM geo_cache"
) as cur:
async for row in cur:
ip: str = str(row[0])
country_code: str | None = row[1]
if country_code is None:
continue
_cache[ip] = GeoInfo(
country_code=country_code,
country_name=row[2],
asn=row[3],
org=row[4],
)
count += 1
log.info("geo_cache_loaded_from_db", entries=count)
async def _persist_entry(
db: aiosqlite.Connection,
ip: str,
info: GeoInfo,
) -> None:
"""Upsert a resolved :class:`GeoInfo` into the ``geo_cache`` table.
Only called when ``info.country_code`` is not ``None`` so the persistent
store never contains empty placeholder rows.
Args:
db: BanGUI application database connection.
ip: IP address string.
info: Resolved geo data to persist.
"""
await db.execute(
"""
INSERT INTO geo_cache (ip, country_code, country_name, asn, org)
VALUES (?, ?, ?, ?, ?)
ON CONFLICT(ip) DO UPDATE SET
country_code = excluded.country_code,
country_name = excluded.country_name,
asn = excluded.asn,
org = excluded.org,
cached_at = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
""",
(ip, info.country_code, info.country_name, info.asn, info.org),
)
await db.commit()
async def _persist_neg_entry(db: aiosqlite.Connection, ip: str) -> None:
"""Record a failed lookup attempt in ``geo_cache`` with all-NULL fields.
Uses ``INSERT OR IGNORE`` so that an existing *positive* entry (one that
has a ``country_code``) is never overwritten by a later failure.
Args:
db: BanGUI application database connection.
ip: IP address string whose resolution failed.
"""
await db.execute(
"INSERT OR IGNORE INTO geo_cache (ip) VALUES (?)",
(ip,),
)
await db.commit()
# ---------------------------------------------------------------------------
# Public API — single lookup
# ---------------------------------------------------------------------------
async def lookup(
ip: str,
http_session: aiohttp.ClientSession,
db: aiosqlite.Connection | None = None,
) -> GeoInfo | None:
"""Resolve an IP address to country, ASN, and organisation metadata.
Results are cached in-process. If the cache exceeds ``_MAX_CACHE_SIZE``
entries it is flushed before the new result is stored.
Only successful resolutions (``country_code is not None``) are written to
the persistent cache when *db* is provided. Failed lookups are **not**
cached so they are retried on the next call.
Args:
ip: IPv4 or IPv6 address string.
http_session: Shared :class:`aiohttp.ClientSession` (from
``app.state.http_session``).
db: Optional BanGUI application database. When provided, successful
lookups are persisted for cross-restart cache warming.
Returns:
A :class:`GeoInfo` instance, or ``None`` when the lookup fails
in a way that should prevent the caller from caching a bad result
(e.g. network timeout).
"""
if ip in _cache:
return _cache[ip]
# Negative cache: skip IPs that recently failed to avoid hammering the API.
neg_ts = _neg_cache.get(ip)
if neg_ts is not None and (time.monotonic() - neg_ts) < _NEG_CACHE_TTL:
return GeoInfo(country_code=None, country_name=None, asn=None, org=None)
url: str = _API_URL.format(ip=ip)
api_ok = False
try:
async with http_session.get(url, timeout=_REQUEST_TIMEOUT) as resp: # type: ignore[arg-type]
if resp.status != 200:
log.warning("geo_lookup_non_200", ip=ip, status=resp.status)
else:
data: dict[str, object] = await resp.json(content_type=None)
if data.get("status") == "success":
api_ok = True
result = _parse_single_response(data)
_store(ip, result)
if result.country_code is not None and db is not None:
try:
await _persist_entry(db, ip, result)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_failed", ip=ip, error=str(exc))
log.debug("geo_lookup_success", ip=ip, country=result.country_code, asn=result.asn)
return result
log.debug(
"geo_lookup_failed",
ip=ip,
message=data.get("message", "unknown"),
)
except Exception as exc: # noqa: BLE001
log.warning("geo_lookup_request_failed", ip=ip, error=str(exc))
if not api_ok:
# Try local MaxMind database as fallback.
fallback = _geoip_lookup(ip)
if fallback is not None:
_store(ip, fallback)
if fallback.country_code is not None and db is not None:
try:
await _persist_entry(db, ip, fallback)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_failed", ip=ip, error=str(exc))
log.debug("geo_geoip_fallback_success", ip=ip, country=fallback.country_code)
return fallback
# Both resolvers failed — record in negative cache to avoid hammering.
_neg_cache[ip] = time.monotonic()
if db is not None:
try:
await _persist_neg_entry(db, ip)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_neg_failed", ip=ip, error=str(exc))
return GeoInfo(country_code=None, country_name=None, asn=None, org=None)
# ---------------------------------------------------------------------------
# Public API — batch lookup
# ---------------------------------------------------------------------------
async def lookup_batch(
ips: list[str],
http_session: aiohttp.ClientSession,
db: aiosqlite.Connection | None = None,
) -> dict[str, GeoInfo]:
"""Resolve multiple IP addresses in bulk using ip-api.com batch endpoint.
IPs already present in the in-memory cache are returned immediately
without making an HTTP request. Uncached IPs are sent to
``http://ip-api.com/batch`` in chunks of up to :data:`_BATCH_SIZE`.
Only successful resolutions (``country_code is not None``) are written to
the persistent cache when *db* is provided.
Args:
ips: List of IP address strings to resolve. Duplicates are ignored.
http_session: Shared :class:`aiohttp.ClientSession`.
db: Optional BanGUI application database for persistent cache writes.
Returns:
Dict mapping ``ip → GeoInfo`` for every input IP. IPs whose
resolution failed will have a ``GeoInfo`` with all-``None`` fields.
"""
geo_result: dict[str, GeoInfo] = {}
uncached: list[str] = []
_empty = GeoInfo(country_code=None, country_name=None, asn=None, org=None)
unique_ips = list(dict.fromkeys(ips)) # deduplicate, preserve order
now = time.monotonic()
for ip in unique_ips:
if ip in _cache:
geo_result[ip] = _cache[ip]
elif ip in _neg_cache and (now - _neg_cache[ip]) < _NEG_CACHE_TTL:
# Recently failed — skip API call, return empty result.
geo_result[ip] = _empty
else:
uncached.append(ip)
if not uncached:
return geo_result
log.info("geo_batch_lookup_start", total=len(uncached))
for chunk_start in range(0, len(uncached), _BATCH_SIZE):
chunk = uncached[chunk_start : chunk_start + _BATCH_SIZE]
chunk_result = await _batch_api_call(chunk, http_session)
for ip, info in chunk_result.items():
if info.country_code is not None:
# Successful API resolution.
_store(ip, info)
geo_result[ip] = info
if db is not None:
try:
await _persist_entry(db, ip, info)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_failed", ip=ip, error=str(exc))
else:
# API failed — try local GeoIP fallback.
fallback = _geoip_lookup(ip)
if fallback is not None:
_store(ip, fallback)
geo_result[ip] = fallback
if db is not None:
try:
await _persist_entry(db, ip, fallback)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_failed", ip=ip, error=str(exc))
else:
# Both resolvers failed — record in negative cache.
_neg_cache[ip] = time.monotonic()
geo_result[ip] = _empty
if db is not None:
try:
await _persist_neg_entry(db, ip)
except Exception as exc: # noqa: BLE001
log.warning("geo_persist_neg_failed", ip=ip, error=str(exc))
log.info(
"geo_batch_lookup_complete",
requested=len(uncached),
resolved=sum(1 for g in geo_result.values() if g.country_code is not None),
)
return geo_result
async def _batch_api_call(
ips: list[str],
http_session: aiohttp.ClientSession,
) -> dict[str, GeoInfo]:
"""Send one batch request to the ip-api.com batch endpoint.
Args:
ips: Up to :data:`_BATCH_SIZE` IP address strings.
http_session: Shared HTTP session.
Returns:
Dict mapping ``ip → GeoInfo`` for every IP in *ips*. IPs where the
API returned a failure record or the request raised an exception get
an all-``None`` :class:`GeoInfo`.
"""
empty = GeoInfo(country_code=None, country_name=None, asn=None, org=None)
fallback: dict[str, GeoInfo] = dict.fromkeys(ips, empty)
payload = [{"query": ip} for ip in ips]
try:
async with http_session.post(
_BATCH_API_URL,
json=payload,
timeout=_REQUEST_TIMEOUT * 2, # type: ignore[arg-type]
) as resp:
if resp.status != 200:
log.warning("geo_batch_non_200", status=resp.status, count=len(ips))
return fallback
data: list[dict[str, object]] = await resp.json(content_type=None)
except Exception as exc: # noqa: BLE001
log.warning("geo_batch_request_failed", count=len(ips), error=str(exc))
return fallback
out: dict[str, GeoInfo] = {}
for entry in data:
ip_str: str = str(entry.get("query", ""))
if not ip_str:
continue
if entry.get("status") != "success":
out[ip_str] = empty
log.debug(
"geo_batch_entry_failed",
ip=ip_str,
message=entry.get("message", "unknown"),
)
continue
out[ip_str] = _parse_single_response(entry)
# Fill any IPs missing from the response.
for ip in ips:
if ip not in out:
out[ip] = empty
return out
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _parse_single_response(data: dict[str, object]) -> GeoInfo:
"""Build a :class:`GeoInfo` from a single ip-api.com response dict.
Args:
data: A ``status == "success"`` JSON response from ip-api.com.
Returns:
Populated :class:`GeoInfo`.
"""
country_code: str | None = _str_or_none(data.get("countryCode"))
country_name: str | None = _str_or_none(data.get("country"))
asn_raw: str | None = _str_or_none(data.get("as"))
org_raw: str | None = _str_or_none(data.get("org"))
# ip-api returns "AS12345 Some Org" in both "as" and "org".
asn: str | None = asn_raw.split()[0] if asn_raw else None
return GeoInfo(
country_code=country_code,
country_name=country_name,
asn=asn,
org=org_raw,
)
def _str_or_none(value: object) -> str | None:
"""Return *value* as a non-empty string, or ``None``.
Args:
value: Raw JSON value which may be ``None``, empty, or a string.
Returns:
Stripped string if non-empty, else ``None``.
"""
if value is None:
return None
s = str(value).strip()
return s if s else None
def _store(ip: str, info: GeoInfo) -> None:
"""Insert *info* into the module-level cache, flushing if over capacity.
Args:
ip: The IP address key.
info: The :class:`GeoInfo` to store.
"""
if len(_cache) >= _MAX_CACHE_SIZE:
_cache.clear()
log.info("geo_cache_flushed", reason="capacity")
_cache[ip] = info