Refactor geo re-resolve to use geo_cache repo and move data-access out of router

2026-03-16 21:12:07 +01:00
parent 8f515893ea
commit dcd8059b27
6 changed files with 157 additions and 193 deletions


@@ -13,12 +13,15 @@ from __future__ import annotations
import asyncio
import json
import time
from dataclasses import asdict
from datetime import UTC, datetime
from typing import TYPE_CHECKING, Any
import aiosqlite
import structlog
if TYPE_CHECKING:
import aiosqlite
from app.models.ban import (
BLOCKLIST_JAIL,
BUCKET_SECONDS,
@@ -31,11 +34,11 @@ from app.models.ban import (
BanTrendResponse,
DashboardBanItem,
DashboardBanListResponse,
JailBanCount,
TimeRange,
_derive_origin,
bucket_count,
)
from app.repositories import fail2ban_db_repo
from app.utils.fail2ban_client import Fail2BanClient
if TYPE_CHECKING:
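
The import hunks above move the module-level import aiosqlite under the TYPE_CHECKING guard: once the raw queries live in the repository, this module only needs aiosqlite for type annotations. A minimal sketch of that pattern follows; the count_bans function is illustrative and not part of this commit.

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen only by type checkers; never imported at runtime.
        import aiosqlite


    async def count_bans(db: aiosqlite.Connection) -> int:
        # With `from __future__ import annotations` the annotation above stays a
        # string, so aiosqlite need not be importable when this module loads.
        async with db.execute("SELECT COUNT(*) FROM bans") as cur:
            row = await cur.fetchone()
            return int(row[0]) if row else 0
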
@@ -244,33 +247,20 @@ async def list_bans(
origin=origin,
)
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
f2b_db.row_factory = aiosqlite.Row
async with f2b_db.execute(
"SELECT COUNT(*) FROM bans WHERE timeofban >= ?" + origin_clause,
(since, *origin_params),
) as cur:
count_row = await cur.fetchone()
total: int = int(count_row[0]) if count_row else 0
async with f2b_db.execute(
"SELECT jail, ip, timeofban, bancount, data "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " ORDER BY timeofban DESC "
"LIMIT ? OFFSET ?",
(since, *origin_params, effective_page_size, offset),
) as cur:
rows = await cur.fetchall()
rows, total = await fail2ban_db_repo.get_currently_banned(
db_path=db_path,
since=since,
origin=origin,
limit=effective_page_size,
offset=offset,
)
# Batch-resolve geo data for all IPs on this page in a single API call.
# This avoids hitting the 45 req/min single-IP rate limit when the
# page contains many bans (e.g. after a large blocklist import).
geo_map: dict[str, Any] = {}
if http_session is not None and rows:
page_ips: list[str] = [str(r["ip"]) for r in rows]
page_ips: list[str] = [r.ip for r in rows]
try:
geo_map = await geo_service.lookup_batch(page_ips, http_session, db=app_db)
except Exception: # noqa: BLE001
@@ -278,11 +268,11 @@ async def list_bans(
items: list[DashboardBanItem] = []
for row in rows:
jail: str = str(row["jail"])
ip: str = str(row["ip"])
banned_at: str = _ts_to_iso(int(row["timeofban"]))
ban_count: int = int(row["bancount"])
matches, _ = _parse_data_json(row["data"])
jail: str = row.jail
ip: str = row.ip
banned_at: str = _ts_to_iso(row.timeofban)
ban_count: int = row.bancount
matches, _ = _parse_data_json(row.data)
service: str | None = matches[0] if matches else None
country_code: str | None = None
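
The repository module itself is one of the other changed files and is not shown here, so the following is only a sketch of what the list_bans call site implies: get_currently_banned takes db_path, since, origin, limit and offset, and returns (rows, total) where each row exposes jail, ip, timeofban, bancount and data as attributes. The dataclass name BanRow and the SQL are assumptions; origin filtering is elided.

    from dataclasses import dataclass

    import aiosqlite


    @dataclass(frozen=True)
    class BanRow:
        jail: str
        ip: str
        timeofban: int
        bancount: int
        data: str


    async def get_currently_banned(
        *, db_path: str, since: int, origin: str | None, limit: int, offset: int
    ) -> tuple[list[BanRow], int]:
        # NOTE: the origin filter (local vs. blocklist jails) is omitted from this sketch.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT COUNT(*) FROM bans WHERE timeofban >= ?", (since,)
            ) as cur:
                count_row = await cur.fetchone()
                total = int(count_row[0]) if count_row else 0
            rows: list[BanRow] = []
            if limit > 0:  # limit=0 callers only want the total
                async with db.execute(
                    "SELECT jail, ip, timeofban, bancount, data FROM bans "
                    "WHERE timeofban >= ? ORDER BY timeofban DESC LIMIT ? OFFSET ?",
                    (since, limit, offset),
                ) as cur:
                    rows = [
                        BanRow(
                            jail=str(r["jail"]),
                            ip=str(r["ip"]),
                            timeofban=int(r["timeofban"]),
                            bancount=int(r["bancount"]),
                            data=str(r["data"]),
                        )
                        async for r in cur
                    ]
        return rows, total
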
@@ -395,42 +385,31 @@ async def bans_by_country(
origin=origin,
)
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
f2b_db.row_factory = aiosqlite.Row
# Total count and companion rows reuse the same SQL query logic.
# Passing limit=0 returns only the total from the count query.
_, total = await fail2ban_db_repo.get_currently_banned(
db_path=db_path,
since=since,
origin=origin,
limit=0,
offset=0,
)
# Total count for the window.
async with f2b_db.execute(
"SELECT COUNT(*) FROM bans WHERE timeofban >= ?" + origin_clause,
(since, *origin_params),
) as cur:
count_row = await cur.fetchone()
total: int = int(count_row[0]) if count_row else 0
agg_rows = await fail2ban_db_repo.get_ban_event_counts(
db_path=db_path,
since=since,
origin=origin,
)
# Aggregation: unique IPs + their total event count.
# No LIMIT here — we need all unique source IPs for accurate country counts.
async with f2b_db.execute(
"SELECT ip, COUNT(*) AS event_count "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " GROUP BY ip",
(since, *origin_params),
) as cur:
agg_rows = await cur.fetchall()
companion_rows, _ = await fail2ban_db_repo.get_currently_banned(
db_path=db_path,
since=since,
origin=origin,
limit=_MAX_COMPANION_BANS,
offset=0,
)
# Companion table: most recent raw rows for display alongside the map.
async with f2b_db.execute(
"SELECT jail, ip, timeofban, bancount, data "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " ORDER BY timeofban DESC "
"LIMIT ?",
(since, *origin_params, _MAX_COMPANION_BANS),
) as cur:
companion_rows = await cur.fetchall()
unique_ips: list[str] = [str(r["ip"]) for r in agg_rows]
unique_ips: list[str] = [r.ip for r in agg_rows]
geo_map: dict[str, Any] = {}
if http_session is not None and unique_ips:
@@ -467,11 +446,11 @@ async def bans_by_country(
country_names: dict[str, str] = {}
for row in agg_rows:
ip: str = str(row["ip"])
ip: str = row.ip
geo = geo_map.get(ip)
cc: str | None = geo.country_code if geo else None
cn: str | None = geo.country_name if geo else None
event_count: int = int(row["event_count"])
event_count: int = row.event_count
if cc:
countries[cc] = countries.get(cc, 0) + event_count
@@ -481,26 +460,26 @@ async def bans_by_country(
# Build companion table from recent rows (geo already cached from batch step).
bans: list[DashboardBanItem] = []
for row in companion_rows:
ip = str(row["ip"])
ip = row.ip
geo = geo_map.get(ip)
cc = geo.country_code if geo else None
cn = geo.country_name if geo else None
asn: str | None = geo.asn if geo else None
org: str | None = geo.org if geo else None
matches, _ = _parse_data_json(row["data"])
matches, _ = _parse_data_json(row.data)
bans.append(
DashboardBanItem(
ip=ip,
jail=str(row["jail"]),
banned_at=_ts_to_iso(int(row["timeofban"])),
jail=row.jail,
banned_at=_ts_to_iso(row.timeofban),
service=matches[0] if matches else None,
country_code=cc,
country_name=cn,
asn=asn,
org=org,
ban_count=int(row["bancount"]),
origin=_derive_origin(str(row["jail"])),
ban_count=row.bancount,
origin=_derive_origin(row.jail),
)
)
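
bans_by_country consumes two further repository calls above: get_ban_event_counts, which returns one row per unique source IP with its event count (no LIMIT, since every IP is needed for accurate country totals), and a second get_currently_banned call capped at _MAX_COMPANION_BANS for the companion table. A hedged sketch of the aggregation helper, with an assumed dataclass name and origin filtering elided:

    from dataclasses import dataclass

    import aiosqlite


    @dataclass(frozen=True)
    class IpEventCount:
        ip: str
        event_count: int


    async def get_ban_event_counts(
        *, db_path: str, since: int, origin: str | None
    ) -> list[IpEventCount]:
        # NOTE: origin filtering is omitted from this sketch.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                # Deliberately no LIMIT: all unique IPs feed the per-country counts.
                "SELECT ip, COUNT(*) AS event_count FROM bans "
                "WHERE timeofban >= ? GROUP BY ip",
                (since,),
            ) as cur:
                return [
                    IpEventCount(ip=str(r["ip"]), event_count=int(r["event_count"]))
                    async for r in cur
                ]
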
@@ -565,32 +544,18 @@ async def ban_trend(
num_buckets=num_buckets,
)
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
f2b_db.row_factory = aiosqlite.Row
async with f2b_db.execute(
"SELECT CAST((timeofban - ?) / ? AS INTEGER) AS bucket_idx, "
"COUNT(*) AS cnt "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " GROUP BY bucket_idx "
"ORDER BY bucket_idx",
(since, bucket_secs, since, *origin_params),
) as cur:
rows = await cur.fetchall()
# Map bucket_idx → count; ignore any out-of-range indices.
counts: dict[int, int] = {}
for row in rows:
idx: int = int(row["bucket_idx"])
if 0 <= idx < num_buckets:
counts[idx] = int(row["cnt"])
counts = await fail2ban_db_repo.get_ban_counts_by_bucket(
db_path=db_path,
since=since,
bucket_secs=bucket_secs,
num_buckets=num_buckets,
origin=origin,
)
buckets: list[BanTrendBucket] = [
BanTrendBucket(
timestamp=_ts_to_iso(since + i * bucket_secs),
count=counts.get(i, 0),
count=counts[i],
)
for i in range(num_buckets)
]
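
In ban_trend the caller now indexes counts[i] directly instead of counts.get(i, 0), which implies the repository returns a dict pre-filled with a zero for every bucket index. A sketch under that assumption, with names and SQL inferred from the old inline query and origin filtering elided:

    import aiosqlite


    async def get_ban_counts_by_bucket(
        *, db_path: str, since: int, bucket_secs: int, num_buckets: int, origin: str | None
    ) -> dict[int, int]:
        # Pre-fill every bucket so callers can index without a default.
        counts: dict[int, int] = {i: 0 for i in range(num_buckets)}
        # NOTE: origin filtering is omitted from this sketch.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT CAST((timeofban - ?) / ? AS INTEGER) AS bucket_idx, "
                "COUNT(*) AS cnt FROM bans WHERE timeofban >= ? GROUP BY bucket_idx",
                (since, bucket_secs, since),
            ) as cur:
                async for row in cur:
                    idx = int(row["bucket_idx"])
                    if 0 <= idx < num_buckets:  # drop out-of-range indices, as the old inline code did
                        counts[idx] = int(row["cnt"])
        return counts
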
@@ -643,50 +608,37 @@ async def bans_by_jail(
origin=origin,
)
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
f2b_db.row_factory = aiosqlite.Row
total, jails = await fail2ban_db_repo.get_bans_by_jail(
db_path=db_path,
since=since,
origin=origin,
)
async with f2b_db.execute(
"SELECT COUNT(*) FROM bans WHERE timeofban >= ?" + origin_clause,
(since, *origin_params),
) as cur:
count_row = await cur.fetchone()
total: int = int(count_row[0]) if count_row else 0
# Diagnostic guard: if zero results were returned, check whether the table
# has *any* rows and log a warning with min/max timeofban so operators can
# diagnose timezone or filter mismatches from logs.
if total == 0:
table_row_count, min_timeofban, max_timeofban = (
await fail2ban_db_repo.get_bans_table_summary(db_path)
)
if table_row_count > 0:
log.warning(
"ban_service_bans_by_jail_empty_despite_data",
table_row_count=table_row_count,
min_timeofban=min_timeofban,
max_timeofban=max_timeofban,
since=since,
range=range_,
)
# Diagnostic guard: if zero results were returned, check whether the
# table has *any* rows and log a warning with min/max timeofban so
# operators can diagnose timezone or filter mismatches from logs.
if total == 0:
async with f2b_db.execute(
"SELECT COUNT(*), MIN(timeofban), MAX(timeofban) FROM bans"
) as cur:
diag_row = await cur.fetchone()
if diag_row and diag_row[0] > 0:
log.warning(
"ban_service_bans_by_jail_empty_despite_data",
table_row_count=diag_row[0],
min_timeofban=diag_row[1],
max_timeofban=diag_row[2],
since=since,
range=range_,
)
async with f2b_db.execute(
"SELECT jail, COUNT(*) AS cnt "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " GROUP BY jail ORDER BY cnt DESC",
(since, *origin_params),
) as cur:
rows = await cur.fetchall()
jails: list[JailBanCount] = [
JailBanCount(jail=str(row["jail"]), count=int(row["cnt"])) for row in rows
]
log.debug(
"ban_service_bans_by_jail_result",
total=total,
jail_count=len(jails),
)
return BansByJailResponse(jails=jails, total=total)
# Pydantic strict validation requires either dicts or model instances.
# Our repository returns dataclasses for simplicity, so convert them here.
jail_dicts: list[dict[str, object]] = [asdict(j) for j in jails]
return BansByJailResponse(jails=jail_dicts, total=total)
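
The two remaining calls in bans_by_jail are likewise only visible from their call sites: get_bans_by_jail returns (total, jails) with dataclass items that the caller converts via asdict for Pydantic, and get_bans_table_summary returns (row_count, min_timeofban, max_timeofban) for the empty-result diagnostic. A hedged sketch of both, with an assumed dataclass name and origin filtering elided:

    from dataclasses import dataclass

    import aiosqlite


    @dataclass(frozen=True)
    class JailCount:
        jail: str
        count: int


    async def get_bans_by_jail(
        *, db_path: str, since: int, origin: str | None
    ) -> tuple[int, list[JailCount]]:
        # NOTE: origin filtering is omitted from this sketch.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT jail, COUNT(*) AS cnt FROM bans WHERE timeofban >= ? "
                "GROUP BY jail ORDER BY cnt DESC",
                (since,),
            ) as cur:
                jails = [
                    JailCount(jail=str(r["jail"]), count=int(r["cnt"]))
                    async for r in cur
                ]
        # The window total equals the sum of the per-jail counts.
        return sum(j.count for j in jails), jails


    async def get_bans_table_summary(db_path: str) -> tuple[int, int | None, int | None]:
        # Whole-table row count plus min/max timeofban, used only for diagnostics.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            async with db.execute(
                "SELECT COUNT(*), MIN(timeofban), MAX(timeofban) FROM bans"
            ) as cur:
                row = await cur.fetchone()
        if row is None:
            return 0, None, None
        return int(row[0]), row[1], row[2]
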