Refactor geo re-resolve to use geo_cache repo and move data-access out of router

2026-03-16 21:12:07 +01:00
parent 8f515893ea
commit dcd8059b27
6 changed files with 157 additions and 193 deletions


@@ -13,12 +13,15 @@ from __future__ import annotations
import asyncio
import json
import time
from dataclasses import asdict
from datetime import UTC, datetime
from typing import TYPE_CHECKING, Any
import aiosqlite
import structlog
if TYPE_CHECKING:
import aiosqlite
from app.models.ban import (
BLOCKLIST_JAIL,
BUCKET_SECONDS,
@@ -31,11 +34,11 @@ from app.models.ban import (
BanTrendResponse,
DashboardBanItem,
DashboardBanListResponse,
JailBanCount,
TimeRange,
_derive_origin,
bucket_count,
)
from app.repositories import fail2ban_db_repo
from app.utils.fail2ban_client import Fail2BanClient
if TYPE_CHECKING:
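
The import hunks above move the module-level import aiosqlite under the TYPE_CHECKING guard: once the raw queries live in the repository, this module only needs aiosqlite for type annotations. A minimal sketch of that pattern follows; the count_bans function is illustrative and not part of this commit.

    from __future__ import annotations

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen only by type checkers; never imported at runtime.
        import aiosqlite


    async def count_bans(db: aiosqlite.Connection) -> int:
        # With `from __future__ import annotations` the annotation above stays a
        # string, so aiosqlite need not be importable when this module loads.
        async with db.execute("SELECT COUNT(*) FROM bans") as cur:
            row = await cur.fetchone()
            return int(row[0]) if row else 0
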
@@ -244,33 +247,20 @@ async def list_bans(
origin=origin,
)
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
f2b_db.row_factory = aiosqlite.Row
async with f2b_db.execute(
"SELECT COUNT(*) FROM bans WHERE timeofban >= ?" + origin_clause,
(since, *origin_params),
) as cur:
count_row = await cur.fetchone()
total: int = int(count_row[0]) if count_row else 0
async with f2b_db.execute(
"SELECT jail, ip, timeofban, bancount, data "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " ORDER BY timeofban DESC "
"LIMIT ? OFFSET ?",
(since, *origin_params, effective_page_size, offset),
) as cur:
rows = await cur.fetchall()
rows, total = await fail2ban_db_repo.get_currently_banned(
db_path=db_path,
since=since,
origin=origin,
limit=effective_page_size,
offset=offset,
)
# Batch-resolve geo data for all IPs on this page in a single API call.
# This avoids hitting the 45 req/min single-IP rate limit when the
# page contains many bans (e.g. after a large blocklist import).
geo_map: dict[str, Any] = {}
if http_session is not None and rows:
page_ips: list[str] = [str(r["ip"]) for r in rows]
page_ips: list[str] = [r.ip for r in rows]
try:
geo_map = await geo_service.lookup_batch(page_ips, http_session, db=app_db)
except Exception: # noqa: BLE001
@@ -278,11 +268,11 @@ async def list_bans(
items: list[DashboardBanItem] = []
for row in rows:
jail: str = str(row["jail"])
ip: str = str(row["ip"])
banned_at: str = _ts_to_iso(int(row["timeofban"]))
ban_count: int = int(row["bancount"])
matches, _ = _parse_data_json(row["data"])
jail: str = row.jail
ip: str = row.ip
banned_at: str = _ts_to_iso(row.timeofban)
ban_count: int = row.bancount
matches, _ = _parse_data_json(row.data)
service: str | None = matches[0] if matches else None
country_code: str | None = None
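
The repository module itself is one of the other changed files and is not shown here, so the following is only a sketch of what the list_bans call site implies: get_currently_banned takes db_path, since, origin, limit and offset, and returns (rows, total) where each row exposes jail, ip, timeofban, bancount and data as attributes. The dataclass name BanRow and the SQL are assumptions; origin filtering is elided.

    from dataclasses import dataclass

    import aiosqlite


    @dataclass(frozen=True)
    class BanRow:
        jail: str
        ip: str
        timeofban: int
        bancount: int
        data: str


    async def get_currently_banned(
        *, db_path: str, since: int, origin: str | None, limit: int, offset: int
    ) -> tuple[list[BanRow], int]:
        # NOTE: the origin filter (local vs. blocklist jails) is omitted from this sketch.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT COUNT(*) FROM bans WHERE timeofban >= ?", (since,)
            ) as cur:
                count_row = await cur.fetchone()
                total = int(count_row[0]) if count_row else 0
            rows: list[BanRow] = []
            if limit > 0:  # limit=0 callers only want the total
                async with db.execute(
                    "SELECT jail, ip, timeofban, bancount, data FROM bans "
                    "WHERE timeofban >= ? ORDER BY timeofban DESC LIMIT ? OFFSET ?",
                    (since, limit, offset),
                ) as cur:
                    rows = [
                        BanRow(
                            jail=str(r["jail"]),
                            ip=str(r["ip"]),
                            timeofban=int(r["timeofban"]),
                            bancount=int(r["bancount"]),
                            data=str(r["data"]),
                        )
                        async for r in cur
                    ]
        return rows, total
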
@@ -395,42 +385,31 @@ async def bans_by_country(
origin=origin,
)
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
f2b_db.row_factory = aiosqlite.Row
# Total count and companion rows reuse the same SQL query logic.
# Passing limit=0 returns only the total from the count query.
_, total = await fail2ban_db_repo.get_currently_banned(
db_path=db_path,
since=since,
origin=origin,
limit=0,
offset=0,
)
# Total count for the window.
async with f2b_db.execute(
"SELECT COUNT(*) FROM bans WHERE timeofban >= ?" + origin_clause,
(since, *origin_params),
) as cur:
count_row = await cur.fetchone()
total: int = int(count_row[0]) if count_row else 0
agg_rows = await fail2ban_db_repo.get_ban_event_counts(
db_path=db_path,
since=since,
origin=origin,
)
# Aggregation: unique IPs + their total event count.
# No LIMIT here — we need all unique source IPs for accurate country counts.
async with f2b_db.execute(
"SELECT ip, COUNT(*) AS event_count "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " GROUP BY ip",
(since, *origin_params),
) as cur:
agg_rows = await cur.fetchall()
companion_rows, _ = await fail2ban_db_repo.get_currently_banned(
db_path=db_path,
since=since,
origin=origin,
limit=_MAX_COMPANION_BANS,
offset=0,
)
# Companion table: most recent raw rows for display alongside the map.
async with f2b_db.execute(
"SELECT jail, ip, timeofban, bancount, data "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " ORDER BY timeofban DESC "
"LIMIT ?",
(since, *origin_params, _MAX_COMPANION_BANS),
) as cur:
companion_rows = await cur.fetchall()
unique_ips: list[str] = [str(r["ip"]) for r in agg_rows]
unique_ips: list[str] = [r.ip for r in agg_rows]
geo_map: dict[str, Any] = {}
if http_session is not None and unique_ips:
@@ -467,11 +446,11 @@ async def bans_by_country(
country_names: dict[str, str] = {}
for row in agg_rows:
ip: str = str(row["ip"])
ip: str = row.ip
geo = geo_map.get(ip)
cc: str | None = geo.country_code if geo else None
cn: str | None = geo.country_name if geo else None
event_count: int = int(row["event_count"])
event_count: int = row.event_count
if cc:
countries[cc] = countries.get(cc, 0) + event_count
@@ -481,26 +460,26 @@ async def bans_by_country(
# Build companion table from recent rows (geo already cached from batch step).
bans: list[DashboardBanItem] = []
for row in companion_rows:
ip = str(row["ip"])
ip = row.ip
geo = geo_map.get(ip)
cc = geo.country_code if geo else None
cn = geo.country_name if geo else None
asn: str | None = geo.asn if geo else None
org: str | None = geo.org if geo else None
matches, _ = _parse_data_json(row["data"])
matches, _ = _parse_data_json(row.data)
bans.append(
DashboardBanItem(
ip=ip,
jail=str(row["jail"]),
banned_at=_ts_to_iso(int(row["timeofban"])),
jail=row.jail,
banned_at=_ts_to_iso(row.timeofban),
service=matches[0] if matches else None,
country_code=cc,
country_name=cn,
asn=asn,
org=org,
ban_count=int(row["bancount"]),
origin=_derive_origin(str(row["jail"])),
ban_count=row.bancount,
origin=_derive_origin(row.jail),
)
)
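
bans_by_country consumes two further repository calls above: get_ban_event_counts, which returns one row per unique source IP with its event count (no LIMIT, since every IP is needed for accurate country totals), and a second get_currently_banned call capped at _MAX_COMPANION_BANS for the companion table. A hedged sketch of the aggregation helper, with an assumed dataclass name and origin filtering elided:

    from dataclasses import dataclass

    import aiosqlite


    @dataclass(frozen=True)
    class IpEventCount:
        ip: str
        event_count: int


    async def get_ban_event_counts(
        *, db_path: str, since: int, origin: str | None
    ) -> list[IpEventCount]:
        # NOTE: origin filtering is omitted from this sketch.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                # Deliberately no LIMIT: all unique IPs feed the per-country counts.
                "SELECT ip, COUNT(*) AS event_count FROM bans "
                "WHERE timeofban >= ? GROUP BY ip",
                (since,),
            ) as cur:
                return [
                    IpEventCount(ip=str(r["ip"]), event_count=int(r["event_count"]))
                    async for r in cur
                ]
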
@@ -565,32 +544,18 @@ async def ban_trend(
num_buckets=num_buckets,
)
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
f2b_db.row_factory = aiosqlite.Row
async with f2b_db.execute(
"SELECT CAST((timeofban - ?) / ? AS INTEGER) AS bucket_idx, "
"COUNT(*) AS cnt "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " GROUP BY bucket_idx "
"ORDER BY bucket_idx",
(since, bucket_secs, since, *origin_params),
) as cur:
rows = await cur.fetchall()
# Map bucket_idx → count; ignore any out-of-range indices.
counts: dict[int, int] = {}
for row in rows:
idx: int = int(row["bucket_idx"])
if 0 <= idx < num_buckets:
counts[idx] = int(row["cnt"])
counts = await fail2ban_db_repo.get_ban_counts_by_bucket(
db_path=db_path,
since=since,
bucket_secs=bucket_secs,
num_buckets=num_buckets,
origin=origin,
)
buckets: list[BanTrendBucket] = [
BanTrendBucket(
timestamp=_ts_to_iso(since + i * bucket_secs),
count=counts.get(i, 0),
count=counts[i],
)
for i in range(num_buckets)
]
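
In ban_trend the caller now indexes counts[i] directly instead of counts.get(i, 0), which implies the repository returns a dict pre-filled with a zero for every bucket index. A sketch under that assumption, with names and SQL inferred from the old inline query and origin filtering elided:

    import aiosqlite


    async def get_ban_counts_by_bucket(
        *, db_path: str, since: int, bucket_secs: int, num_buckets: int, origin: str | None
    ) -> dict[int, int]:
        # Pre-fill every bucket so callers can index without a default.
        counts: dict[int, int] = {i: 0 for i in range(num_buckets)}
        # NOTE: origin filtering is omitted from this sketch.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT CAST((timeofban - ?) / ? AS INTEGER) AS bucket_idx, "
                "COUNT(*) AS cnt FROM bans WHERE timeofban >= ? GROUP BY bucket_idx",
                (since, bucket_secs, since),
            ) as cur:
                async for row in cur:
                    idx = int(row["bucket_idx"])
                    if 0 <= idx < num_buckets:  # drop out-of-range indices, as the old inline code did
                        counts[idx] = int(row["cnt"])
        return counts
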
@@ -643,50 +608,37 @@ async def bans_by_jail(
origin=origin,
)
async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as f2b_db:
f2b_db.row_factory = aiosqlite.Row
total, jails = await fail2ban_db_repo.get_bans_by_jail(
db_path=db_path,
since=since,
origin=origin,
)
async with f2b_db.execute(
"SELECT COUNT(*) FROM bans WHERE timeofban >= ?" + origin_clause,
(since, *origin_params),
) as cur:
count_row = await cur.fetchone()
total: int = int(count_row[0]) if count_row else 0
# Diagnostic guard: if zero results were returned, check whether the table
# has *any* rows and log a warning with min/max timeofban so operators can
# diagnose timezone or filter mismatches from logs.
if total == 0:
table_row_count, min_timeofban, max_timeofban = (
await fail2ban_db_repo.get_bans_table_summary(db_path)
)
if table_row_count > 0:
log.warning(
"ban_service_bans_by_jail_empty_despite_data",
table_row_count=table_row_count,
min_timeofban=min_timeofban,
max_timeofban=max_timeofban,
since=since,
range=range_,
)
# Diagnostic guard: if zero results were returned, check whether the
# table has *any* rows and log a warning with min/max timeofban so
# operators can diagnose timezone or filter mismatches from logs.
if total == 0:
async with f2b_db.execute(
"SELECT COUNT(*), MIN(timeofban), MAX(timeofban) FROM bans"
) as cur:
diag_row = await cur.fetchone()
if diag_row and diag_row[0] > 0:
log.warning(
"ban_service_bans_by_jail_empty_despite_data",
table_row_count=diag_row[0],
min_timeofban=diag_row[1],
max_timeofban=diag_row[2],
since=since,
range=range_,
)
async with f2b_db.execute(
"SELECT jail, COUNT(*) AS cnt "
"FROM bans "
"WHERE timeofban >= ?"
+ origin_clause
+ " GROUP BY jail ORDER BY cnt DESC",
(since, *origin_params),
) as cur:
rows = await cur.fetchall()
jails: list[JailBanCount] = [
JailBanCount(jail=str(row["jail"]), count=int(row["cnt"])) for row in rows
]
log.debug(
"ban_service_bans_by_jail_result",
total=total,
jail_count=len(jails),
)
return BansByJailResponse(jails=jails, total=total)
# Pydantic strict validation requires either dicts or model instances.
# Our repository returns dataclasses for simplicity, so convert them here.
jail_dicts: list[dict[str, object]] = [asdict(j) for j in jails]
return BansByJailResponse(jails=jail_dicts, total=total)
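
The two remaining calls in bans_by_jail are likewise only visible from their call sites: get_bans_by_jail returns (total, jails) with dataclass items that the caller converts via asdict for Pydantic, and get_bans_table_summary returns (row_count, min_timeofban, max_timeofban) for the empty-result diagnostic. A hedged sketch of both, with an assumed dataclass name and origin filtering elided:

    from dataclasses import dataclass

    import aiosqlite


    @dataclass(frozen=True)
    class JailCount:
        jail: str
        count: int


    async def get_bans_by_jail(
        *, db_path: str, since: int, origin: str | None
    ) -> tuple[int, list[JailCount]]:
        # NOTE: origin filtering is omitted from this sketch.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT jail, COUNT(*) AS cnt FROM bans WHERE timeofban >= ? "
                "GROUP BY jail ORDER BY cnt DESC",
                (since,),
            ) as cur:
                jails = [
                    JailCount(jail=str(r["jail"]), count=int(r["cnt"]))
                    async for r in cur
                ]
        # The window total equals the sum of the per-jail counts.
        return sum(j.count for j in jails), jails


    async def get_bans_table_summary(db_path: str) -> tuple[int, int | None, int | None]:
        # Whole-table row count plus min/max timeofban, used only for diagnostics.
        async with aiosqlite.connect(f"file:{db_path}?mode=ro", uri=True) as db:
            async with db.execute(
                "SELECT COUNT(*), MIN(timeofban), MAX(timeofban) FROM bans"
            ) as cur:
                row = await cur.fetchone()
        if row is None:
            return 0, None, None
        return int(row[0]), row[1], row[2]
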