diff --git a/Docs/Tasks.md b/Docs/Tasks.md index 97aa80e..db71077 100644 --- a/Docs/Tasks.md +++ b/Docs/Tasks.md @@ -1,152 +1,3 @@ -### Issue #12: HIGH - Race Condition in Concurrent Writes (Import Runs Duplication) - -**Where found**: -- `backend/app/repositories/import_run_repo.py` (lines 89-100) -- `create_or_update()` not atomic -- Check then insert pattern (TOCTOU) - -**Why this is needed**: -Two concurrent imports of same source can create duplicate rows instead of updating existing one. - -**Goal**: -Make import run creation atomic using database-level constraints. - -**What to do**: -1. Replace check-then-insert with INSERT ON CONFLICT: - ```python - await self.db.execute(""" - INSERT INTO import_runs (source_id, content_hash, status, created_at) - VALUES (?, ?, 'pending', CURRENT_TIMESTAMP) - ON CONFLICT(source_id, content_hash) DO UPDATE SET - status = 'pending', - updated_at = CURRENT_TIMESTAMP - """, source_id, content_hash) - ``` -2. Ensure UNIQUE(source_id, content_hash) constraint exists -3. Test concurrent import scenario -4. Handle conflict resolution properly - -**Possible traps and issues**: -- ON CONFLICT syntax varies by database (SQLite vs PostgreSQL) -- Concurrent inserts might still have race windows -- Error handling for constraint violations - -**Docs changes needed**: -- Add concurrency guidelines to development docs -- Document data consistency model - -**Doc references**: -- DATABASE_API_DEPLOYMENT_ISSUES.md - Issue "10.1 Race Condition in Concurrent Writes" - ---- - -### Issue #13: HIGH - Frontend-Backend Type Mismatches at Runtime - -**Where found**: -- `frontend/src/types/ban.ts` expects `country_code: string | null` -- `backend/app/models/ban.py` could return empty string `""` -- Frontend type narrowing: `if (ban.country_code)` fails for empty string -- Timestamp format confusion (ISO string vs UNIX integer) - -**Why this is needed**: -Frontend expects specific types but backend returns slightly different types, causing: -- Silent data loss (empty string treated as falsy) -- Parsing errors (string timestamp passed to Date constructor) -- Incomplete rendering (missing data appears as undefined) - -**Goal**: -Align frontend and backend type definitions to eliminate runtime type mismatches. - -**What to do**: -1. Add validation in backend to ensure types match frontend expectations: - ```python - class BanResponse(BaseModel): - country_code: str | None = None - - @field_validator("country_code") - def validate_country_code(cls, v): - # Never empty string, must be None or 2-char code - if v is not None and (len(v) != 2 or not v.isupper()): - raise ValueError("Country code must be 2-char uppercase or None") - return v - ``` -2. Standardize timestamp format (use UNIX epoch everywhere) -3. Update frontend types to match backend validation -4. Add CI check to validate types stay in sync (generate and validate types on each build) -5. Write tests for edge cases (empty results, null fields, zero values) - -**Possible traps and issues**: -- Frontend code assumes old types - breaking change -- Type generation script might silently fail -- Null vs empty string distinction not enforced -- Serialization/deserialization edge cases - -**Docs changes needed**: -- Create `Docs/TYPE_SAFETY.md` explaining shared type system -- Add to API documentation type constraints -- Document type generation process in development guide - -**Doc references**: -- DATABASE_API_DEPLOYMENT_ISSUES.md - Issue "4.1 Type Mismatches in API Responses" - ---- - -## MEDIUM PRIORITY ISSUES - ---- - -### Issue #14: MEDIUM - ReDoS (Regular Expression Denial of Service) Vulnerability - -**Where found**: -- `backend/app/utils/regex_validator.py` (lines 71+) -- Pattern validation uses timeout but doesn't detect catastrophic backtracking patterns - -**Why this is needed**: -Regex patterns like `(x+)+y` can hang the regex engine even within timeout, causing DoS attacks via filter configuration. - -**Goal**: -Detect known ReDoS patterns before compiling them. - -**What to do**: -1. Add regex pattern analysis library: - ```bash - pip install regexploit - ``` -2. Update validator: - ```python - from regexploit import analyze - - def validate_regex(pattern: str): - # Check for ReDoS patterns - analysis = analyze(pattern) - if analysis.has_redos: - raise ValueError(f"ReDoS pattern detected: {analysis.reason}") - - # Also do timeout check - try: - re.compile(pattern, timeout=1) - except TimeoutError: - raise ValueError("Regex too complex") - ``` -3. Test against known ReDoS patterns -4. Add validation to filter/action config endpoints - -**Possible traps and issues**: -- `regexploit` library might have false positives/negatives -- Some legitimate complex patterns might be rejected -- Performance cost of analysis on every pattern -- Library might not support all regex flavors - -**Docs changes needed**: -- Add regex safety guidelines to config docs -- Document rejected pattern examples -- Add to `TROUBLESHOOTING.md` - "Regex pattern rejected" - -**Doc references**: -- DETAILED_FINDINGS.md - Issue #6 "ReDoS Vulnerability" - ---- - ### Issue #15: MEDIUM - N+1 Query Pattern in Geo Lookups **Where found**: diff --git a/backend/app/models/geo.py b/backend/app/models/geo.py index 135500c..d1111cb 100644 --- a/backend/app/models/geo.py +++ b/backend/app/models/geo.py @@ -75,6 +75,8 @@ class GeoCacheStatsResponse(BanGuiBaseModel): unresolved: int = Field(..., description="Number of geo_cache rows with country_code IS NULL.") neg_cache_size: int = Field(..., description="Number of entries in the in-memory negative cache.") dirty_size: int = Field(..., description="Number of newly resolved entries not yet flushed to disk.") + hits: int = Field(default=0, description="Number of cache hits since last clear.") + misses: int = Field(default=0, description="Number of cache misses since last clear.") class GeoReResolveResponse(BanGuiBaseModel): """Response for ``POST /api/geo/re-resolve``. diff --git a/backend/app/services/geo_cache.py b/backend/app/services/geo_cache.py index 729cbc1..c8f6382 100644 --- a/backend/app/services/geo_cache.py +++ b/backend/app/services/geo_cache.py @@ -16,6 +16,7 @@ An instance should be created once at startup and stored on ``app.state.geo_cach from __future__ import annotations import asyncio +import collections.abc import time from typing import TYPE_CHECKING @@ -26,6 +27,8 @@ from app.models.geo import GeoInfo from app.repositories import geo_cache_repo if TYPE_CHECKING: + import collections.abc + import aiosqlite import geoip2.database import geoip2.errors @@ -91,6 +94,8 @@ class GeoCache: _geoip_initialized: Indicates whether init_geoip() has been called. _allow_http_fallback: Whether to use ip-api.com as fallback. _cache_lock: Async lock protecting cache mutations. + _hits: Cache hit counter (increments on hit, resets on clear). + _misses: Cache miss counter (increments on miss, resets on clear). """ def __init__(self, allow_http_fallback: bool = False) -> None: @@ -108,6 +113,8 @@ class GeoCache: self._geoip_initialized: bool = False self._allow_http_fallback: bool = allow_http_fallback self._cache_lock: asyncio.Lock = asyncio.Lock() + self._hits: int = 0 + self._misses: int = 0 async def clear(self) -> None: """Flush both the positive and negative lookup caches. @@ -119,6 +126,8 @@ class GeoCache: self._cache.clear() self._neg_cache.clear() self._dirty.clear() + self._hits = 0 + self._misses = 0 async def clear_neg_cache(self) -> None: """Flush only the negative (failed-lookups) cache. @@ -162,6 +171,8 @@ class GeoCache: "unresolved": unresolved, "neg_cache_size": len(self._neg_cache), "dirty_size": len(self._dirty), + "hits": self._hits, + "misses": self._misses, } async def count_unresolved(self, db: aiosqlite.Connection) -> int: @@ -365,6 +376,7 @@ class GeoCache: (e.g. network timeout). """ if ip in self._cache: + self._hits += 1 return self._cache[ip] # Negative cache: skip IPs that recently failed to avoid hammering the API. @@ -396,6 +408,7 @@ class GeoCache: log.debug("geo_lookup_failed_no_http_fallback", ip=ip) async with self._cache_lock: self._neg_cache[ip] = time.monotonic() + self._misses += 1 if db is not None: try: await geo_cache_repo.upsert_neg_entry_and_commit(db=db, ip=ip) @@ -447,6 +460,7 @@ class GeoCache: # Both resolvers failed — record in negative cache to avoid hammering. async with self._cache_lock: self._neg_cache[ip] = time.monotonic() + self._misses += 1 if db is not None: try: await geo_cache_repo.upsert_neg_entry_and_commit(db=db, ip=ip) @@ -538,9 +552,15 @@ class GeoCache: else: uncached.append(ip) + # Count cache hits (IPs found in cache) before miss counting. + cache_hits = sum(1 for ip in unique_ips if ip in self._cache) + self._hits += cache_hits + if not uncached: return geo_result + self._misses += len(uncached) + log.info("geo_batch_lookup_start", total=len(uncached)) # PRIMARY: Try local MaxMind database for all uncached IPs. @@ -824,3 +844,20 @@ class GeoCache: log.info("geo_flush_dirty_complete", count=len(rows)) return len(rows) + + def prewarm( + self, + ips: collections.abc.Iterable[str], + http_session: aiohttp.ClientSession, + ) -> None: + """Pre-load geo data for *ips* without blocking the caller. + + Fires off :meth:`lookup_batch` as a background task. Useful at startup + or when warming the cache with commonly-seen IPs before they appear in + bans. + + Args: + ips: Iterable of IP address strings to pre-warm. + http_session: Shared :class:`aiohttp.ClientSession` for HTTP fallback. + """ + asyncio.create_task(self.lookup_batch(list(ips), http_session)) diff --git a/backend/tests/test_services/test_geo_service.py b/backend/tests/test_services/test_geo_service.py index 06ad741..1e9402f 100644 --- a/backend/tests/test_services/test_geo_service.py +++ b/backend/tests/test_services/test_geo_service.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio from collections.abc import Mapping, Sequence from unittest.mock import AsyncMock, MagicMock, patch @@ -443,13 +444,39 @@ def _make_async_db() -> MagicMock: """Build a minimal mock :class:`aiosqlite.Connection`. Returns: - MagicMock with ``execute``, ``executemany``, and ``commit`` wired as - async coroutines. + MagicMock with ``execute``, ``executemany``, ``commit``, and ``rollback`` + wired as async coroutines. ``execute`` is an async function that returns + an async context manager yielding a cursor. """ db = MagicMock() - db.execute = AsyncMock() + + # Build a mock cursor for count_unresolved and similar queries. + mock_cursor = MagicMock() + mock_cursor.fetchone = AsyncMock(return_value=(0,)) + mock_cursor.fetchall = AsyncMock(return_value=[]) + + # Build an async context manager wrapping the cursor. + mock_ctx = MagicMock() + mock_ctx.__aenter__ = AsyncMock(return_value=mock_cursor) + mock_ctx.__aexit__ = AsyncMock(return_value=None) + + # For BEGIN statements (transaction start), return a no-op ctx manager. + class _BeginCtx: + async def __aenter__(self) -> None: + return None + + async def __aexit__(self, *args: object) -> None: + return None + + async def fake_execute(sql: str, *args: object, **kwargs: object) -> MagicMock: # type: ignore[no-untyped-def] + if isinstance(sql, str) and sql.startswith("BEGIN"): + return MagicMock(__aenter__=AsyncMock(return_value=None), __aexit__=AsyncMock(return_value=None)) + return mock_ctx + + db.execute = MagicMock(side_effect=fake_execute) db.executemany = AsyncMock() db.commit = AsyncMock() + db.rollback = AsyncMock() return db @@ -1032,3 +1059,214 @@ class TestLookupBatchBulkWrites: assert db.executemany.await_count == 2 db.execute.assert_not_awaited() + +# --------------------------------------------------------------------------- +# Cache metrics (Issue #15) +# --------------------------------------------------------------------------- + + +class TestCacheMetrics: + """Metrics counters track cache hit/miss ratios.""" + + async def test_cache_hit_increments_hits(self) -> None: + """lookup() with a cached IP increments _hits.""" + geo_cache = GeoCache(allow_http_fallback=True) + geo_cache._cache["1.1.1.1"] = GeoInfo( + country_code="AU", country_name="Australia", asn=None, org=None + ) + + await geo_cache.lookup("1.1.1.1", MagicMock()) + + assert geo_cache._hits == 1 + assert geo_cache._misses == 0 + + async def test_cache_miss_increments_misses(self) -> None: + """lookup() with allow_http_fallback=False increments _misses on MMDB miss.""" + geo_cache = GeoCache(allow_http_fallback=False) + + await geo_cache.lookup("10.255.255.1", MagicMock()) + + assert geo_cache._hits == 0 + assert geo_cache._misses == 1 + + async def test_batch_hits_count_cached_ips(self) -> None: + """lookup_batch increments _hits for IPs already in cache.""" + geo_cache = GeoCache(allow_http_fallback=True) + + session = _make_batch_session( + [ + { + "query": "1.1.1.1", + "status": "success", + "countryCode": "AU", + "country": "Australia", + "as": "AS13335", + "org": "Cloudflare", + }, + ] + ) + # First: populate cache for 1.1.1.1. + await geo_cache.lookup_batch(["1.1.1.1"], session) + + # Second call: 1.1.1.1 is cache hit; 2.2.2.2 is not cached → HTTP call. + session2 = _make_batch_session( + [ + { + "query": "2.2.2.2", + "status": "success", + "countryCode": "BR", + "country": "Brazil", + "as": "AS1", + "org": "Org", + }, + ] + ) + await geo_cache.lookup_batch(["1.1.1.1", "2.2.2.2"], session2) + + assert geo_cache._hits == 1, f"Expected 1 hit, got {geo_cache._hits}" + assert geo_cache._misses == 2, f"Expected 2 misses (both IPs needed resolution), got {geo_cache._misses}" + + async def test_cache_stats_includes_hits_and_misses(self) -> None: + """cache_stats() returns hits and misses counters.""" + geo_cache = GeoCache() + db = MagicMock() + + # Patch count_unresolved to avoid the async context manager issue. + from app.repositories import geo_cache_repo + + with patch.object( + geo_cache_repo, + "count_unresolved", + AsyncMock(return_value=0), + ): + stats = await geo_cache.cache_stats(db) + + assert "hits" in stats + assert "misses" in stats + assert stats["hits"] == geo_cache._hits + assert stats["misses"] == geo_cache._misses + + async def test_clear_resets_hits_and_misses(self) -> None: + """clear() resets _hits and _misses to zero.""" + geo_cache = GeoCache() + geo_cache._hits = 42 + geo_cache._misses = 99 + + await geo_cache.clear() + + assert geo_cache._hits == 0 + assert geo_cache._misses == 0 + + +# --------------------------------------------------------------------------- +# Pre-warming (Issue #15) +# --------------------------------------------------------------------------- + + +class TestPrewarm: + """prewarm() loads geo data without blocking the caller.""" + + async def test_prewarm_fires_and_forgets(self) -> None: + """prewarm() returns None immediately (fire-and-forget).""" + geo_cache = GeoCache(allow_http_fallback=True) + session = _make_batch_session( + [ + { + "query": "1.1.1.1", + "status": "success", + "countryCode": "AU", + "country": "Australia", + "as": "AS13335", + "org": "Cloudflare", + }, + ] + ) + + result = geo_cache.prewarm(["1.1.1.1"], session) + assert result is None + # Let the fire-and-forget task complete. + await asyncio.sleep(0.05) + + async def test_prewarm_populates_cache_eventually(self) -> None: + """prewarm() eventually populates the cache via lookup_batch.""" + geo_cache = GeoCache(allow_http_fallback=True) + session = _make_batch_session( + [ + { + "query": "1.1.1.1", + "status": "success", + "countryCode": "AU", + "country": "Australia", + "as": "AS13335", + "org": "Cloudflare", + }, + ] + ) + + geo_cache.prewarm(["1.1.1.1"], session) + # Let the fire-and-forget task run. + await asyncio.sleep(0.05) + + assert "1.1.1.1" in geo_cache._cache + + async def test_prewarm_accepts_empty_list(self) -> None: + """prewarm() with an empty list does not raise.""" + geo_cache = GeoCache() + result = geo_cache.prewarm([], MagicMock()) + assert result is None + await asyncio.sleep(0.01) + + +# --------------------------------------------------------------------------- +# Large ban list performance (Issue #15) +# --------------------------------------------------------------------------- + + +class TestLargeBanList: + """lookup_batch handles large IP lists without O(n) per-IP overhead.""" + + async def test_batch_processes_1000_ips_single_db_write(self) -> None: + """1000 IPs should result in a single bulk DB write per chunk.""" + geo_cache = GeoCache(allow_http_fallback=True) + ips = [f"1.1.1.{i % 256}" for i in range(1000)] + batch_response = [ + { + "query": ip, + "status": "success", + "countryCode": "US", + "country": "United States", + "as": "AS1", + "org": "Org1", + } + for ip in ips + ] + session = _make_batch_session(batch_response) + db = _make_async_db() + + await geo_cache.lookup_batch(ips, session, db=db) + + assert db.executemany.await_count >= 1 + + async def test_batch_deduplicates_ips(self) -> None: + """lookup_batch deduplicates input IPs to avoid redundant work.""" + geo_cache = GeoCache(allow_http_fallback=True) + ips = ["1.1.1.1", "1.1.1.1", "1.1.1.1"] + + session = _make_batch_session( + [ + { + "query": "1.1.1.1", + "status": "success", + "countryCode": "AU", + "country": "Australia", + "as": "AS13335", + "org": "Cloudflare", + }, + ] + ) + + result = await geo_cache.lookup_batch(ips, session) + + assert len(result) == 1 + assert "1.1.1.1" in result +