"""Background task that retries unresolved geo lookups.

This module registers an APScheduler interval job which periodically
re-attempts resolution for IP addresses stored in the ``geo_cache`` table
with a ``NULL`` ``country_code``. Such rows are negative entries left
behind by an earlier failed lookup (for example when ip-api.com was rate
limiting requests).

The job fires every 10 minutes. Each run:

1. Reads every ``NULL``-country row from ``geo_cache``.
2. Clears the in-memory negative cache so those IPs become eligible for
   a fresh API attempt.
3. Hands the IPs to
   :meth:`~app.services.geo_cache.GeoCache.lookup_batch`, which already
   takes care of rate-limit throttling and retries.
4. Logs how many IPs were retried and how many of them resolved.

A correlation ID is installed for the duration of each run via
:mod:`app.utils.correlation` so that the log lines of one run can be tied
together.
"""

from __future__ import annotations

import uuid
from typing import TYPE_CHECKING

from app.utils.logging_compat import get_logger
from app.tasks.db import task_db
from app.tasks.timeout_utils import run_with_timeout
from app.utils.correlation import get_correlation_id, reset_correlation_id, set_correlation_id
from app.utils.runtime_state import get_effective_settings

if TYPE_CHECKING:
    from aiohttp import ClientSession
    from fastapi import FastAPI

    from app.config import Settings
    from app.services.geo_cache import GeoCache

log = get_logger(__name__)

#: Seconds between two runs of the re-resolve job (10 minutes).
GEO_RE_RESOLVE_INTERVAL: int = 600

#: Fixed APScheduler job ID, so registering again replaces the existing job
#: instead of adding a duplicate.
JOB_ID: str = "geo_re_resolve"

#: Upper bound, in seconds, on the duration of a single re-resolve run.
TASK_TIMEOUT_SECONDS: int = 120


async def _run_re_resolve_with_resources(
    geo_cache: GeoCache,
    settings: Settings,
    http_session: ClientSession,
    correlation_id: str | None = None,
) -> None:
    """Run one re-resolve pass inside a correlation-ID context.

    Args:
        geo_cache: The application's GeoCache instance.
        settings: The resolved application settings used for database
            access.
        http_session: The shared aiohttp session used for external lookups.
        correlation_id: Correlation ID to attach to this run's logs. A
            random UUID4 string is generated when omitted.
    """
    run_id = correlation_id if correlation_id is not None else str(uuid.uuid4())

    context_token = set_correlation_id(run_id)
    try:
        await _do_re_resolve_with_resources(geo_cache, settings, http_session)
    finally:
        # Always restore the previous correlation context, even on failure.
        reset_correlation_id(context_token)


async def _do_re_resolve_with_resources(
    geo_cache: GeoCache,
    settings: Settings,
    http_session: ClientSession,
) -> None:
    """Perform the re-resolve work; expects the correlation context to be set.

    The actual work is wrapped in a coroutine that is passed to
    ``run_with_timeout`` together with ``TASK_TIMEOUT_SECONDS``.
    """

    async def _retry_pending() -> None:
        async with task_db(settings) as db:
            # Every IP whose country_code is NULL in the persistent cache.
            pending = await geo_cache.get_unresolved_ips(db)

            if not pending:
                log.debug(
                    "geo_re_resolve_skip",
                    correlation_id=get_correlation_id(),
                    reason="no_unresolved_ips",
                )
                return

            log.info(
                "geo_re_resolve_start",
                correlation_id=get_correlation_id(),
                unresolved=len(pending),
            )

            # Drop negative-cache entries so the IPs qualify for new API calls.
            await geo_cache.clear_neg_cache()

            # With db supplied, lookup_batch takes care of throttling, retries
            # and persistence. DB writes are fine here: this is a background
            # task.
            outcome = await geo_cache.lookup_batch(pending, http_session, db=db)

            # Summing booleans counts the entries that now carry a country.
            resolved_count: int = sum(
                entry.country_code is not None for entry in outcome.values()
            )

            log.info(
                "geo_re_resolve_complete",
                correlation_id=get_correlation_id(),
                retried=len(pending),
                resolved=resolved_count,
            )

    await run_with_timeout("geo_re_resolve", _retry_pending(), TASK_TIMEOUT_SECONDS)


async def _run_re_resolve(app: FastAPI) -> None:
    """Run one re-resolve pass using resources looked up on *app*.

    Reads ``app.state.geo_cache`` and ``app.state.http_session`` and obtains
    the settings from ``get_effective_settings(app)`` at call time.
    """
    await _run_re_resolve_with_resources(
        geo_cache=app.state.geo_cache,
        settings=get_effective_settings(app),
        http_session=app.state.http_session,
    )


def register(app: FastAPI) -> None:
    """Add (or replace) the geo re-resolve job in the application scheduler.

    Call this once the scheduler is running, i.e. inside the lifespan
    handler after ``scheduler.start()``. The first run is deferred by one
    full interval, which gives the initial blocklist prewarm time to finish
    before re-resolve kicks in.

    The GeoCache, settings and HTTP session are read from *app* here, at
    registration time, and handed to the job as keyword arguments.

    Args:
        app: The :class:`fastapi.FastAPI` application instance whose
            ``app.state.scheduler`` will receive the job.
    """
    cache: GeoCache = app.state.geo_cache
    effective_settings = get_effective_settings(app)

    app.state.scheduler.add_job(
        _run_re_resolve_with_resources,
        trigger="interval",
        seconds=GEO_RE_RESOLVE_INTERVAL,
        kwargs=dict(
            geo_cache=cache,
            settings=effective_settings,
            http_session=app.state.http_session,
        ),
        id=JOB_ID,
        replace_existing=True,
    )
    log.info("geo_re_resolve_scheduled", interval_seconds=GEO_RE_RESOLVE_INTERVAL)