"""Background task that prunes stale rows from the ``geo_cache`` table.

An APScheduler interval job is registered that deletes every geo cache
entry whose ``last_seen`` timestamp is older than the retention window
(:data:`GEO_CACHE_RETENTION_DAYS`, 90 days by default).  Pruning keeps the
database file from growing without bound and keeps geo lookups fast.

Purging is safe: if a removed IP shows up again it is simply re-resolved
from the MaxMind database or ip-api.com (when configured).

Each run executes under a correlation ID managed through
:mod:`app.utils.correlation`, so the log lines of a run can be tied
together.
"""

from __future__ import annotations

import uuid
from datetime import UTC, datetime, timedelta
from typing import TYPE_CHECKING

import structlog

from app.repositories import geo_cache_repo
from app.tasks.db import task_db
from app.tasks.timeout_utils import run_with_timeout
from app.utils.correlation import get_correlation_id, reset_correlation_id, set_correlation_id
from app.utils.runtime_state import get_effective_settings

if TYPE_CHECKING:
    from fastapi import FastAPI

    from app.config import Settings

log: structlog.stdlib.BoundLogger = structlog.get_logger()

#: Retention window for geo cache entries, in days. Tuning constant.
GEO_CACHE_RETENTION_DAYS: int = 90

#: Seconds between two cleanup runs. Default: once per day.
GEO_CLEANUP_INTERVAL: int = 24 * 60 * 60

#: Fixed APScheduler job ID so that re-registering replaces the job
#: instead of adding a duplicate.
JOB_ID: str = "geo_cache_cleanup"

#: Upper bound, in seconds, on the duration of a single cleanup run.
TASK_TIMEOUT_SECONDS: int = 60


async def _run_cleanup_with_resources(
    settings: Settings,
    correlation_id: str | None = None,
) -> None:
    """Run one cleanup pass under a correlation ID.

    The correlation ID is installed before the cleanup starts and is always
    reset afterwards, even when the cleanup raises.

    Args:
        settings: The resolved application settings used for database access.
        correlation_id: Correlation ID to attach to the run's log output.
            A fresh UUID4 string is generated when ``None`` is passed.
    """
    run_id = str(uuid.uuid4()) if correlation_id is None else correlation_id

    token = set_correlation_id(run_id)
    try:
        await _do_cleanup_with_settings(settings)
    finally:
        # Restore the previous correlation context regardless of the outcome.
        reset_correlation_id(token)


async def _do_cleanup_with_settings(settings: Settings) -> None:
    """Delete stale geo cache entries, bounded by the task timeout.

    Expects the correlation context to have been set by the caller; the
    current correlation ID is included in the result log event.
    """

    async def _purge_stale_entries() -> None:
        # Entries last seen before this instant are considered stale.
        oldest_kept = datetime.now(UTC) - timedelta(days=GEO_CACHE_RETENTION_DAYS)
        cutoff_iso = oldest_kept.strftime("%Y-%m-%dT%H:%M:%SZ")

        async with task_db(settings) as db:
            deleted = await geo_cache_repo.delete_stale_entries(db, cutoff_iso)
            await db.commit()

            # Runs that removed nothing are only worth a debug-level record.
            emit = log.info if deleted > 0 else log.debug
            emit(
                "geo_cache_cleanup_ran",
                correlation_id=get_correlation_id(),
                deleted=deleted,
                retention_days=GEO_CACHE_RETENTION_DAYS,
            )

    await run_with_timeout(
        "geo_cache_cleanup",
        _purge_stale_entries(),
        TASK_TIMEOUT_SECONDS,
    )


async def _run_cleanup(app: FastAPI) -> None:
    """Run a cleanup pass using the settings currently effective for *app*."""
    effective_settings = get_effective_settings(app)
    await _run_cleanup_with_resources(effective_settings)


def register(app: FastAPI) -> None:
    """Add (or replace) the geo cache cleanup job in the application scheduler.

    Intended to be called once the scheduler has been started (i.e. inside
    the lifespan handler, after ``scheduler.start()``).  When no scheduler
    is present on ``app.state`` a warning is logged and nothing is
    registered.

    Args:
        app: The :class:`fastapi.FastAPI` application instance whose
            ``app.state.scheduler`` will receive the job.
    """
    settings = get_effective_settings(app)

    scheduler = getattr(app.state, "scheduler", None)
    if scheduler is None:
        # Tests and standalone usage may not have put a scheduler on
        # app.state; skip registering the cleanup job rather than crash.
        log.warning("geo_cache_cleanup_no_scheduler")
        return

    job_options = {
        "trigger": "interval",
        "seconds": GEO_CLEANUP_INTERVAL,
        "kwargs": {"settings": settings},
        "id": JOB_ID,
        "replace_existing": True,
    }
    scheduler.add_job(_run_cleanup_with_resources, **job_options)

    log.info(
        "geo_cache_cleanup_scheduled",
        interval_seconds=GEO_CLEANUP_INTERVAL,
        retention_days=GEO_CACHE_RETENTION_DAYS,
    )