"""Blocklist downloader component. Handles downloading blocklist content from remote URLs with retry logic for transient failures (429, 5xx errors, timeouts, network errors). Works with DnsValidatedTCPConnector to prevent DNS-rebinding attacks at connection time. """ from __future__ import annotations import asyncio import aiohttp from app.utils.logging_compat import get_logger log = get_logger(__name__) #: HTTP status codes that should be retried for blocklist downloads. _BLOCKLIST_HTTP_RETRY_STATUSES: frozenset[int] = frozenset({429, 500, 502, 503, 504}) #: How many attempts to make for transient blocklist download failures. _BLOCKLIST_HTTP_RETRY_ATTEMPTS: int = 2 #: Base backoff in seconds used between retry attempts. _BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS: float = 1.0 class BlocklistDownloader: """Downloads blocklist content from remote URLs with exponential backoff retry.""" def __init__( self, http_session: aiohttp.ClientSession, *, retry_attempts: int = _BLOCKLIST_HTTP_RETRY_ATTEMPTS, backoff_base: float = _BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS, retry_statuses: frozenset[int] = _BLOCKLIST_HTTP_RETRY_STATUSES, ) -> None: """Initialize the downloader. Args: http_session: Shared aiohttp session for HTTP requests. retry_attempts: Number of retry attempts for transient failures. backoff_base: Base backoff in seconds for exponential backoff. retry_statuses: HTTP status codes that trigger a retry. """ self.http_session = http_session self.retry_attempts = retry_attempts self.backoff_base = backoff_base self.retry_statuses = retry_statuses async def download( self, url: str, timeout: aiohttp.ClientTimeout, ) -> tuple[int, str]: """Download text from a URL with retry logic for transient failures. Args: url: URL to download. timeout: Request timeout configuration. Returns: Tuple of (HTTP status code, response text). Raises: TimeoutError: If the request times out after all retries. aiohttp.ClientError: If the request fails after all retries. Exception: If an unexpected error occurs after all retries. """ last_exception: Exception | None = None for attempt in range(1, self.retry_attempts + 1): try: async with self.http_session.get(url, timeout=timeout) as resp: text = await resp.text(errors="replace") if ( resp.status in self.retry_statuses and attempt < self.retry_attempts ): backoff = self.backoff_base * (2 ** (attempt - 1)) log.warning( "blocklist_download_retry", url=url, status=resp.status, attempt=attempt, backoff=backoff, ) await asyncio.sleep(backoff) continue return resp.status, text except (TimeoutError, aiohttp.ClientError) as exc: last_exception = exc if attempt >= self.retry_attempts: raise backoff = self.backoff_base * (2 ** (attempt - 1)) log.warning( "blocklist_download_retry_error", url=url, attempt=attempt, error=repr(exc), backoff=backoff, ) await asyncio.sleep(backoff) except Exception as exc: last_exception = exc if attempt >= self.retry_attempts: raise backoff = self.backoff_base * (2 ** (attempt - 1)) log.warning( "blocklist_download_retry_error", url=url, attempt=attempt, error=repr(exc), error_type="unexpected", backoff=backoff, ) await asyncio.sleep(backoff) assert last_exception is not None raise last_exception