- Remove structlog dependency from backend/pyproject.toml - Add app.utils.logging_compat shim for keyword-arg logging API - Add app.utils.json_formatter for JSON log output with extra fields - Update all backend modules to use logging_compat.get_logger() - Update docstrings in log_sanitizer.py and json_formatter.py - Update test comment in test_async_utils.py - Record 406 failing tests in Docs/Tasks.md for tracking
121 lines
4.3 KiB
Python
121 lines
4.3 KiB
Python
"""Blocklist downloader component.

Handles downloading blocklist content from remote URLs with retry logic for
transient failures (429, 5xx errors, timeouts, network errors). Works with
DnsValidatedTCPConnector to prevent DNS-rebinding attacks at connection time.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
|
|
import aiohttp
|
|
from app.utils.logging_compat import get_logger
|
|
|
|
log = get_logger(__name__)
|
|
|
|
#: HTTP status codes that should be retried for blocklist downloads
#: (rate limiting and server-side errors).
_BLOCKLIST_HTTP_RETRY_STATUSES: frozenset[int] = frozenset({429, 500, 502, 503, 504})

#: Default number of attempts made for a blocklist download before giving up.
_BLOCKLIST_HTTP_RETRY_ATTEMPTS: int = 2

#: Default base backoff, in seconds, used between retry attempts.
_BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS: float = 1.0
|
|
|
|
|
|
class BlocklistDownloader:
    """Downloads blocklist content from remote URLs with exponential backoff retry.

    A download is attempted up to ``retry_attempts`` times. An attempt is
    retried when the response status is in ``retry_statuses`` or when the
    request raises; the delay before retry ``n + 1`` is
    ``backoff_base * 2 ** (n - 1)`` seconds.
    """

    def __init__(
        self,
        http_session: aiohttp.ClientSession,
        *,
        retry_attempts: int = _BLOCKLIST_HTTP_RETRY_ATTEMPTS,
        backoff_base: float = _BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS,
        retry_statuses: frozenset[int] = _BLOCKLIST_HTTP_RETRY_STATUSES,
    ) -> None:
        """Initialize the downloader.

        Args:
            http_session: Shared aiohttp session for HTTP requests.
            retry_attempts: Total number of attempts per download. Values
                below 1 are treated as 1 by :meth:`download`.
            backoff_base: Base backoff in seconds for exponential backoff.
            retry_statuses: HTTP status codes that trigger a retry.
        """
        self.http_session = http_session
        self.retry_attempts = retry_attempts
        self.backoff_base = backoff_base
        self.retry_statuses = retry_statuses

    async def download(
        self,
        url: str,
        timeout: aiohttp.ClientTimeout,
    ) -> tuple[int, str]:
        """Download text from a URL with retry logic for transient failures.

        The body is decoded with ``errors="replace"``, so undecodable bytes
        do not raise. On the final attempt the response is returned as-is,
        even when its status is one of ``retry_statuses``.

        Args:
            url: URL to download.
            timeout: Request timeout configuration.

        Returns:
            Tuple of (HTTP status code, response text).

        Raises:
            TimeoutError: If the request times out after all retries.
            aiohttp.ClientError: If the request fails after all retries.
            Exception: If an unexpected error occurs after all retries.
        """
        # Always issue at least one request. Without the clamp, a
        # retry_attempts below 1 would skip the loop entirely and there would
        # be neither a response nor an exception to report.
        total_attempts = max(1, self.retry_attempts)

        for attempt in range(1, total_attempts + 1):
            final_attempt = attempt >= total_attempts
            backoff = self.backoff_base * (2 ** (attempt - 1))

            try:
                async with self.http_session.get(url, timeout=timeout) as resp:
                    status = resp.status
                    text = await resp.text(errors="replace")
            except Exception as exc:
                if final_attempt:
                    # Out of attempts: propagate with the original traceback.
                    raise
                fields: dict[str, object] = {
                    "url": url,
                    "attempt": attempt,
                    "error": repr(exc),
                }
                # Timeouts and aiohttp client errors are the expected
                # transient failures; anything else is still retried but is
                # flagged so it stands out in the logs.
                if not isinstance(exc, (TimeoutError, aiohttp.ClientError)):
                    fields["error_type"] = "unexpected"
                fields["backoff"] = backoff
                log.warning("blocklist_download_retry_error", **fields)
            else:
                if final_attempt or status not in self.retry_statuses:
                    return status, text
                log.warning(
                    "blocklist_download_retry",
                    url=url,
                    status=status,
                    attempt=attempt,
                    backoff=backoff,
                )

            # Sleep outside the response context so the response is not kept
            # open for the duration of the backoff.
            await asyncio.sleep(backoff)

        # The final attempt always returns or re-raises, so the loop cannot
        # fall through; this guard keeps that invariant explicit without
        # relying on ``assert`` (which is stripped under ``python -O``).
        raise RuntimeError("blocklist download retry loop exited without a result")
|