Files
BanGUI/backend/app/services/blocklist_downloader.py
Lukas 7ec80fdeec refactor(logging): replace structlog with stdlib logging compat layer
- Remove structlog dependency from backend/pyproject.toml
- Add app.utils.logging_compat shim for keyword-arg logging API
- Add app.utils.json_formatter for JSON log output with extra fields
- Update all backend modules to use logging_compat.get_logger()
- Update docstrings in log_sanitizer.py and json_formatter.py
- Update test comment in test_async_utils.py
- Record 406 failing tests in Docs/Tasks.md for tracking
2026-05-10 13:37:54 +02:00

121 lines
4.3 KiB
Python

"""Blocklist downloader component.
Handles downloading blocklist content from remote URLs with retry logic for
transient failures (429, 5xx errors, timeouts, network errors). Works with
DnsValidatedTCPConnector to prevent DNS-rebinding attacks at connection time.
"""
from __future__ import annotations
import asyncio
import aiohttp
from app.utils.logging_compat import get_logger
log = get_logger(__name__)
#: HTTP status codes that should be retried for blocklist downloads.
_BLOCKLIST_HTTP_RETRY_STATUSES: frozenset[int] = frozenset({429, 500, 502, 503, 504})

#: Total number of attempts (the first request included) made for a blocklist
#: download before a transient failure is surfaced to the caller.
_BLOCKLIST_HTTP_RETRY_ATTEMPTS: int = 2

#: Base backoff in seconds; the delay doubles after every failed attempt.
_BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS: float = 1.0


class BlocklistDownloader:
    """Downloads blocklist content from remote URLs with exponential backoff retry."""

    def __init__(
        self,
        http_session: aiohttp.ClientSession,
        *,
        retry_attempts: int = _BLOCKLIST_HTTP_RETRY_ATTEMPTS,
        backoff_base: float = _BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS,
        retry_statuses: frozenset[int] = _BLOCKLIST_HTTP_RETRY_STATUSES,
    ) -> None:
        """Initialize the downloader.

        Args:
            http_session: Shared aiohttp session for HTTP requests.
            retry_attempts: Total number of attempts per download, counting
                the first request. Values below 1 are treated as 1 by
                :meth:`download`, so a request is always made.
            backoff_base: Base backoff in seconds; the wait before retry
                ``n`` is ``backoff_base * 2 ** (n - 1)``.
            retry_statuses: HTTP status codes that trigger a retry.
        """
        self.http_session = http_session
        self.retry_attempts = retry_attempts
        self.backoff_base = backoff_base
        self.retry_statuses = retry_statuses

    async def download(
        self,
        url: str,
        timeout: aiohttp.ClientTimeout,
    ) -> tuple[int, str]:
        """Download text from a URL with retry logic for transient failures.

        A response whose status is in ``retry_statuses`` and any exception
        raised while requesting or reading the body are retried until the
        attempt budget is used up. The final attempt is never retried: its
        response is returned as-is (even with a retryable status) and its
        exception is re-raised unchanged.

        Args:
            url: URL to download.
            timeout: Request timeout configuration.

        Returns:
            Tuple of (HTTP status code, response text). The body is decoded
            with ``errors="replace"``.

        Raises:
            TimeoutError: If the request times out after all retries.
            aiohttp.ClientError: If the request fails after all retries.
            Exception: If an unexpected error occurs after all retries.
        """
        # Always make at least one request. A budget of 0 or less used to skip
        # the loop entirely and fail on an assertion without touching the network.
        total_attempts = max(1, self.retry_attempts)

        for attempt in range(1, total_attempts + 1):
            is_last_attempt = attempt >= total_attempts
            backoff = self.backoff_base * (2 ** (attempt - 1))

            # Keep the try body limited to the network interaction so that
            # failures in logging or sleeping are not mistaken for download
            # errors and retried.
            try:
                async with self.http_session.get(url, timeout=timeout) as resp:
                    text = await resp.text(errors="replace")
                    status = resp.status
            except (TimeoutError, aiohttp.ClientError) as exc:
                if is_last_attempt:
                    raise
                log.warning(
                    "blocklist_download_retry_error",
                    url=url,
                    attempt=attempt,
                    error=repr(exc),
                    backoff=backoff,
                )
            except Exception as exc:
                # Deliberately broad: unexpected errors are retried as well,
                # but tagged in the log so they can be told apart.
                if is_last_attempt:
                    raise
                log.warning(
                    "blocklist_download_retry_error",
                    url=url,
                    attempt=attempt,
                    error=repr(exc),
                    error_type="unexpected",
                    backoff=backoff,
                )
            else:
                if is_last_attempt or status not in self.retry_statuses:
                    return status, text
                log.warning(
                    "blocklist_download_retry",
                    url=url,
                    status=status,
                    attempt=attempt,
                    backoff=backoff,
                )

            # Reached only when another attempt will follow.
            await asyncio.sleep(backoff)

        # Defensive guard: the last iteration always returns or raises.
        raise RuntimeError("blocklist download loop ended without a result")