## Problem

The blocklist URL validation at create/update time has a TOCTOU (time-of-check-to-time-of-use) window. An attacker can perform a DNS-rebinding attack where:

1. User adds blocklist URL pointing to attacker.com
2. At create time, attacker.com resolves to a public IP → validation passes
3. Later, when fetching, attacker.com resolves to 192.168.1.1 (internal network)
4. HTTP client connects to the private IP, potentially accessing internal services

## Solution

Add runtime destination IP validation at connection time via a custom socket factory:

- Created 'dns_validated_connector.py' with create_dns_validated_socket_factory() that validates all resolved IPs before socket creation
- HTTP session now uses the validated socket factory, protecting all blocklist imports globally
- Rejects connections to RFC 1918 private ranges, loopback, link-local, ULA, multicast, and reserved addresses (IPv4 and IPv6)
- Added comprehensive test coverage with 13 test cases

## Changes

- backend/app/services/dns_validated_connector.py: Custom socket factory with IP validation
- backend/app/startup.py: Use DNS-validated socket factory in HTTP session creation
- backend/app/utils/ip_utils.py: Updated docstring explaining runtime validation
- backend/app/services/blocklist_downloader.py: Updated module docstring
- backend/app/services/blocklist_service.py: Updated docstrings explaining two-layer protection
- backend/tests/test_services/test_dns_validated_connector.py: Test suite for socket factory
- Docs/Architekture.md: Added detailed section on DNS-rebinding protection

## Testing

- All 13 DNS validation tests pass
- All blocklist downloader tests pass (unaffected by changes)
- Linting: ruff, mypy pass with --strict
- Test coverage: 90% line coverage on dns_validated_connector.py

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
121 lines
4.3 KiB
Python
121 lines
4.3 KiB
Python
"""Blocklist downloader component.
|
|
|
|
Handles downloading blocklist content from remote URLs with retry logic for
|
|
transient failures (429, 5xx errors, timeouts, network errors). Works with
|
|
DnsValidatedTCPConnector to prevent DNS-rebinding attacks at connection time.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
|
|
import aiohttp
|
|
import structlog
|
|
|
|
#: Module-level structlog logger; used below to emit retry warnings.
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
|
|
|
#: HTTP status codes that should be retried for blocklist downloads.
_BLOCKLIST_HTTP_RETRY_STATUSES: frozenset[int] = frozenset({429, 500, 502, 503, 504})

#: Total number of attempts (the first request included) made per download
#: before a transient failure is reported to the caller.
_BLOCKLIST_HTTP_RETRY_ATTEMPTS: int = 2

#: Base backoff in seconds used between retry attempts; doubled on each retry.
_BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS: float = 1.0


class BlocklistDownloader:
    """Downloads blocklist content from remote URLs with exponential backoff retry."""

    def __init__(
        self,
        http_session: aiohttp.ClientSession,
        *,
        retry_attempts: int = _BLOCKLIST_HTTP_RETRY_ATTEMPTS,
        backoff_base: float = _BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS,
        retry_statuses: frozenset[int] = _BLOCKLIST_HTTP_RETRY_STATUSES,
    ) -> None:
        """Initialize the downloader.

        Args:
            http_session: Shared aiohttp session for HTTP requests.
            retry_attempts: Total number of attempts per download, counting
                the first request. Values below 1 are treated as 1 by
                :meth:`download`, so at least one request is always made.
            backoff_base: Base backoff in seconds for exponential backoff.
                The wait after attempt ``n`` is ``backoff_base * 2 ** (n - 1)``.
            retry_statuses: HTTP status codes that trigger a retry.
        """
        self.http_session = http_session
        self.retry_attempts = retry_attempts
        self.backoff_base = backoff_base
        self.retry_statuses = retry_statuses

    async def download(
        self,
        url: str,
        timeout: aiohttp.ClientTimeout,
    ) -> tuple[int, str]:
        """Download text from a URL with retry logic for transient failures.

        A response whose status is in ``retry_statuses`` is retried until the
        attempts are exhausted; the final attempt's status and body are
        returned as-is, whatever the status. Exceptions are retried the same
        way and the last one is re-raised.

        Args:
            url: URL to download.
            timeout: Request timeout configuration.

        Returns:
            Tuple of (HTTP status code, response text). The body is decoded
            with ``errors="replace"``, so undecodable bytes never raise.

        Raises:
            TimeoutError: If the request times out after all retries.
            aiohttp.ClientError: If the request fails after all retries.
            Exception: If an unexpected error occurs after all retries.
        """
        # ``retry_attempts`` of 0 (or less) is a plausible way to ask for
        # "no retries"; clamp so that the request is still made once instead
        # of falling straight through the loop.
        max_attempts = max(1, self.retry_attempts)

        for attempt in range(1, max_attempts + 1):
            final_attempt = attempt >= max_attempts
            backoff = self.backoff_base * (2 ** (attempt - 1))

            try:
                async with self.http_session.get(url, timeout=timeout) as resp:
                    text = await resp.text(errors="replace")
                    status = resp.status

                if final_attempt or status not in self.retry_statuses:
                    return status, text

                log.warning(
                    "blocklist_download_retry",
                    url=url,
                    status=status,
                    attempt=attempt,
                    backoff=backoff,
                )
            except (TimeoutError, aiohttp.ClientError) as exc:
                if final_attempt:
                    raise
                log.warning(
                    "blocklist_download_retry_error",
                    url=url,
                    attempt=attempt,
                    error=repr(exc),
                    backoff=backoff,
                )
            except Exception as exc:
                # Unexpected failures are retried too, but tagged so they can
                # be told apart from ordinary network errors in the logs.
                if final_attempt:
                    raise
                log.warning(
                    "blocklist_download_retry_error",
                    url=url,
                    attempt=attempt,
                    error=repr(exc),
                    error_type="unexpected",
                    backoff=backoff,
                )

            # Reached only when this attempt failed and another one follows.
            # The response context has already been exited at this point.
            await asyncio.sleep(backoff)

        # The final attempt always returns or raises above; this only guards
        # against a future edit breaking that invariant.
        raise RuntimeError("blocklist download retry loop exited without a result")
|