Extracted the monolithic import_source() function (776 lines) into focused, testable components with clear single responsibilities: - BlocklistDownloader: HTTP download with exponential backoff retry logic * Handles transient failures (429, 5xx errors, timeouts) * Configurable retry attempts and backoff strategy * 93% test coverage - BlocklistParser: Parse and validate IP addresses * Extract valid IPv4/IPv6 addresses from text * Skip CIDRs and malformed entries gracefully * Separate parsing from validation concerns * 100% test coverage - BanExecutor: Ban execution with error handling * Ban IPs via fail2ban socket * Stop on JailNotFoundError (jail doesn't exist) * Continue on JailOperationError (individual ban failures) * 100% test coverage - BlocklistImportWorkflow: Thin orchestrator * Coordinates the download → parse → ban → log flow * Pre-warms geo cache with newly banned IPs * 96% test coverage - blocklist_service.py: Maintains public API * Source CRUD (create, read, update, delete) * URL validation and preview functionality * Scheduling configuration and import triggers * 92% test coverage Benefits: * Each component is independently testable with mock dependencies * Error handling is explicit and localized * Components can evolve independently * Logging is contextual and clear * Retry and transient error handling are isolated Testing: * All 36 existing blocklist_service tests pass * All 13 blocklist import task tests pass * Added 17 comprehensive component unit tests * Combined 96%+ coverage on new modules * Zero type errors in new code Documentation: * Updated Refactoring.md with detailed architecture notes * Added component architecture diagram to Architekture.md * Documented ownership and responsibilities of each component Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
120 lines
4.2 KiB
Python
120 lines
4.2 KiB
Python
"""Blocklist downloader component.
|
|
|
|
Handles downloading blocklist content from remote URLs with retry logic for
|
|
transient failures (429, 5xx errors, timeouts, network errors).
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
|
|
import aiohttp
|
|
import structlog
|
|
|
|
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
|
|
|
#: HTTP status codes that should be retried for blocklist downloads.
|
|
_BLOCKLIST_HTTP_RETRY_STATUSES: frozenset[int] = frozenset({429, 500, 502, 503, 504})
|
|
|
|
#: How many attempts to make for transient blocklist download failures.
|
|
_BLOCKLIST_HTTP_RETRY_ATTEMPTS: int = 2
|
|
|
|
#: Base backoff in seconds used between retry attempts.
|
|
_BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS: float = 1.0
|
|
|
|
|
|
class BlocklistDownloader:
|
|
"""Downloads blocklist content from remote URLs with exponential backoff retry."""
|
|
|
|
def __init__(
|
|
self,
|
|
http_session: aiohttp.ClientSession,
|
|
*,
|
|
retry_attempts: int = _BLOCKLIST_HTTP_RETRY_ATTEMPTS,
|
|
backoff_base: float = _BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS,
|
|
retry_statuses: frozenset[int] = _BLOCKLIST_HTTP_RETRY_STATUSES,
|
|
) -> None:
|
|
"""Initialize the downloader.
|
|
|
|
Args:
|
|
http_session: Shared aiohttp session for HTTP requests.
|
|
retry_attempts: Number of retry attempts for transient failures.
|
|
backoff_base: Base backoff in seconds for exponential backoff.
|
|
retry_statuses: HTTP status codes that trigger a retry.
|
|
"""
|
|
self.http_session = http_session
|
|
self.retry_attempts = retry_attempts
|
|
self.backoff_base = backoff_base
|
|
self.retry_statuses = retry_statuses
|
|
|
|
async def download(
|
|
self,
|
|
url: str,
|
|
timeout: aiohttp.ClientTimeout,
|
|
) -> tuple[int, str]:
|
|
"""Download text from a URL with retry logic for transient failures.
|
|
|
|
Args:
|
|
url: URL to download.
|
|
timeout: Request timeout configuration.
|
|
|
|
Returns:
|
|
Tuple of (HTTP status code, response text).
|
|
|
|
Raises:
|
|
TimeoutError: If the request times out after all retries.
|
|
aiohttp.ClientError: If the request fails after all retries.
|
|
Exception: If an unexpected error occurs after all retries.
|
|
"""
|
|
last_exception: Exception | None = None
|
|
|
|
for attempt in range(1, self.retry_attempts + 1):
|
|
try:
|
|
async with self.http_session.get(url, timeout=timeout) as resp:
|
|
text = await resp.text(errors="replace")
|
|
if (
|
|
resp.status in self.retry_statuses
|
|
and attempt < self.retry_attempts
|
|
):
|
|
backoff = self.backoff_base * (2 ** (attempt - 1))
|
|
log.warning(
|
|
"blocklist_download_retry",
|
|
url=url,
|
|
status=resp.status,
|
|
attempt=attempt,
|
|
backoff=backoff,
|
|
)
|
|
await asyncio.sleep(backoff)
|
|
continue
|
|
return resp.status, text
|
|
except (TimeoutError, aiohttp.ClientError) as exc:
|
|
last_exception = exc
|
|
if attempt >= self.retry_attempts:
|
|
raise
|
|
backoff = self.backoff_base * (2 ** (attempt - 1))
|
|
log.warning(
|
|
"blocklist_download_retry_error",
|
|
url=url,
|
|
attempt=attempt,
|
|
error=repr(exc),
|
|
backoff=backoff,
|
|
)
|
|
await asyncio.sleep(backoff)
|
|
except Exception as exc:
|
|
last_exception = exc
|
|
if attempt >= self.retry_attempts:
|
|
raise
|
|
backoff = self.backoff_base * (2 ** (attempt - 1))
|
|
log.warning(
|
|
"blocklist_download_retry_error",
|
|
url=url,
|
|
attempt=attempt,
|
|
error=repr(exc),
|
|
error_type="unexpected",
|
|
backoff=backoff,
|
|
)
|
|
await asyncio.sleep(backoff)
|
|
|
|
assert last_exception is not None
|
|
raise last_exception
|