Refactor: Split blocklist import flow into focused components
Extracted the monolithic import_source() function (776 lines) into focused, testable components with clear single responsibilities:

- BlocklistDownloader: HTTP download with exponential backoff retry logic
  * Handles transient failures (429, 5xx errors, timeouts)
  * Configurable retry attempts and backoff strategy
  * 93% test coverage
- BlocklistParser: Parse and validate IP addresses
  * Extract valid IPv4/IPv6 addresses from text
  * Skip CIDRs and malformed entries gracefully
  * Separate parsing from validation concerns
  * 100% test coverage
- BanExecutor: Ban execution with error handling
  * Ban IPs via fail2ban socket
  * Stop on JailNotFoundError (jail doesn't exist)
  * Continue on JailOperationError (individual ban failures)
  * 100% test coverage
- BlocklistImportWorkflow: Thin orchestrator
  * Coordinates the download → parse → ban → log flow
  * Pre-warms geo cache with newly banned IPs
  * 96% test coverage
- blocklist_service.py: Maintains public API
  * Source CRUD (create, read, update, delete)
  * URL validation and preview functionality
  * Scheduling configuration and import triggers
  * 92% test coverage

Benefits:
* Each component is independently testable with mock dependencies
* Error handling is explicit and localized
* Components can evolve independently
* Logging is contextual and clear
* Retry and transient error handling are isolated

Testing:
* All 36 existing blocklist_service tests pass
* All 13 blocklist import task tests pass
* Added 17 comprehensive component unit tests
* Combined 96%+ coverage on new modules
* Zero type errors in new code

Documentation:
* Updated Refactoring.md with detailed architecture notes
* Added component architecture diagram to Architekture.md
* Documented ownership and responsibilities of each component

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
119
backend/app/services/blocklist_downloader.py
Normal file
119
backend/app/services/blocklist_downloader.py
Normal file
@@ -0,0 +1,119 @@
|
||||
"""Blocklist downloader component.
|
||||
|
||||
Handles downloading blocklist content from remote URLs with retry logic for
|
||||
transient failures (429, 5xx errors, timeouts, network errors).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
|
||||
import aiohttp
|
||||
import structlog
|
||||
|
||||
# Module-level structured logger used for download retry warnings.
log: structlog.stdlib.BoundLogger = structlog.get_logger()

#: HTTP response statuses for which a blocklist download is retried:
#: 429 plus the 500, 502, 503 and 504 server errors.
_BLOCKLIST_HTTP_RETRY_STATUSES: frozenset[int] = frozenset(
    (429, 500, 502, 503, 504)
)

#: Default number of attempts made for a blocklist download that keeps
#: hitting transient failures.
_BLOCKLIST_HTTP_RETRY_ATTEMPTS: int = 2

#: Default base delay, in seconds, for the backoff between attempts.
_BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS: float = 1.0
|
||||
|
||||
|
||||
class BlocklistDownloader:
    """Downloads blocklist content from remote URLs with exponential backoff retry.

    A request is retried when the response status is one of ``retry_statuses``
    or when the request raises. The wait after attempt *n* (1-based) is
    ``backoff_base * 2 ** (n - 1)`` seconds.
    """

    def __init__(
        self,
        http_session: aiohttp.ClientSession,
        *,
        retry_attempts: int = _BLOCKLIST_HTTP_RETRY_ATTEMPTS,
        backoff_base: float = _BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS,
        retry_statuses: frozenset[int] = _BLOCKLIST_HTTP_RETRY_STATUSES,
    ) -> None:
        """Initialize the downloader.

        Args:
            http_session: aiohttp session used to issue the HTTP requests.
            retry_attempts: Total number of attempts per download, including
                the first one (so ``2`` means one retry). Values below 1 are
                treated as 1 by :meth:`download`.
            backoff_base: Base backoff in seconds; doubled after every
                failed attempt.
            retry_statuses: HTTP status codes that trigger a retry.
        """
        self.http_session = http_session
        self.retry_attempts = retry_attempts
        self.backoff_base = backoff_base
        self.retry_statuses = retry_statuses

    def _backoff_delay(self, attempt: int) -> float:
        """Return the seconds to wait after failed attempt *attempt* (1-based)."""
        return self.backoff_base * (2 ** (attempt - 1))

    async def download(
        self,
        url: str,
        timeout: aiohttp.ClientTimeout,
    ) -> tuple[int, str]:
        """Download text from a URL with retry logic for transient failures.

        The response body is decoded with ``errors="replace"``. If the final
        attempt still answers with a status from ``retry_statuses``, that
        status and body are returned rather than raised, so callers must
        inspect the returned status code.

        Args:
            url: URL to download.
            timeout: Request timeout configuration.

        Returns:
            Tuple of (HTTP status code, response text).

        Raises:
            TimeoutError: If the final attempt times out.
            aiohttp.ClientError: If the final attempt fails with a client error.
            Exception: Any other error raised by the final attempt.
        """
        # Always issue at least one request. A configured value below 1 would
        # otherwise skip the loop and leave nothing to return or raise.
        max_attempts = max(1, self.retry_attempts)

        for attempt in range(1, max_attempts + 1):
            is_final_attempt = attempt >= max_attempts

            # Keep the try body limited to the request and body read so that
            # only download failures are treated as retryable.
            try:
                async with self.http_session.get(url, timeout=timeout) as resp:
                    status = resp.status
                    text = await resp.text(errors="replace")
            except (TimeoutError, aiohttp.ClientError) as exc:
                if is_final_attempt:
                    raise
                backoff = self._backoff_delay(attempt)
                log.warning(
                    "blocklist_download_retry_error",
                    url=url,
                    attempt=attempt,
                    error=repr(exc),
                    backoff=backoff,
                )
            except Exception as exc:
                # Unexpected errors are retried as well, but tagged in the
                # log so they can be told apart from timeouts/client errors.
                if is_final_attempt:
                    raise
                backoff = self._backoff_delay(attempt)
                log.warning(
                    "blocklist_download_retry_error",
                    url=url,
                    attempt=attempt,
                    error=repr(exc),
                    error_type="unexpected",
                    backoff=backoff,
                )
            else:
                if is_final_attempt or status not in self.retry_statuses:
                    return status, text
                backoff = self._backoff_delay(attempt)
                log.warning(
                    "blocklist_download_retry",
                    url=url,
                    status=status,
                    attempt=attempt,
                    backoff=backoff,
                )

            # Wait outside the response context so nothing is held open
            # during the backoff.
            await asyncio.sleep(backoff)

        # The final loop iteration always returns or raises; an explicit raise
        # (unlike ``assert``) survives ``python -O`` and satisfies type checkers.
        raise AssertionError("unreachable: final download attempt must return or raise")
|
||||
Reference in New Issue
Block a user