Refactor: Split blocklist import flow into focused components
Extracted the monolithic import_source() function (776 lines) into focused, testable components with clear single responsibilities: - BlocklistDownloader: HTTP download with exponential backoff retry logic * Handles transient failures (429, 5xx errors, timeouts) * Configurable retry attempts and backoff strategy * 93% test coverage - BlocklistParser: Parse and validate IP addresses * Extract valid IPv4/IPv6 addresses from text * Skip CIDRs and malformed entries gracefully * Separate parsing from validation concerns * 100% test coverage - BanExecutor: Ban execution with error handling * Ban IPs via fail2ban socket * Stop on JailNotFoundError (jail doesn't exist) * Continue on JailOperationError (individual ban failures) * 100% test coverage - BlocklistImportWorkflow: Thin orchestrator * Coordinates the download → parse → ban → log flow * Pre-warms geo cache with newly banned IPs * 96% test coverage - blocklist_service.py: Maintains public API * Source CRUD (create, read, update, delete) * URL validation and preview functionality * Scheduling configuration and import triggers * 92% test coverage Benefits: * Each component is independently testable with mock dependencies * Error handling is explicit and localized * Components can evolve independently * Logging is contextual and clear * Retry and transient error handling are isolated Testing: * All 36 existing blocklist_service tests pass * All 13 blocklist import task tests pass * Added 17 comprehensive component unit tests * Combined 96%+ coverage on new modules * Zero type errors in new code Documentation: * Updated Refactoring.md with detailed architecture notes * Added component architecture diagram to Architekture.md * Documented ownership and responsibilities of each component Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
112
backend/app/services/blocklist_parser.py
Normal file
112
backend/app/services/blocklist_parser.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""Blocklist parser and validator component.
|
||||
|
||||
Parses blocklist text content and validates individual entries as IP addresses
|
||||
or CIDR networks. Separates valid IPs from invalid/CIDR entries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import structlog
|
||||
|
||||
from app.utils.ip_utils import is_valid_ip, is_valid_network
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
|
||||
|
||||
class ParsedBlocklist:
|
||||
"""Result of parsing a blocklist text."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
valid_ips: list[str],
|
||||
skipped_entries: int,
|
||||
) -> None:
|
||||
"""Initialize parsed result.
|
||||
|
||||
Args:
|
||||
valid_ips: List of valid individual IP addresses.
|
||||
skipped_entries: Count of skipped/invalid entries (comments, CIDRs, malformed).
|
||||
"""
|
||||
self.valid_ips = valid_ips
|
||||
self.skipped_entries = skipped_entries
|
||||
|
||||
@property
|
||||
def total_entries(self) -> int:
|
||||
"""Total number of entries processed."""
|
||||
return len(self.valid_ips) + self.skipped_entries
|
||||
|
||||
|
||||
class BlocklistParser:
|
||||
"""Parses and validates blocklist text content."""
|
||||
|
||||
@staticmethod
|
||||
def parse(content: str) -> ParsedBlocklist:
|
||||
"""Parse blocklist text and extract valid individual IP addresses.
|
||||
|
||||
Lines starting with '#' are treated as comments and skipped.
|
||||
Empty lines are skipped. CIDR ranges and malformed entries are skipped
|
||||
but counted. Only individual IPv4/IPv6 addresses are extracted.
|
||||
|
||||
Args:
|
||||
content: Raw blocklist text content.
|
||||
|
||||
Returns:
|
||||
:class:`ParsedBlocklist` with valid IPs and skip count.
|
||||
"""
|
||||
valid_ips: list[str] = []
|
||||
skipped = 0
|
||||
|
||||
for line in content.splitlines():
|
||||
stripped = line.strip()
|
||||
|
||||
# Skip empty lines and comments
|
||||
if not stripped or stripped.startswith("#"):
|
||||
continue
|
||||
|
||||
# Accept only individual IP addresses, skip CIDRs and malformed
|
||||
if is_valid_ip(stripped):
|
||||
valid_ips.append(stripped)
|
||||
else:
|
||||
skipped += 1
|
||||
|
||||
return ParsedBlocklist(valid_ips=valid_ips, skipped_entries=skipped)
|
||||
|
||||
@staticmethod
|
||||
def parse_with_stats(
|
||||
content: str,
|
||||
*,
|
||||
sample_lines: int = 20,
|
||||
) -> tuple[list[str], dict[str, int]]:
|
||||
"""Parse blocklist and return sample of valid IPs with statistics.
|
||||
|
||||
Used by preview functionality to show sample entries and counts.
|
||||
|
||||
Args:
|
||||
content: Raw blocklist text content.
|
||||
sample_lines: Maximum number of sample entries to return.
|
||||
|
||||
Returns:
|
||||
Tuple of (sample IPs list, stats dict with keys: total_lines,
|
||||
valid_count, skipped_count).
|
||||
"""
|
||||
lines = content.splitlines()
|
||||
entries: list[str] = []
|
||||
valid = 0
|
||||
skipped = 0
|
||||
|
||||
for line in lines:
|
||||
stripped = line.strip()
|
||||
if not stripped or stripped.startswith("#"):
|
||||
continue
|
||||
if is_valid_ip(stripped) or is_valid_network(stripped):
|
||||
valid += 1
|
||||
if len(entries) < sample_lines:
|
||||
entries.append(stripped)
|
||||
else:
|
||||
skipped += 1
|
||||
|
||||
return entries, {
|
||||
"total_lines": len(lines),
|
||||
"valid_count": valid,
|
||||
"skipped_count": skipped,
|
||||
}
|
||||
Reference in New Issue
Block a user