"""Blocklist service. Manages blocklist source CRUD, URL preview, IP import (download → validate → ban via fail2ban), and schedule persistence. All ban operations target a dedicated fail2ban jail (default: ``"blocklist-import"``) so blocklist-origin bans are tracked separately from regular bans. If that jail does not exist or fail2ban is unreachable, the error is recorded in the import log and processing continues. Schedule configuration is stored as JSON in the application settings table under the key ``"blocklist_schedule"``. """ from __future__ import annotations import json from typing import TYPE_CHECKING, Any import structlog from app.models.blocklist import ( BlocklistSource, ImportRunResult, ImportSourceResult, PreviewResponse, ScheduleConfig, ScheduleInfo, ) from app.repositories import blocklist_repo, import_log_repo, settings_repo from app.utils.ip_utils import is_valid_ip, is_valid_network if TYPE_CHECKING: import aiohttp import aiosqlite log: structlog.stdlib.BoundLogger = structlog.get_logger() #: Settings key used to persist the schedule config. _SCHEDULE_SETTINGS_KEY: str = "blocklist_schedule" #: fail2ban jail name for blocklist-origin bans. BLOCKLIST_JAIL: str = "blocklist-import" #: Maximum number of sample entries returned by the preview endpoint. _PREVIEW_LINES: int = 20 #: Maximum bytes to download for a preview (first 64 KB). _PREVIEW_MAX_BYTES: int = 65536 # --------------------------------------------------------------------------- # Source CRUD helpers # --------------------------------------------------------------------------- def _row_to_source(row: dict[str, Any]) -> BlocklistSource: """Convert a repository row dict to a :class:`BlocklistSource`. Args: row: Dict with keys matching the ``blocklist_sources`` columns. Returns: A validated :class:`~app.models.blocklist.BlocklistSource` instance. """ return BlocklistSource.model_validate(row) async def list_sources(db: aiosqlite.Connection) -> list[BlocklistSource]: """Return all configured blocklist sources. Args: db: Active application database connection. Returns: List of :class:`~app.models.blocklist.BlocklistSource` instances. """ rows = await blocklist_repo.list_sources(db) return [_row_to_source(r) for r in rows] async def get_source( db: aiosqlite.Connection, source_id: int, ) -> BlocklistSource | None: """Return a single blocklist source, or ``None`` if not found. Args: db: Active application database connection. source_id: Primary key of the desired source. Returns: :class:`~app.models.blocklist.BlocklistSource` or ``None``. """ row = await blocklist_repo.get_source(db, source_id) return _row_to_source(row) if row is not None else None async def create_source( db: aiosqlite.Connection, name: str, url: str, *, enabled: bool = True, ) -> BlocklistSource: """Create a new blocklist source and return the persisted record. Args: db: Active application database connection. name: Human-readable display name. url: URL of the blocklist text file. enabled: Whether the source is active. Defaults to ``True``. Returns: The newly created :class:`~app.models.blocklist.BlocklistSource`. """ new_id = await blocklist_repo.create_source(db, name, url, enabled=enabled) source = await get_source(db, new_id) assert source is not None # noqa: S101 log.info("blocklist_source_created", id=new_id, name=name, url=url) return source async def update_source( db: aiosqlite.Connection, source_id: int, *, name: str | None = None, url: str | None = None, enabled: bool | None = None, ) -> BlocklistSource | None: """Update fields on a blocklist source. Args: db: Active application database connection. source_id: Primary key of the source to modify. name: New display name, or ``None`` to leave unchanged. url: New URL, or ``None`` to leave unchanged. enabled: New enabled state, or ``None`` to leave unchanged. Returns: Updated :class:`~app.models.blocklist.BlocklistSource`, or ``None`` if the source does not exist. """ updated = await blocklist_repo.update_source( db, source_id, name=name, url=url, enabled=enabled ) if not updated: return None source = await get_source(db, source_id) log.info("blocklist_source_updated", id=source_id) return source async def delete_source(db: aiosqlite.Connection, source_id: int) -> bool: """Delete a blocklist source. Args: db: Active application database connection. source_id: Primary key of the source to delete. Returns: ``True`` if the source was found and deleted, ``False`` otherwise. """ deleted = await blocklist_repo.delete_source(db, source_id) if deleted: log.info("blocklist_source_deleted", id=source_id) return deleted # --------------------------------------------------------------------------- # Preview # --------------------------------------------------------------------------- async def preview_source( url: str, http_session: aiohttp.ClientSession, *, sample_lines: int = _PREVIEW_LINES, ) -> PreviewResponse: """Download the beginning of a blocklist URL and return a preview. Args: url: URL to download. http_session: Shared :class:`aiohttp.ClientSession`. sample_lines: Maximum number of lines to include in the preview. Returns: :class:`~app.models.blocklist.PreviewResponse` with a sample of valid IP entries and validation statistics. Raises: ValueError: If the URL cannot be reached or returns a non-200 status. """ try: async with http_session.get(url, timeout=_aiohttp_timeout(10)) as resp: if resp.status != 200: raise ValueError(f"HTTP {resp.status} from {url}") raw = await resp.content.read(_PREVIEW_MAX_BYTES) except Exception as exc: log.warning("blocklist_preview_failed", url=url, error=str(exc)) raise ValueError(str(exc)) from exc lines = raw.decode(errors="replace").splitlines() entries: list[str] = [] valid = 0 skipped = 0 for line in lines: stripped = line.strip() if not stripped or stripped.startswith("#"): continue if is_valid_ip(stripped) or is_valid_network(stripped): valid += 1 if len(entries) < sample_lines: entries.append(stripped) else: skipped += 1 return PreviewResponse( entries=entries, total_lines=len(lines), valid_count=valid, skipped_count=skipped, ) # --------------------------------------------------------------------------- # Import # --------------------------------------------------------------------------- async def import_source( source: BlocklistSource, http_session: aiohttp.ClientSession, socket_path: str, db: aiosqlite.Connection, ) -> ImportSourceResult: """Download and apply bans from a single blocklist source. The function downloads the URL, validates each line as an IP address, and bans valid IPv4/IPv6 addresses via fail2ban in :data:`BLOCKLIST_JAIL`. CIDR ranges are counted as skipped since fail2ban requires individual addresses. Any error encountered during download is recorded and the result is returned without raising. After a successful import the geo cache is pre-warmed by batch-resolving all newly banned IPs. This ensures the dashboard and map show country data immediately after import rather than facing cold-cache lookups. Args: source: The :class:`~app.models.blocklist.BlocklistSource` to import. http_session: Shared :class:`aiohttp.ClientSession`. socket_path: Path to the fail2ban Unix socket. db: Application database for logging. Returns: :class:`~app.models.blocklist.ImportSourceResult` with counters. """ # --- Download --- try: async with http_session.get( source.url, timeout=_aiohttp_timeout(30) ) as resp: if resp.status != 200: error_msg = f"HTTP {resp.status}" await _log_result(db, source, 0, 0, error_msg) log.warning("blocklist_import_download_failed", url=source.url, status=resp.status) return ImportSourceResult( source_id=source.id, source_url=source.url, ips_imported=0, ips_skipped=0, error=error_msg, ) content = await resp.text(errors="replace") except Exception as exc: error_msg = str(exc) await _log_result(db, source, 0, 0, error_msg) log.warning("blocklist_import_download_error", url=source.url, error=error_msg) return ImportSourceResult( source_id=source.id, source_url=source.url, ips_imported=0, ips_skipped=0, error=error_msg, ) # --- Validate and ban --- imported = 0 skipped = 0 ban_error: str | None = None imported_ips: list[str] = [] # Import jail_service here to avoid circular import at module level. from app.services import jail_service # noqa: PLC0415 for line in content.splitlines(): stripped = line.strip() if not stripped or stripped.startswith("#"): continue if not is_valid_ip(stripped): # Skip CIDRs and malformed entries gracefully. skipped += 1 continue try: await jail_service.ban_ip(socket_path, BLOCKLIST_JAIL, stripped) imported += 1 imported_ips.append(stripped) except jail_service.JailNotFoundError as exc: # The target jail does not exist in fail2ban — there is no point # continuing because every subsequent ban would also fail. ban_error = str(exc) log.warning( "blocklist_jail_not_found", jail=BLOCKLIST_JAIL, error=str(exc), ) break except Exception as exc: skipped += 1 if ban_error is None: ban_error = str(exc) log.debug("blocklist_ban_failed", ip=stripped, error=str(exc)) await _log_result(db, source, imported, skipped, ban_error) log.info( "blocklist_source_imported", source_id=source.id, url=source.url, imported=imported, skipped=skipped, error=ban_error, ) # --- Pre-warm geo cache for newly imported IPs --- if imported_ips: from app.services import geo_service # noqa: PLC0415 uncached_ips: list[str] = [ ip for ip in imported_ips if not geo_service.is_cached(ip) ] skipped_geo: int = len(imported_ips) - len(uncached_ips) if skipped_geo > 0: log.info( "blocklist_geo_prewarm_cache_hit", source_id=source.id, skipped=skipped_geo, to_lookup=len(uncached_ips), ) if uncached_ips: try: await geo_service.lookup_batch(uncached_ips, http_session, db=db) log.info( "blocklist_geo_prewarm_complete", source_id=source.id, count=len(uncached_ips), ) except Exception as exc: # noqa: BLE001 log.warning( "blocklist_geo_prewarm_failed", source_id=source.id, error=str(exc), ) return ImportSourceResult( source_id=source.id, source_url=source.url, ips_imported=imported, ips_skipped=skipped, error=ban_error, ) async def import_all( db: aiosqlite.Connection, http_session: aiohttp.ClientSession, socket_path: str, ) -> ImportRunResult: """Import all enabled blocklist sources. Iterates over every source with ``enabled = True``, calls :func:`import_source` for each, and aggregates the results. Args: db: Application database connection. http_session: Shared :class:`aiohttp.ClientSession`. socket_path: fail2ban socket path. Returns: :class:`~app.models.blocklist.ImportRunResult` with aggregated counters and per-source results. """ sources = await blocklist_repo.list_enabled_sources(db) results: list[ImportSourceResult] = [] total_imported = 0 total_skipped = 0 errors_count = 0 for row in sources: source = _row_to_source(row) result = await import_source(source, http_session, socket_path, db) results.append(result) total_imported += result.ips_imported total_skipped += result.ips_skipped if result.error is not None: errors_count += 1 log.info( "blocklist_import_all_complete", sources=len(sources), total_imported=total_imported, total_skipped=total_skipped, errors=errors_count, ) return ImportRunResult( results=results, total_imported=total_imported, total_skipped=total_skipped, errors_count=errors_count, ) # --------------------------------------------------------------------------- # Schedule # --------------------------------------------------------------------------- _DEFAULT_SCHEDULE = ScheduleConfig() async def get_schedule(db: aiosqlite.Connection) -> ScheduleConfig: """Read the import schedule config from the settings table. Returns the default config (daily at 03:00 UTC) if no schedule has been saved yet. Args: db: Active application database connection. Returns: The stored (or default) :class:`~app.models.blocklist.ScheduleConfig`. """ raw = await settings_repo.get_setting(db, _SCHEDULE_SETTINGS_KEY) if raw is None: return _DEFAULT_SCHEDULE try: data = json.loads(raw) return ScheduleConfig.model_validate(data) except Exception: log.warning("blocklist_schedule_invalid", raw=raw) return _DEFAULT_SCHEDULE async def set_schedule( db: aiosqlite.Connection, config: ScheduleConfig, ) -> ScheduleConfig: """Persist a new schedule configuration. Args: db: Active application database connection. config: The :class:`~app.models.blocklist.ScheduleConfig` to store. Returns: The saved configuration (same object after validation). """ await settings_repo.set_setting( db, _SCHEDULE_SETTINGS_KEY, config.model_dump_json() ) log.info("blocklist_schedule_updated", frequency=config.frequency, hour=config.hour) return config async def get_schedule_info( db: aiosqlite.Connection, next_run_at: str | None, ) -> ScheduleInfo: """Return the schedule config together with last-run metadata. Args: db: Active application database connection. next_run_at: ISO 8601 string of the next scheduled run, or ``None`` if not yet scheduled (provided by the caller from APScheduler). Returns: :class:`~app.models.blocklist.ScheduleInfo` combining config and runtime metadata. """ config = await get_schedule(db) last_log = await import_log_repo.get_last_log(db) last_run_at = last_log["timestamp"] if last_log else None last_run_errors: bool | None = (last_log["errors"] is not None) if last_log else None return ScheduleInfo( config=config, next_run_at=next_run_at, last_run_at=last_run_at, last_run_errors=last_run_errors, ) # --------------------------------------------------------------------------- # Internal helpers # --------------------------------------------------------------------------- def _aiohttp_timeout(seconds: float) -> Any: """Return an :class:`aiohttp.ClientTimeout` with the given total timeout. Args: seconds: Total timeout in seconds. Returns: An :class:`aiohttp.ClientTimeout` instance. """ import aiohttp # noqa: PLC0415 return aiohttp.ClientTimeout(total=seconds) async def _log_result( db: aiosqlite.Connection, source: BlocklistSource, ips_imported: int, ips_skipped: int, error: str | None, ) -> None: """Write an import log entry for a completed source import. Args: db: Application database connection. source: The source that was imported. ips_imported: Count of successfully banned IPs. ips_skipped: Count of skipped/invalid entries. error: Error string, or ``None`` on success. """ await import_log_repo.add_log( db, source_id=source.id, source_url=source.url, ips_imported=ips_imported, ips_skipped=ips_skipped, errors=error, )