Files
BanGUI/backend/app/models/blocklist.py
Lukas 52f237d5d4 Make background tasks idempotent - prevent duplicate bans on retry
CRITICAL FIX: Background tasks (especially blocklist_import) crashed mid-execution,
leaving partial state. On retry, the same bans were applied again, causing duplicates.

Solution: Content-hash based operation tracking for blocklist imports:
- Added import_runs table (migration 6) to track operations by source + content hash
- Before banning, check if this exact content has already been imported
- If completed: skip banning (already done), optionally re-warm cache
- If new or failed: proceed with ban and mark as completed or failed

Changes:
- Database: Migration 6 adds import_runs table with operation state tracking
- Model: Added ImportRunEntry for import run records
- Repository: New import_run_repo module with CRUD operations
- Workflow: Updated blocklist_import_workflow to check operation history before banning
- Dependencies: Registered import_run_repo for dependency injection
- Tests: Added test_import_source_idempotent_on_retry and test_import_source_different_content_not_reused
- Documentation: Added Task Idempotency section to Backend-Development.md

Verification:
- All 7 import tests pass (5 existing + 2 new idempotency tests)
- Type checking: mypy --strict passes
- Linting: ruff passes
- No API changes, backwards compatible via automatic migration

Fixes: Background tasks not idempotent #CRITICAL

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-30 21:54:14 +02:00

179 lines
5.7 KiB
Python

"""Blocklist source and import log Pydantic models.
Data shapes for blocklist source management, import operations, scheduling,
and import log retrieval.
"""
from __future__ import annotations
from enum import StrEnum
from pydantic import AnyHttpUrl, Field
from app.models.response import BanGuiBaseModel, PaginatedListResponse
# ---------------------------------------------------------------------------
# Blocklist source
# ---------------------------------------------------------------------------
class BlocklistSource(BanGuiBaseModel):
    """A blocklist source definition as a domain object.

    Every field is required. Unlike the create/update payloads, ``url`` is
    a plain string here, and both timestamps are carried as strings.
    """

    # Identity.
    id: int
    name: str

    # Where the list lives and the source's enabled flag.
    url: str
    enabled: bool

    # Audit timestamps (string-typed; no format is enforced by this model).
    created_at: str
    updated_at: str
class BlocklistSourceCreate(BanGuiBaseModel):
    """Request body for ``POST /api/blocklists``.

    The URL has to use the http or https scheme. Its hostname must also
    resolve to a public IP (private, loopback, link-local and reserved
    addresses are not acceptable); that check is not done by this model but
    asynchronously in the service layer.
    """

    name: str = Field(
        ...,
        min_length=1,
        max_length=100,
        description="Human-readable source name.",
    )
    url: AnyHttpUrl = Field(
        ...,
        description="URL of the blocklist file (http/https only).",
    )
    # New sources are enabled unless the caller says otherwise.
    enabled: bool = True
class BlocklistSourceUpdate(BanGuiBaseModel):
    """Request body for ``PUT /api/blocklists/{id}``.

    Every field is optional and defaults to ``None``. When a URL is
    supplied it must use the http or https scheme.
    """

    # Same length bounds as on creation, applied only when a name is given.
    name: str | None = Field(default=None, min_length=1, max_length=100)
    url: AnyHttpUrl | None = None
    enabled: bool | None = None
class BlocklistListResponse(BanGuiBaseModel):
    """Response body for ``GET /api/blocklists``."""

    # Falls back to a fresh empty list when no sources are supplied.
    sources: list[BlocklistSource] = Field(
        default_factory=list,
    )
# ---------------------------------------------------------------------------
# Import log
# ---------------------------------------------------------------------------
class ImportLogEntry(BanGuiBaseModel):
    """One record in the blocklist import log (a single import run)."""

    id: int

    # The source that was imported; the id may be ``None``.
    source_id: int | None
    source_url: str

    # When the run was logged (string-typed).
    timestamp: str

    # Outcome counters for the run.
    ips_imported: int
    ips_skipped: int

    # Error text for the run, or ``None``.
    errors: str | None
class ImportLogListResponse(PaginatedListResponse[ImportLogEntry]):
    """Response body for ``GET /api/blocklists/log``.

    A paginated list covering all blocklist import runs, including their
    timestamps, source info, and per-source import/skip counts. No fields
    are added beyond those of the paginated base response.
    """
# ---------------------------------------------------------------------------
# Import run tracking (for idempotency)
# ---------------------------------------------------------------------------
class ImportRunEntry(BanGuiBaseModel):
    """A unique blocklist import run, keyed by source and content hash.

    Lets the import detect a re-run of identical content so that bans are
    not applied a second time when the scheduler retries after a crash.
    """

    id: int

    # Identity of the run: which source, and a hash of the content.
    source_id: int
    content_hash: str

    # One of 'pending', 'completed' or 'failed'. Typed as a plain string,
    # so this model does not itself restrict the value.
    status: str

    # Outcome counters and error text (``None`` when there is none).
    imported_count: int
    skipped_count: int
    error_message: str | None

    # Audit timestamps (string-typed).
    created_at: str
    updated_at: str
# ---------------------------------------------------------------------------
# Schedule
# ---------------------------------------------------------------------------
class ScheduleFrequency(StrEnum):
"""Available import schedule frequency presets."""
hourly = "hourly"
daily = "daily"
weekly = "weekly"
class ScheduleConfig(BanGuiBaseModel):
    """Configuration of the import schedule.

    Which fields matter depends on *frequency*:

    - ``hourly``: run every ``interval_hours`` hours.
    - ``daily``: run once a day at ``hour``:``minute`` (UTC).
    - ``weekly``: as daily, plus ``day_of_week`` (0=Monday … 6=Sunday).
    """

    # Deliberately not strict=True: enum values arrive as plain strings from
    # both FastAPI and json.loads(), and strict mode would refuse the
    # string→enum coercion.
    frequency: ScheduleFrequency = ScheduleFrequency.daily
    interval_hours: int = Field(
        default=24,
        ge=1,
        le=168,
        description="Used when frequency=hourly",
    )
    hour: int = Field(
        default=3,
        ge=0,
        le=23,
        description="UTC hour for daily/weekly runs",
    )
    minute: int = Field(
        default=0,
        ge=0,
        le=59,
        description="Minute for daily/weekly runs",
    )
    day_of_week: int = Field(
        default=0,
        ge=0,
        le=6,
        description="Day of week for weekly runs (0=Monday … 6=Sunday)",
    )
class ScheduleInfo(BanGuiBaseModel):
    """The active schedule configuration plus runtime metadata about runs."""

    config: ScheduleConfig

    # Next and most recent run times (string-typed); either may be ``None``.
    next_run_at: str | None
    last_run_at: str | None

    last_run_errors: bool | None = None
    """``True`` if the most recent import had errors, ``False`` if clean, ``None`` if never run."""
# ---------------------------------------------------------------------------
# Import results
# ---------------------------------------------------------------------------
class ImportSourceResult(BanGuiBaseModel):
    """Outcome of importing one blocklist source."""

    # The source that was imported; the id may be ``None``.
    source_id: int | None
    source_url: str

    # Outcome counters for this source.
    ips_imported: int
    ips_skipped: int

    # Error text for this source, or ``None``.
    error: str | None
class ImportRunResult(BanGuiBaseModel):
    """Combined outcome of a full import run over all enabled sources."""

    # Per-source outcomes; a fresh empty list when none are supplied.
    results: list[ImportSourceResult] = Field(
        default_factory=list,
    )

    # Run-wide totals (required).
    total_imported: int
    total_skipped: int
    errors_count: int
# ---------------------------------------------------------------------------
# Preview
# ---------------------------------------------------------------------------
class PreviewResponse(BanGuiBaseModel):
    """Response body for ``GET /api/blocklists/{id}/preview``."""

    entries: list[str] = Field(
        default_factory=list,
        description="Sample of valid IP entries",
    )

    # Line counts reported with the preview (required).
    total_lines: int
    valid_count: int
    skipped_count: int