CRITICAL FIX: Background tasks (especially blocklist_import) crashed mid-execution, leaving partial state. On retry, the same bans were applied again, causing duplicates.

Solution: Content-hash based operation tracking for blocklist imports:
- Added import_runs table (migration 6) to track operations by source + content hash
- Before banning, check if this exact content has already been imported
- If completed: skip banning (already done), optionally re-warm cache
- If new or failed: proceed with ban and mark as completed or failed

Changes:
- Database: Migration 6 adds import_runs table with operation state tracking
- Model: Added ImportRunEntry for import run records
- Repository: New import_run_repo module with CRUD operations
- Workflow: Updated blocklist_import_workflow to check operation history before banning
- Dependencies: Registered import_run_repo for dependency injection
- Tests: Added test_import_source_idempotent_on_retry and test_import_source_different_content_not_reused
- Documentation: Added Task Idempotency section to Backend-Development.md

Verification:
- All 7 import tests pass (5 existing + 2 new idempotency tests)
- Type checking: mypy --strict ✅
- Linting: ruff ✅
- No API changes, backwards compatible via automatic migration

Fixes: Background tasks not idempotent #CRITICAL

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
179 lines
5.7 KiB
Python
179 lines
5.7 KiB
Python
"""Blocklist source and import log Pydantic models.

Data shapes for blocklist source management, import operations, scheduling,
and import log retrieval.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from enum import StrEnum
|
|
|
|
from pydantic import AnyHttpUrl, Field
|
|
|
|
from app.models.response import BanGuiBaseModel, PaginatedListResponse
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Blocklist source
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class BlocklistSource(BanGuiBaseModel):
    """Domain model for a blocklist source definition.

    Every field is required; no defaults are declared on this model.
    """

    # Identifier of the source record.
    id: int
    # Human-readable source name.
    name: str
    # Location of the blocklist file, held as a plain string on this model.
    url: str
    # Whether the source is enabled.
    enabled: bool
    # Creation / last-update timestamps, carried as strings.
    # NOTE(review): the exact timestamp format is not visible here - confirm
    # against the code that populates this model.
    created_at: str
    updated_at: str
|
|
|
|
class BlocklistSourceCreate(BanGuiBaseModel):
    """Payload for ``POST /api/blocklists``.

    URL must use http/https scheme. The hostname must resolve to a public IP
    (not private, loopback, link-local, or reserved). Validation happens
    asynchronously in the service layer.
    """

    # Required; between 1 and 100 characters long.
    name: str = Field(..., min_length=1, max_length=100, description="Human-readable source name.")
    # Required; typed as AnyHttpUrl so the scheme is checked at the model level.
    url: AnyHttpUrl = Field(..., description="URL of the blocklist file (http/https only).")
    # Optional; a new source is enabled unless the caller says otherwise.
    enabled: bool = Field(default=True)
|
|
|
|
class BlocklistSourceUpdate(BanGuiBaseModel):
    """Payload for ``PUT /api/blocklists/{id}``. All fields are optional.

    If URL is provided, it must use http/https scheme.
    """

    # Every field defaults to ``None``, so an omitted field is
    # indistinguishable from an explicit ``None`` at the type level.
    # When present, ``name`` must be 1-100 characters long.
    name: str | None = Field(default=None, min_length=1, max_length=100)
    url: AnyHttpUrl | None = Field(default=None)
    enabled: bool | None = Field(default=None)
|
|
|
|
class BlocklistListResponse(BanGuiBaseModel):
    """Response for ``GET /api/blocklists``."""

    # ``default_factory`` gives each instance its own empty list when no
    # sources are supplied.
    sources: list[BlocklistSource] = Field(default_factory=list)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Import log
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class ImportLogEntry(BanGuiBaseModel):
    """A single blocklist import run record.

    Every field is required; ``source_id`` and ``errors`` accept ``None``.
    """

    # Identifier of the log record.
    id: int
    # Identifier of the source that was imported; may be ``None``.
    source_id: int | None
    # URL the import was performed against.
    source_url: str
    # Time of the run, carried as a string.
    # NOTE(review): timestamp format is not visible here - confirm.
    timestamp: str
    # Per-run counters of imported and skipped IPs.
    ips_imported: int
    ips_skipped: int
    # Error text for the run; may be ``None``.
    errors: str | None
|
|
|
|
class ImportLogListResponse(PaginatedListResponse[ImportLogEntry]):
    """Response for ``GET /api/blocklists/log``.

    Paginated list of all blocklist import runs with timestamps, source info,
    and per-source import/skip counts.

    The class adds no fields of its own; it only binds the generic
    ``PaginatedListResponse`` to ``ImportLogEntry``. The docstring is a
    sufficient class body, so no ``pass`` statement is needed.
    """
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Import run tracking (for idempotency)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class ImportRunEntry(BanGuiBaseModel):
    """Tracks a unique blocklist import run by source and content hash.

    Used to detect re-runs and prevent duplicate bans when the scheduler
    retries after a crash.
    """

    # Identifier of the run record.
    id: int
    # Identifier of the blocklist source (required here, unlike the
    # nullable ``source_id`` on the import log / result models).
    source_id: int
    # Hash of the imported content.
    # NOTE(review): hash algorithm and encoding are not visible here - confirm.
    content_hash: str
    # Run state: 'pending' | 'completed' | 'failed'. The annotation is a
    # plain ``str``, so this model does not restrict the value to those three.
    status: str
    # Counters recorded for the run.
    imported_count: int
    skipped_count: int
    # Failure detail; may be ``None``.
    error_message: str | None
    # Creation / last-update timestamps, carried as strings.
    created_at: str
    updated_at: str
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Schedule
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class ScheduleFrequency(StrEnum):
|
|
"""Available import schedule frequency presets."""
|
|
|
|
hourly = "hourly"
|
|
daily = "daily"
|
|
weekly = "weekly"
|
|
|
|
class ScheduleConfig(BanGuiBaseModel):
    """Import schedule configuration.

    The interpretation of fields depends on *frequency*:

    - ``hourly``: ``interval_hours`` controls how often (every N hours).
    - ``daily``: ``hour`` and ``minute`` specify the daily run time (UTC).
    - ``weekly``: additionally uses ``day_of_week`` (0=Monday … 6=Sunday).

    Every field has a default, so the model can be built with no arguments;
    the defaults describe a daily run at 03:00.
    """

    # No strict=True here: FastAPI and json.loads() both supply enum values as
    # plain strings; strict mode would reject string→enum coercion.

    frequency: ScheduleFrequency = ScheduleFrequency.daily
    # Bounded to 1..168 hours.
    interval_hours: int = Field(default=24, ge=1, le=168, description="Used when frequency=hourly")
    hour: int = Field(default=3, ge=0, le=23, description="UTC hour for daily/weekly runs")
    minute: int = Field(default=0, ge=0, le=59, description="Minute for daily/weekly runs")
    day_of_week: int = Field(
        default=0,
        ge=0,
        le=6,
        description="Day of week for weekly runs (0=Monday … 6=Sunday)",
    )
|
|
|
|
class ScheduleInfo(BanGuiBaseModel):
    """Current schedule configuration together with runtime metadata."""

    # The schedule settings.
    config: ScheduleConfig
    # Next and most recent run times as strings; both are required fields
    # that accept ``None``.
    # NOTE(review): timestamp format is not visible here - confirm.
    next_run_at: str | None
    last_run_at: str | None
    # The only field with a default; it may be omitted by the caller.
    last_run_errors: bool | None = None
    """``True`` if the most recent import had errors, ``False`` if clean, ``None`` if never run."""
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Import results
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class ImportSourceResult(BanGuiBaseModel):
    """Result of importing a single blocklist source.

    Every field is required; ``source_id`` and ``error`` accept ``None``.
    """

    # Identifier of the imported source; may be ``None``.
    source_id: int | None
    # URL the import was performed against.
    source_url: str
    # Counters of imported and skipped IPs for this source.
    ips_imported: int
    ips_skipped: int
    # Error text for this source; may be ``None``.
    error: str | None
|
|
|
|
class ImportRunResult(BanGuiBaseModel):
    """Aggregated result from a full import run across all enabled sources."""

    # Per-source outcomes; each instance gets its own empty list by default.
    results: list[ImportSourceResult] = Field(default_factory=list)
    # Run-wide totals. These are plain required fields: the model does not
    # derive them from ``results``, so the caller must supply them.
    total_imported: int
    total_skipped: int
    errors_count: int
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Preview
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class PreviewResponse(BanGuiBaseModel):
    """Response for ``GET /api/blocklists/{id}/preview``."""

    # Sample entries; each instance gets its own empty list by default.
    entries: list[str] = Field(default_factory=list, description="Sample of valid IP entries")
    # Required counters supplied by the caller; the model does not compute
    # them from ``entries``.
    total_lines: int
    valid_count: int
    skipped_count: int
|