Make background tasks idempotent - prevent duplicate bans on retry
CRITICAL FIX: Background tasks (especially blocklist_import) could crash mid-execution, leaving partial state. On retry, the same bans were applied again, causing duplicates.

Solution: content-hash-based operation tracking for blocklist imports:
- Added import_runs table (migration 6) to track operations by source + content hash
- Before banning, check whether this exact content has already been imported
- If completed: skip banning (already done), optionally re-warm cache
- If new or failed: proceed with the ban and mark the run as completed or failed

Changes:
- Database: Migration 6 adds import_runs table with operation state tracking
- Model: Added ImportRunEntry for import run records
- Repository: New import_run_repo module with CRUD operations
- Workflow: Updated blocklist_import_workflow to check operation history before banning
- Dependencies: Registered import_run_repo for dependency injection
- Tests: Added test_import_source_idempotent_on_retry and test_import_source_different_content_not_reused
- Documentation: Added Task Idempotency section to Backend-Development.md

Verification:
- All 7 import tests pass (5 existing + 2 new idempotency tests)
- Type checking: mypy --strict ✅
- Linting: ruff ✅
- No API changes, backwards compatible via automatic migration

Fixes: Background tasks not idempotent #CRITICAL

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -107,7 +107,7 @@ _SCHEMA_STATEMENTS: list[str] = [
|
||||
_CREATE_HISTORY_ARCHIVE,
|
||||
]
|
||||
|
||||
_CURRENT_SCHEMA_VERSION: int = 5
|
||||
_CURRENT_SCHEMA_VERSION: int = 6
|
||||
|
||||
_MIGRATIONS: dict[int, str] = {
|
||||
1: "\n".join(_SCHEMA_STATEMENTS),
|
||||
@@ -166,6 +166,27 @@ CREATE INDEX IF NOT EXISTS idx_history_archive_ip
|
||||
-- Index for action-based queries: supports ban/unban filtering.
|
||||
CREATE INDEX IF NOT EXISTS idx_history_archive_action
|
||||
ON history_archive (action);
|
||||
""",
|
||||
6: """
|
||||
-- Migration 6: Add import_runs table for tracking blocklist import idempotency.
|
||||
-- Tracks unique imports by source and content hash to enable idempotent retries.
|
||||
-- On import crash, a retry finds the existing (source_id, content_hash) row and skips duplicate bans.
|
||||
-- This prevents duplicate IP bans if the scheduler retries after a failure.
|
||||
CREATE TABLE IF NOT EXISTS import_runs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
source_id INTEGER NOT NULL REFERENCES blocklist_sources(id) ON DELETE CASCADE,
|
||||
content_hash TEXT NOT NULL,
|
||||
status TEXT NOT NULL CHECK(status IN ('pending', 'completed', 'failed')),
|
||||
imported_count INTEGER NOT NULL DEFAULT 0,
|
||||
skipped_count INTEGER NOT NULL DEFAULT 0,
|
||||
error_message TEXT,
|
||||
created_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%fZ', 'now')),
|
||||
UNIQUE(source_id, content_hash)
|
||||
);
|
||||
-- Index for looking up imports by source and status (e.g. completed runs for a source)
|
||||
CREATE INDEX IF NOT EXISTS idx_import_runs_source_status
|
||||
ON import_runs (source_id, status);
|
||||
""",
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user