fix: atomic upsert for import runs (Issue #12)

Replace the race-prone check-then-insert pattern with a single atomic INSERT ... ON CONFLICT upsert.
- upsert_pending uses RETURNING id for atomic upsert
- relies on the UNIQUE(source_id, content_hash) constraint from migration 6 as the ON CONFLICT target
- blocklist_import_workflow updated to use upsert_pending
- test_import_source_success fixed for async mock patterns

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-02 23:39:43 +02:00
parent 1285bc8571
commit e436727942
11 changed files with 144 additions and 164 deletions

View File

@@ -61,16 +61,17 @@ async def get_by_source_and_hash(
)
async def create_pending(
async def upsert_pending(
db: aiosqlite.Connection,
source_id: int,
content_hash: str,
) -> int:
"""Create a pending import run entry.
"""Atomically insert or reset a pending import run entry.
Wraps the insert in an explicit transaction to ensure atomicity and enable
proper error handling if a UNIQUE(source_id, content_hash) constraint
violation occurs due to concurrent requests.
Uses ``INSERT ... ON CONFLICT`` to make the operation fully atomic —
no window between check and insert where a concurrent request can create
a duplicate row. If a row for ``(source_id, content_hash)`` already exists,
its status is reset to ``pending`` and its ID is returned.
Args:
db: Active aiosqlite connection.
@@ -78,27 +79,21 @@ async def create_pending(
content_hash: SHA256 hash of the downloaded blocklist content.
Returns:
Primary key of the inserted row.
Raises:
aiosqlite.IntegrityError: If a row with this (source_id, content_hash)
already exists (constraint violation). The caller should catch this
and retry the lookup to get the existing run's ID.
Primary key of the inserted or updated row.
"""
try:
await db.execute("BEGIN IMMEDIATE")
cursor = await db.execute(
"""
INSERT INTO import_runs (source_id, content_hash, status)
VALUES (?, ?, 'pending')
""",
(source_id, content_hash),
)
await db.commit()
return int(cursor.lastrowid) # type: ignore[arg-type]
except Exception:
await db.rollback()
raise
cursor = await db.execute(
"""
INSERT INTO import_runs (source_id, content_hash, status)
VALUES (?, ?, 'pending')
ON CONFLICT(source_id, content_hash) DO UPDATE SET
status = 'pending',
updated_at = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
RETURNING id;
""",
(source_id, content_hash),
)
row = await cursor.fetchone()
return int(row[0]) # type: ignore[arg-type]
async def mark_completed(