Files
BanGUI/backend/app/repositories/import_run_repo.py
Lukas e436727942 fix: atomic upsert for import runs (Issue #12)
Replace check-then-insert race condition with INSERT ON CONFLICT.
- upsert_pending uses RETURNING id for atomic upsert
- UNIQUE(source_id, content_hash) constraint from migration 6
- blocklist_import_workflow updated to use upsert_pending
- test_import_source_success fixed for async mock patterns

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-02 23:39:43 +02:00

164 lines
4.5 KiB
Python

"""Import run repository for blocklist import idempotency tracking.
Persists and queries import run records in the ``import_runs`` table.
Enables detection of duplicate import attempts and prevents re-running bans
on scheduler retry after a crash.
All methods are plain async functions that accept an :class:`aiosqlite.Connection`.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import aiosqlite
from app.models.blocklist import ImportRunEntry
async def get_by_source_and_hash(
    db: aiosqlite.Connection,
    source_id: int,
    content_hash: str,
) -> ImportRunEntry | None:
    """Look up the import run recorded for a source / content-hash pair.

    Args:
        db: Active aiosqlite connection.
        source_id: FK to ``blocklist_sources.id``.
        content_hash: SHA256 hash of the downloaded blocklist content.

    Returns:
        The matching :class:`ImportRunEntry`, or ``None`` when the query
        yields no row.
    """
    lookup_sql = """
        SELECT
            id, source_id, content_hash, status,
            imported_count, skipped_count, error_message,
            created_at, updated_at
        FROM import_runs
        WHERE source_id = ? AND content_hash = ?
        """
    async with db.execute(lookup_sql, (source_id, content_hash)) as cur:
        record = await cur.fetchone()

    if not record:
        return None

    # Unpack in the same order as the SELECT column list above.
    (
        run_id,
        run_source_id,
        run_hash,
        run_status,
        imported,
        skipped,
        error,
        created,
        updated,
    ) = record
    return ImportRunEntry(
        id=run_id,
        source_id=run_source_id,
        content_hash=run_hash,
        status=run_status,
        imported_count=imported,
        skipped_count=skipped,
        error_message=error,
        created_at=created,
        updated_at=updated,
    )
async def upsert_pending(
    db: aiosqlite.Connection,
    source_id: int,
    content_hash: str,
) -> int:
    """Atomically insert or reset a pending import run entry.

    Uses ``INSERT ... ON CONFLICT`` so the check and the write happen in a
    single statement — there is no window between check and insert where a
    concurrent request can create a duplicate row. If a row for
    ``(source_id, content_hash)`` already exists, its ``status`` is reset to
    ``pending``, its ``updated_at`` is refreshed, and its ID is returned.

    On conflict only ``status`` and ``updated_at`` are rewritten; the other
    columns of the existing row are left as they were. No commit is issued
    here.

    Args:
        db: Active aiosqlite connection.
        source_id: FK to ``blocklist_sources.id``.
        content_hash: SHA256 hash of the downloaded blocklist content.

    Returns:
        Primary key of the inserted or updated row.

    Raises:
        RuntimeError: If the statement yields no ``RETURNING`` row.
    """
    cursor = await db.execute(
        """
        INSERT INTO import_runs (source_id, content_hash, status)
        VALUES (?, ?, 'pending')
        ON CONFLICT(source_id, content_hash) DO UPDATE SET
            status = 'pending',
            updated_at = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
        RETURNING id;
        """,
        (source_id, content_hash),
    )
    row = await cursor.fetchone()
    if row is None:
        # Fail with a descriptive error instead of an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        raise RuntimeError(
            "upsert into import_runs returned no row for "
            f"source_id={source_id!r}, content_hash={content_hash!r}"
        )
    return int(row[0])
async def mark_completed(
    db: aiosqlite.Connection,
    run_id: int,
    imported_count: int,
    skipped_count: int,
) -> None:
    """Mark an import run as completed with final counts.

    Opens an explicit ``BEGIN IMMEDIATE`` transaction, sets ``status`` to
    ``completed``, stores both counters, refreshes ``updated_at`` and
    commits. If the update or the commit fails — including when the task is
    cancelled — the transaction is rolled back and the error is re-raised.

    Args:
        db: Active aiosqlite connection.
        run_id: Primary key of the import run.
        imported_count: Number of IPs successfully banned.
        skipped_count: Number of entries skipped (invalid or CIDR).

    Raises:
        Exception: Whatever ``BEGIN IMMEDIATE``, the update or the commit
            raised; nothing is swallowed.
    """
    # BEGIN stays outside the try block: if it fails, this function has not
    # opened a transaction, so a rollback here could only discard work that
    # belongs to a transaction already open on the connection.
    await db.execute("BEGIN IMMEDIATE")
    try:
        await db.execute(
            """
            UPDATE import_runs
            SET status = 'completed',
                imported_count = ?,
                skipped_count = ?,
                updated_at = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
            WHERE id = ?
            """,
            (imported_count, skipped_count, run_id),
        )
        await db.commit()
    except BaseException:
        # BaseException rather than Exception: asyncio.CancelledError is a
        # BaseException, and a cancelled task must not leave the IMMEDIATE
        # transaction (and its write lock) open.
        await db.rollback()
        raise
async def mark_failed(
    db: aiosqlite.Connection,
    run_id: int,
    error_message: str,
) -> None:
    """Mark an import run as failed with error details.

    Opens an explicit ``BEGIN IMMEDIATE`` transaction, sets ``status`` to
    ``failed``, stores ``error_message``, refreshes ``updated_at`` and
    commits. If the update or the commit fails — including when the task is
    cancelled — the transaction is rolled back and the error is re-raised.

    Args:
        db: Active aiosqlite connection.
        run_id: Primary key of the import run.
        error_message: Error description.

    Raises:
        Exception: Whatever ``BEGIN IMMEDIATE``, the update or the commit
            raised; nothing is swallowed.
    """
    # BEGIN stays outside the try block: if it fails, this function has not
    # opened a transaction, so a rollback here could only discard work that
    # belongs to a transaction already open on the connection.
    await db.execute("BEGIN IMMEDIATE")
    try:
        await db.execute(
            """
            UPDATE import_runs
            SET status = 'failed',
                error_message = ?,
                updated_at = strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
            WHERE id = ?
            """,
            (error_message, run_id),
        )
        await db.commit()
    except BaseException:
        # BaseException rather than Exception: asyncio.CancelledError is a
        # BaseException, and a cancelled task must not leave the IMMEDIATE
        # transaction (and its write lock) open.
        await db.rollback()
        raise