Add explicit database transaction isolation to multi-step operations
This commit addresses race conditions in multi-step database operations by: 1. Wrap write operations in BEGIN IMMEDIATE ... COMMIT transactions: - import_run_repo: create_pending, mark_completed, mark_failed - geo_cache_repo: all upsert_*_and_commit functions - geo_cache_repo: bulk_upsert_entries_and_neg_entries_and_commit 2. Handle concurrent write collisions gracefully: - import_run_repo.create_pending can now raise IntegrityError - blocklist_import_workflow catches IntegrityError and retries lookup - Logs 'blocklist_import_lost_race' event when another request wins the race 3. Add comprehensive documentation: - Backend-Development.md § 6.3 Database Transactions - Explains when to use BEGIN IMMEDIATE - Shows transaction pattern with try-except-rollback - Documents race condition error handling pattern The solution leverages SQLite's UNIQUE constraint for data integrity while handling the concurrent case gracefully in application logic. This is more efficient than using BEGIN EXCLUSIVE which would serialize all writers. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -15,6 +15,7 @@ import hashlib
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import aiohttp
|
||||
import aiosqlite
|
||||
import structlog
|
||||
|
||||
from app.models.blocklist import BlocklistSource, ImportSourceResult
|
||||
@@ -26,8 +27,6 @@ from app.services.blocklist_parser import BlocklistParser
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Awaitable, Callable
|
||||
|
||||
import aiosqlite
|
||||
|
||||
from app.services.geo_cache import GeoCache
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
@@ -191,17 +190,41 @@ class BlocklistImportWorkflow:
|
||||
|
||||
# --- Create or update pending import run entry ---
|
||||
if existing_run is None:
|
||||
run_id = await import_run_repo.create_pending(
|
||||
db,
|
||||
source.id,
|
||||
content_hash,
|
||||
)
|
||||
log.info(
|
||||
"blocklist_import_tracking_created",
|
||||
source_id=source.id,
|
||||
run_id=run_id,
|
||||
content_hash=content_hash[:8],
|
||||
)
|
||||
try:
|
||||
run_id = await import_run_repo.create_pending(
|
||||
db,
|
||||
source.id,
|
||||
content_hash,
|
||||
)
|
||||
log.info(
|
||||
"blocklist_import_tracking_created",
|
||||
source_id=source.id,
|
||||
run_id=run_id,
|
||||
content_hash=content_hash[:8],
|
||||
)
|
||||
except aiosqlite.IntegrityError as e:
|
||||
# Race condition: another request created the same import between
|
||||
# our check and this insert. Fetch the existing run and use its ID.
|
||||
existing_run = await import_run_repo.get_by_source_and_hash(
|
||||
db,
|
||||
source.id,
|
||||
content_hash,
|
||||
)
|
||||
if existing_run is None:
|
||||
# Unexpected: the constraint error indicates a row exists, but
|
||||
# we can't find it. This should not happen in normal operation.
|
||||
raise RuntimeError(
|
||||
f"Integrity error indicates import exists, "
|
||||
f"but lookup failed for source_id={source.id}, "
|
||||
f"content_hash={content_hash[:8]}"
|
||||
) from e
|
||||
run_id = existing_run.id
|
||||
log.info(
|
||||
"blocklist_import_lost_race",
|
||||
source_id=source.id,
|
||||
run_id=run_id,
|
||||
content_hash=content_hash[:8],
|
||||
)
|
||||
else:
|
||||
# Retry case: existing run is pending or failed, try again
|
||||
run_id = existing_run.id
|
||||
|
||||
Reference in New Issue
Block a user