refactoring-backend #3
@@ -1196,6 +1196,99 @@ async def test_migration_2_is_atomic(tmp_path: Path) -> None:
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## 6.3 Database Transactions
|
||||||
|
|
||||||
|
Database transactions ensure atomicity for multi-step operations and prevent race conditions when concurrent requests interact with the database. BanGUI uses **SQLite with WAL (Write-Ahead Logging)** mode, which enables concurrent readers but serializes writers.
|
||||||
|
|
||||||
|
### When to Use Explicit Transactions
|
||||||
|
|
||||||
|
**Use `BEGIN IMMEDIATE ... COMMIT` for:**
|
||||||
|
|
||||||
|
1. **Multi-step logical operations** — Operations that should succeed or fail as a unit. Example:
|
||||||
|
```python
|
||||||
|
# Bad — two separate operations, race condition window exists
|
||||||
|
await db.execute("INSERT INTO sessions ...")
|
||||||
|
await db.commit()
|
||||||
|
|
||||||
|
# Good — atomic single operation, no need for explicit transaction
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Operations that combine multiple queries with shared state** — When the operation outcome depends on reading and then writing based on that read:
|
||||||
|
```python
|
||||||
|
# Bad — race condition: another request checks between our read and write
|
||||||
|
existing_run = await import_run_repo.get_by_source_and_hash(db, source_id, content_hash)
|
||||||
|
if existing_run is None:
|
||||||
|
run_id = await import_run_repo.create_pending(db, source_id, content_hash)
|
||||||
|
|
||||||
|
# Good — atomic: both operations within same transaction boundary
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
|
cursor = await db.execute("INSERT INTO import_runs ...")
|
||||||
|
await db.commit()
|
||||||
|
except aiosqlite.IntegrityError:
|
||||||
|
# Another request won the race; fetch the existing record
|
||||||
|
existing = await import_run_repo.get_by_source_and_hash(...)
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Bulk operations that should be all-or-nothing** — For example, upserting positive and negative geo cache entries:
|
||||||
|
```python
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
|
await bulk_upsert_entries(db, positive_rows)
|
||||||
|
await bulk_upsert_neg_entries(db, negative_ips)
|
||||||
|
await db.commit()
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
```
|
||||||
|
|
||||||
|
**Do NOT use explicit transactions for:**
|
||||||
|
|
||||||
|
- Single SQL statements — SQLite guarantees atomic writes for individual statements. No explicit transaction needed.
|
||||||
|
- Read-only queries — Queries do not modify data and do not need transaction boundaries.
|
||||||
|
|
||||||
|
### Transaction Pattern
|
||||||
|
|
||||||
|
Always use this pattern for wrapped operations:
|
||||||
|
|
||||||
|
```python
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
|
# ... perform all operations ...
|
||||||
|
await db.commit()
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
```
|
||||||
|
|
||||||
|
- **`BEGIN IMMEDIATE`** — Acquires a write lock immediately, preventing other writers from entering the transaction window. This is critical for crash-safety and consistency.
|
||||||
|
- **`COMMIT`** — Persists all changes.
|
||||||
|
- **`ROLLBACK`** — Rolls back on any exception, ensuring the database is left in a consistent state.
|
||||||
|
|
||||||
|
### Handling Race Condition Errors
|
||||||
|
|
||||||
|
When a `UNIQUE` constraint violation occurs due to a race condition (two concurrent requests attempt the same insert), the database raises `aiosqlite.IntegrityError`. **Handle this at the call site** by retrying the lookup:
|
||||||
|
|
||||||
|
```python
|
||||||
|
try:
|
||||||
|
run_id = await import_run_repo.create_pending(db, source_id, content_hash)
|
||||||
|
except aiosqlite.IntegrityError:
|
||||||
|
# Another concurrent request created it first
|
||||||
|
existing = await import_run_repo.get_by_source_and_hash(db, source_id, content_hash)
|
||||||
|
if existing is None:
|
||||||
|
raise RuntimeError("Constraint error indicates row exists but lookup failed")
|
||||||
|
run_id = existing.id
|
||||||
|
log.info("lost_race", run_id=run_id)
|
||||||
|
```
|
||||||
|
|
||||||
|
This approach:
|
||||||
|
1. Lets the database constraint prevent data corruption.
|
||||||
|
2. Gracefully handles the concurrent case in application logic.
|
||||||
|
3. Avoids unnecessary locking overhead for the common case (no concurrent writers).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## 7. Structured Logging Policy
|
## 7. Structured Logging Policy
|
||||||
|
|
||||||
All logging in BanGUI services and tasks must use **structlog** for consistent, queryable event tracking. This policy defines when and how to log at each level.
|
All logging in BanGUI services and tasks must use **structlog** for consistent, queryable event tracking. This policy defines when and how to log at each level.
|
||||||
|
|||||||
@@ -1,48 +1,3 @@
|
|||||||
## [CRITICAL] Health check endpoint returns wrong status code
|
|
||||||
|
|
||||||
**Where found**
|
|
||||||
|
|
||||||
- `backend/app/routers/health.py` — always returns 200, even when fail2ban offline
|
|
||||||
|
|
||||||
**Why this is needed**
|
|
||||||
|
|
||||||
Docker health checks interpret 200 as "healthy". If fail2ban offline but backend returns 200, Docker thinks container healthy and doesn't restart it.
|
|
||||||
|
|
||||||
**Goal**
|
|
||||||
|
|
||||||
Return 503 Service Unavailable when fail2ban is offline.
|
|
||||||
|
|
||||||
**What to do**
|
|
||||||
|
|
||||||
1. Change health endpoint to return 503 when offline:
|
|
||||||
```python
|
|
||||||
if not server_status.online:
|
|
||||||
return JSONResponse(
|
|
||||||
status_code=503,
|
|
||||||
content={"status": "unavailable", "fail2ban": "offline"}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
2. Update Docker health check to expect 503 as "unhealthy"
|
|
||||||
|
|
||||||
**Possible traps and issues**
|
|
||||||
|
|
||||||
- Returning 503 causes orchestration tools to restart container
|
|
||||||
- If fail2ban restarts frequently, health check becomes flaky
|
|
||||||
- Consider gradual degradation
|
|
||||||
|
|
||||||
**Docs changes needed**
|
|
||||||
|
|
||||||
- Update `Docker/Dockerfile.backend` health check documentation
|
|
||||||
- Update `Docs/Deployment.md` § Health Checks
|
|
||||||
|
|
||||||
**Doc references**
|
|
||||||
|
|
||||||
- `backend/app/routers/health.py`
|
|
||||||
- `Docker/Dockerfile.backend`
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## [IMPORTANT] Database transactions lack explicit isolation
|
## [IMPORTANT] Database transactions lack explicit isolation
|
||||||
|
|
||||||
**Where found**
|
**Where found**
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ if TYPE_CHECKING:
|
|||||||
|
|
||||||
from app.models.geo import GeoCacheEntry
|
from app.models.geo import GeoCacheEntry
|
||||||
|
|
||||||
|
|
||||||
# Alias for backward compatibility with protocols
|
# Alias for backward compatibility with protocols
|
||||||
GeoCacheRow = GeoCacheEntry
|
GeoCacheRow = GeoCacheEntry
|
||||||
|
|
||||||
@@ -109,9 +108,17 @@ async def upsert_entry_and_commit(
|
|||||||
asn: str | None,
|
asn: str | None,
|
||||||
org: str | None,
|
org: str | None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Insert or update a resolved geo cache entry and commit."""
|
"""Insert or update a resolved geo cache entry and commit.
|
||||||
|
|
||||||
|
Wraps the upsert in an explicit transaction to ensure atomicity.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
await upsert_entry(db, ip, country_code, country_name, asn, org)
|
await upsert_entry(db, ip, country_code, country_name, asn, org)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
async def upsert_neg_entry(db: aiosqlite.Connection, ip: str) -> None:
|
async def upsert_neg_entry(db: aiosqlite.Connection, ip: str) -> None:
|
||||||
@@ -127,9 +134,17 @@ async def upsert_neg_entry(db: aiosqlite.Connection, ip: str) -> None:
|
|||||||
|
|
||||||
|
|
||||||
async def upsert_neg_entry_and_commit(db: aiosqlite.Connection, ip: str) -> None:
|
async def upsert_neg_entry_and_commit(db: aiosqlite.Connection, ip: str) -> None:
|
||||||
"""Record a failed lookup attempt and commit the transaction."""
|
"""Record a failed lookup attempt and commit the transaction.
|
||||||
|
|
||||||
|
Wraps the upsert in an explicit transaction to ensure atomicity.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
await upsert_neg_entry(db, ip)
|
await upsert_neg_entry(db, ip)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
async def bulk_upsert_entries(
|
async def bulk_upsert_entries(
|
||||||
@@ -173,17 +188,33 @@ async def bulk_upsert_entries_and_commit(
|
|||||||
db: aiosqlite.Connection,
|
db: aiosqlite.Connection,
|
||||||
rows: Sequence[tuple[str, str | None, str | None, str | None, str | None]],
|
rows: Sequence[tuple[str, str | None, str | None, str | None, str | None]],
|
||||||
) -> int:
|
) -> int:
|
||||||
"""Bulk insert or update multiple geo cache entries and commit."""
|
"""Bulk insert or update multiple geo cache entries and commit.
|
||||||
|
|
||||||
|
Wraps the bulk upsert in an explicit transaction to ensure atomicity.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
count = await bulk_upsert_entries(db, rows)
|
count = await bulk_upsert_entries(db, rows)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
return count
|
return count
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
async def bulk_upsert_neg_entries_and_commit(db: aiosqlite.Connection, ips: list[str]) -> int:
|
async def bulk_upsert_neg_entries_and_commit(db: aiosqlite.Connection, ips: list[str]) -> int:
|
||||||
"""Bulk insert negative lookup entries and commit."""
|
"""Bulk insert negative lookup entries and commit.
|
||||||
|
|
||||||
|
Wraps the bulk upsert in an explicit transaction to ensure atomicity.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
count = await bulk_upsert_neg_entries(db, ips)
|
count = await bulk_upsert_neg_entries(db, ips)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
return count
|
return count
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
async def bulk_upsert_entries_and_neg_entries_and_commit(
|
async def bulk_upsert_entries_and_neg_entries_and_commit(
|
||||||
@@ -191,10 +222,24 @@ async def bulk_upsert_entries_and_neg_entries_and_commit(
|
|||||||
rows: Sequence[tuple[str, str | None, str | None, str | None, str | None]],
|
rows: Sequence[tuple[str, str | None, str | None, str | None, str | None]],
|
||||||
ips: list[str],
|
ips: list[str],
|
||||||
) -> tuple[int, int]:
|
) -> tuple[int, int]:
|
||||||
"""Persist positive and negative geo cache rows together, then commit."""
|
"""Persist positive and negative geo cache rows together, then commit.
|
||||||
|
|
||||||
|
Wraps both upserts in a single transaction to ensure atomicity.
|
||||||
|
Either all rows are persisted or none are.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
db: Active aiosqlite connection.
|
||||||
|
rows: Sequence of (ip, country_code, country_name, asn, org) tuples.
|
||||||
|
ips: List of IP strings for negative entries (failed lookups).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A tuple (positive_count, negative_count) of rows persisted.
|
||||||
|
"""
|
||||||
positive_count = 0
|
positive_count = 0
|
||||||
negative_count = 0
|
negative_count = 0
|
||||||
|
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
if rows:
|
if rows:
|
||||||
positive_count = await bulk_upsert_entries(db, rows)
|
positive_count = await bulk_upsert_entries(db, rows)
|
||||||
if ips:
|
if ips:
|
||||||
@@ -202,6 +247,9 @@ async def bulk_upsert_entries_and_neg_entries_and_commit(
|
|||||||
|
|
||||||
if rows or ips:
|
if rows or ips:
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
|
||||||
return positive_count, negative_count
|
return positive_count, negative_count
|
||||||
|
|
||||||
|
|||||||
@@ -68,6 +68,10 @@ async def create_pending(
|
|||||||
) -> int:
|
) -> int:
|
||||||
"""Create a pending import run entry.
|
"""Create a pending import run entry.
|
||||||
|
|
||||||
|
Wraps the insert in an explicit transaction to ensure atomicity and enable
|
||||||
|
proper error handling if a UNIQUE(source_id, content_hash) constraint
|
||||||
|
violation occurs due to concurrent requests.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db: Active aiosqlite connection.
|
db: Active aiosqlite connection.
|
||||||
source_id: FK to ``blocklist_sources.id``.
|
source_id: FK to ``blocklist_sources.id``.
|
||||||
@@ -75,7 +79,14 @@ async def create_pending(
|
|||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Primary key of the inserted row.
|
Primary key of the inserted row.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
aiosqlite.IntegrityError: If a row with this (source_id, content_hash)
|
||||||
|
already exists (constraint violation). The caller should catch this
|
||||||
|
and retry the lookup to get the existing run's ID.
|
||||||
"""
|
"""
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
cursor = await db.execute(
|
cursor = await db.execute(
|
||||||
"""
|
"""
|
||||||
INSERT INTO import_runs (source_id, content_hash, status)
|
INSERT INTO import_runs (source_id, content_hash, status)
|
||||||
@@ -85,6 +96,9 @@ async def create_pending(
|
|||||||
)
|
)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
return int(cursor.lastrowid) # type: ignore[arg-type]
|
return int(cursor.lastrowid) # type: ignore[arg-type]
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
async def mark_completed(
|
async def mark_completed(
|
||||||
@@ -95,12 +109,16 @@ async def mark_completed(
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""Mark an import run as completed with final counts.
|
"""Mark an import run as completed with final counts.
|
||||||
|
|
||||||
|
Wraps the update in an explicit transaction to ensure atomicity.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db: Active aiosqlite connection.
|
db: Active aiosqlite connection.
|
||||||
run_id: Primary key of the import run.
|
run_id: Primary key of the import run.
|
||||||
imported_count: Number of IPs successfully banned.
|
imported_count: Number of IPs successfully banned.
|
||||||
skipped_count: Number of entries skipped (invalid or CIDR).
|
skipped_count: Number of entries skipped (invalid or CIDR).
|
||||||
"""
|
"""
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"""
|
"""
|
||||||
UPDATE import_runs
|
UPDATE import_runs
|
||||||
@@ -113,6 +131,9 @@ async def mark_completed(
|
|||||||
(imported_count, skipped_count, run_id),
|
(imported_count, skipped_count, run_id),
|
||||||
)
|
)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
async def mark_failed(
|
async def mark_failed(
|
||||||
@@ -122,11 +143,15 @@ async def mark_failed(
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""Mark an import run as failed with error details.
|
"""Mark an import run as failed with error details.
|
||||||
|
|
||||||
|
Wraps the update in an explicit transaction to ensure atomicity.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db: Active aiosqlite connection.
|
db: Active aiosqlite connection.
|
||||||
run_id: Primary key of the import run.
|
run_id: Primary key of the import run.
|
||||||
error_message: Error description.
|
error_message: Error description.
|
||||||
"""
|
"""
|
||||||
|
try:
|
||||||
|
await db.execute("BEGIN IMMEDIATE")
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"""
|
"""
|
||||||
UPDATE import_runs
|
UPDATE import_runs
|
||||||
@@ -138,3 +163,6 @@ async def mark_failed(
|
|||||||
(error_message, run_id),
|
(error_message, run_id),
|
||||||
)
|
)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
|
except Exception:
|
||||||
|
await db.rollback()
|
||||||
|
raise
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import hashlib
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
import aiosqlite
|
||||||
import structlog
|
import structlog
|
||||||
|
|
||||||
from app.models.blocklist import BlocklistSource, ImportSourceResult
|
from app.models.blocklist import BlocklistSource, ImportSourceResult
|
||||||
@@ -26,8 +27,6 @@ from app.services.blocklist_parser import BlocklistParser
|
|||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from collections.abc import Awaitable, Callable
|
from collections.abc import Awaitable, Callable
|
||||||
|
|
||||||
import aiosqlite
|
|
||||||
|
|
||||||
from app.services.geo_cache import GeoCache
|
from app.services.geo_cache import GeoCache
|
||||||
|
|
||||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||||
@@ -191,6 +190,7 @@ class BlocklistImportWorkflow:
|
|||||||
|
|
||||||
# --- Create or update pending import run entry ---
|
# --- Create or update pending import run entry ---
|
||||||
if existing_run is None:
|
if existing_run is None:
|
||||||
|
try:
|
||||||
run_id = await import_run_repo.create_pending(
|
run_id = await import_run_repo.create_pending(
|
||||||
db,
|
db,
|
||||||
source.id,
|
source.id,
|
||||||
@@ -202,6 +202,29 @@ class BlocklistImportWorkflow:
|
|||||||
run_id=run_id,
|
run_id=run_id,
|
||||||
content_hash=content_hash[:8],
|
content_hash=content_hash[:8],
|
||||||
)
|
)
|
||||||
|
except aiosqlite.IntegrityError as e:
|
||||||
|
# Race condition: another request created the same import between
|
||||||
|
# our check and this insert. Fetch the existing run and use its ID.
|
||||||
|
existing_run = await import_run_repo.get_by_source_and_hash(
|
||||||
|
db,
|
||||||
|
source.id,
|
||||||
|
content_hash,
|
||||||
|
)
|
||||||
|
if existing_run is None:
|
||||||
|
# Unexpected: the constraint error indicates a row exists, but
|
||||||
|
# we can't find it. This should not happen in normal operation.
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Integrity error indicates import exists, "
|
||||||
|
f"but lookup failed for source_id={source.id}, "
|
||||||
|
f"content_hash={content_hash[:8]}"
|
||||||
|
) from e
|
||||||
|
run_id = existing_run.id
|
||||||
|
log.info(
|
||||||
|
"blocklist_import_lost_race",
|
||||||
|
source_id=source.id,
|
||||||
|
run_id=run_id,
|
||||||
|
content_hash=content_hash[:8],
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
# Retry case: existing run is pending or failed, try again
|
# Retry case: existing run is pending or failed, try again
|
||||||
run_id = existing_run.id
|
run_id = existing_run.id
|
||||||
|
|||||||
Reference in New Issue
Block a user