Refactor pagination with cursor-based support and standardized response format

- Implement cursor-based pagination in pagination.py
- Update response models to standardize pagination structure
- Add cursor pagination utilities for repositories
- Update HistoryArchiveRepository and ImportLogRepository with new pagination
- Add comprehensive tests for cursor pagination
- Update documentation for backend development and task tracking

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-01 17:54:05 +02:00
parent be974b9b0d
commit 67b26a3ef7
8 changed files with 613 additions and 51 deletions

View File

@@ -107,7 +107,7 @@ _SCHEMA_STATEMENTS: list[str] = [
_CREATE_HISTORY_ARCHIVE,
]
_CURRENT_SCHEMA_VERSION: int = 6
_CURRENT_SCHEMA_VERSION: int = 7
_MIGRATIONS: dict[int, str] = {
1: "\n".join(_SCHEMA_STATEMENTS),
@@ -187,10 +187,25 @@ CREATE TABLE IF NOT EXISTS import_runs (
-- Index for looking up completed imports by source
CREATE INDEX IF NOT EXISTS idx_import_runs_source_status
ON import_runs (source_id, status);
""",
7: """
-- Migration 7: Add indexes to import_log table for cursor-based pagination.
-- The import_log table is paginated by id (newest first) and filtered by source_id.
-- These indexes accelerate pagination queries and maintain consistent ordering.
-- See Docs/Backend-Development.md § Database Performance for details.
-- Index for ordering by id DESC for cursor-based pagination (newest first)
CREATE INDEX IF NOT EXISTS idx_import_log_id_desc
ON import_log (id DESC);
-- Composite index for source_id + id DESC ordering (filtered pagination)
CREATE INDEX IF NOT EXISTS idx_import_log_source_id_desc
ON import_log (source_id, id DESC);
""",
}
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

View File

@@ -125,16 +125,22 @@ class PaginationMetadata(BanGuiBaseModel):
"""Pagination metadata embedded in paginated list responses.
Contains page information and computed fields to support frontend pagination controls.
Supports both offset-based and cursor-based pagination modes.
Fields:
page: Current page number (1-based).
page: Current page number (1-based). Set to 1 for cursor pagination.
page_size: Number of items per page.
total: Total number of items matching the query (across all pages).
For cursor pagination, this is -1 (unknown without full scan).
total_pages: Computed total number of pages.
For cursor pagination, this is -1 (unknown without full scan).
has_next_page: Whether there is a next page after this one.
has_prev_page: Whether there is a previous page before this one.
Always False for cursor pagination (cannot navigate backward without storing history).
cursor: Opaque cursor token for fetching the next page (cursor pagination only).
None for offset pagination or when there are no more pages.
Example:
Example (offset pagination):
```python
pagination = PaginationMetadata(
page=2,
@@ -142,17 +148,36 @@ class PaginationMetadata(BanGuiBaseModel):
total=150,
total_pages=3,
has_next_page=True,
has_prev_page=True
has_prev_page=True,
cursor=None
)
```
Example (cursor pagination):
```python
pagination = PaginationMetadata(
page=1,
page_size=50,
total=-1,
total_pages=-1,
has_next_page=True,
has_prev_page=False,
cursor="eyJpZCI6IDQyN30="
)
```
"""
page: int = Field(..., ge=1, description="Current page number (1-based).")
page: int = Field(..., ge=1, description="Current page number (1-based). Set to 1 for cursor pagination.")
page_size: int = Field(..., ge=1, description="Number of items per page.")
total: int = Field(..., ge=0, description="Total number of items matching the query.")
total_pages: int = Field(..., ge=1, description="Computed total number of pages.")
total: int = Field(..., description="Total number of items matching the query. -1 if unknown (cursor pagination).")
total_pages: int = Field(..., description="Computed total number of pages. -1 if unknown (cursor pagination).")
has_next_page: bool = Field(..., description="Whether there is a next page after this one.")
has_prev_page: bool = Field(..., description="Whether there is a previous page before this one.")
cursor: str | None = Field(
default=None,
description="Opaque cursor token for fetching the next page (cursor pagination only).",
)
class PaginatedListResponse(BanGuiBaseModel, Generic[T]):

View File

@@ -2,6 +2,14 @@
Provides persistence APIs for the BanGUI archival history table in the
application database.
Supports both offset-based and cursor-based pagination:
- **Offset pagination** (legacy): ``get_archived_history(page=2, page_size=100)``
- convenient for small datasets but degrades on large offsets.
- **Cursor pagination** (recommended): ``get_archived_history_keyset(page_size=100, last_ban_id=None)``
- constant-time performance regardless of dataset size.
"""
from __future__ import annotations
@@ -164,3 +172,110 @@ async def purge_archived_history(db: aiosqlite.Connection, age_seconds: int) ->
deleted = cursor.rowcount
await db.commit()
return deleted
async def get_archived_history_keyset(
    db: aiosqlite.Connection,
    since: int | None = None,
    jail: str | None = None,
    ip_filter: str | list[str] | None = None,
    origin: BanOrigin | None = None,
    action: str | None = None,
    page_size: int = 100,
    last_ban_id: int | None = None,
) -> tuple[list[dict[str, Any]], bool]:
    """Return one keyset-paginated page of archived history records.

    Rows are ordered by ``id DESC`` and each page continues strictly below
    ``last_ban_id`` (``WHERE id < ?``), so no ``OFFSET`` scan is needed.
    Because ``id`` is the only sort key, the page order is stable and
    deterministic. NOTE(review): this yields "newest first" only if ids grow
    with ``timeofban`` — confirm against how rows are archived.

    Args:
        db: Active aiosqlite connection.
        since: If given, keep only events with ``timeofban >= since``.
        jail: If given, keep only events for this jail.
        ip_filter: If a list, keep only these exact IPs (an empty list
            short-circuits to an empty result). If a string, it is treated
            as a LIKE prefix after escaping.
        origin: ``"blocklist"`` keeps only rows of the blocklist jail,
            ``"selfblock"`` keeps only rows of every other jail.
        action: If given, keep only events with this action value.
        page_size: Maximum number of records returned. The query fetches
            ``page_size + 1`` rows; the extra row is only used to detect
            whether another page exists and is never returned.
        last_ban_id: ``id`` of the last record of the previous page, or
            ``None`` for the first page.

    Returns:
        A 2-tuple ``(records, has_more)`` where *records* holds at most
        ``page_size`` dicts (including ``id``, which callers need in order to
        build the cursor for the next page) and *has_more* is True when at
        least one further row matches.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    # A non-positive page size would produce LIMIT <= 1; SQLite treats a
    # negative LIMIT as "no upper bound", and page_size == 0 would report
    # has_more=True with an empty page forever.
    if page_size < 1:
        raise ValueError(f"page_size must be >= 1, got {page_size}")

    # An explicit empty IP list can never match anything.
    if isinstance(ip_filter, list) and not ip_filter:
        return [], False

    wheres: list[str] = []
    params: list[object] = []

    if since is not None:
        wheres.append("timeofban >= ?")
        params.append(since)

    if jail is not None:
        wheres.append("jail = ?")
        params.append(jail)

    if ip_filter is not None:
        if isinstance(ip_filter, list):
            placeholder = ", ".join("?" for _ in ip_filter)
            wheres.append(f"ip IN ({placeholder})")
            params.extend(ip_filter)
        else:
            wheres.append("ip LIKE ? ESCAPE '\\'")
            params.append(f"{escape_like(ip_filter)}%")

    if origin == "blocklist":
        wheres.append("jail = ?")
        params.append(BLOCKLIST_JAIL)
    elif origin == "selfblock":
        wheres.append("jail != ?")
        params.append(BLOCKLIST_JAIL)

    if action is not None:
        wheres.append("action = ?")
        params.append(action)

    # Keyset condition: continue strictly below the last id already served.
    if last_ban_id is not None:
        wheres.append("id < ?")
        params.append(last_ban_id)

    where_sql = "WHERE " + " AND ".join(wheres) if wheres else ""

    # Fetch one row more than requested to detect whether a next page exists.
    params.append(page_size + 1)

    async with db.execute(
        "SELECT id, jail, ip, timeofban, bancount, data, action "
        "FROM history_archive "
        f"{where_sql} "
        "ORDER BY id DESC "
        "LIMIT ?",  # noqa: S608
        params,
    ) as cur:
        rows = list(await cur.fetchall())

    records = [
        {
            "id": int(r[0]),
            "jail": str(r[1]),
            "ip": str(r[2]),
            "timeofban": int(r[3]),
            "bancount": int(r[4]),
            "data": str(r[5]),
            "action": str(r[6]),
        }
        for r in rows[:page_size]
    ]
    has_more = len(rows) > page_size
    return records, has_more

View File

@@ -3,6 +3,14 @@
Persists and queries blocklist import run records in the ``import_log``
table. All methods are plain async functions that accept a
:class:`aiosqlite.Connection`.
Supports both offset-based and cursor-based pagination:
- **Offset pagination** (legacy): ``list_logs(page=2, page_size=50)`` - query-efficient
but degrades on large offsets.
- **Cursor pagination** (recommended): ``list_logs_keyset(page_size=50, last_log_id=None)``
- constant-time performance regardless of dataset size.
"""
from __future__ import annotations
@@ -17,7 +25,6 @@ if TYPE_CHECKING:
from app.models.blocklist import ImportLogEntry
# Alias for backward compatibility with protocols
ImportLogRow = ImportLogEntry
async def add_log(
@@ -144,6 +151,66 @@ def compute_total_pages(total: int, page_size: int) -> int:
return math.ceil(total / page_size)
async def list_logs_keyset(
    db: aiosqlite.Connection,
    *,
    source_id: int | None = None,
    page_size: int = 50,
    last_log_id: int | None = None,
) -> tuple[list[ImportLogRow], bool]:
    """Return one keyset-paginated page of import log entries.

    Rows are ordered by ``id DESC`` and each page continues strictly below
    ``last_log_id`` (``WHERE id < ?``), so no ``OFFSET`` scan is needed.

    Args:
        db: Active aiosqlite connection.
        source_id: If given, keep only logs for this source.
        page_size: Maximum number of entries returned. The query fetches
            ``page_size + 1`` rows; the extra row is only used to detect
            whether another page exists and is never returned.
        last_log_id: ``id`` of the last entry of the previous page, or
            ``None`` for the first page.

    Returns:
        A 2-tuple ``(items, has_more)`` where *items* holds at most
        ``page_size`` entries and *has_more* is True when at least one
        further row matches.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    # A non-positive page size would produce LIMIT <= 1; SQLite treats a
    # negative LIMIT as "no upper bound", and page_size == 0 would report
    # has_more=True with an empty page forever.
    if page_size < 1:
        raise ValueError(f"page_size must be >= 1, got {page_size}")

    conditions: list[str] = []
    params: list[object] = []

    if source_id is not None:
        conditions.append("source_id = ?")
        params.append(source_id)

    # Keyset condition: continue strictly below the last id already served.
    if last_log_id is not None:
        conditions.append("id < ?")
        params.append(last_log_id)

    where = " WHERE " + " AND ".join(conditions) if conditions else ""

    # Fetch one row more than requested to detect whether a next page exists.
    params.append(page_size + 1)

    async with db.execute(
        f"""
        SELECT id, source_id, source_url, timestamp, ips_imported, ips_skipped, errors
        FROM import_log{where}
        ORDER BY id DESC
        LIMIT ?
        """,  # noqa: S608
        params,
    ) as cursor:
        rows = list(await cursor.fetchall())

    items = [_row_to_dict(r) for r in rows[:page_size]]
    has_more = len(rows) > page_size
    return items, has_more
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
@@ -158,5 +225,6 @@ def _row_to_dict(row: object) -> ImportLogRow:
Returns:
ImportLogEntry Pydantic model instance.
"""
mapping = cast("Mapping[str, object]", row)
return ImportLogEntry(**mapping)
from typing import Any as AnyType
mapping = cast("Mapping[str, AnyType]", row)
return ImportLogEntry.model_validate(dict(mapping))

View File

@@ -4,11 +4,21 @@ This module provides reusable utilities for implementing consistent pagination
across all endpoints. All paginated endpoints should use these utilities to
ensure a uniform API contract.
Standard Pagination Contract:
Query parameters: page (1-based), page_size (1-500)
Response: PaginatedListResponse[T] with items and pagination metadata
Supported Pagination Modes:
Usage in routers:
1. **Offset-Based (Legacy)** — Uses page number + page_size.
Query parameters: page (1-based), page_size (1-500)
⚠️ Performance degrades on large offsets (OFFSET requires scanning N rows).
Use for: Small datasets, where performance is not critical.
2. **Cursor-Based (Recommended for large tables)** — Uses keyset pagination.
Query parameters: cursor (opaque token for the next page), page_size
✓ Constant-time performance regardless of dataset size.
Use for: Large tables (>100K rows), paginated lists with sorting.
Usage Examples:
**Offset pagination (legacy):**
```python
from app.utils.pagination import PAGINATION_DEFAULTS, create_pagination_metadata
@@ -26,14 +36,50 @@ Usage in routers:
pagination = create_pagination_metadata(total, page, page_size)
return MyListResponse(items=items, pagination=pagination)
```
**Cursor pagination (recommended):**
```python
from app.utils.pagination import (
    PAGINATION_DEFAULTS,
    create_keyset_pagination_metadata,
    decode_cursor,
    encode_cursor,
)
@router.get("/items")
async def get_items(
cursor: str | None = Query(None),
page_size: int = Query(
default=PAGINATION_DEFAULTS["page_size"],
ge=1,
le=PAGINATION_DEFAULTS["max_page_size"],
),
):
# Decode cursor to get last_row_id
last_row_id = decode_cursor(cursor) if cursor else None
# Fetch items using keyset pagination (WHERE id < last_row_id, ORDER BY id DESC)
items, has_more = await repo.get_items_keyset(page_size, last_row_id)
# Encode cursor for next page (last item's ID)
next_cursor = encode_cursor(items[-1]["id"]) if items and has_more else None
pagination = create_keyset_pagination_metadata(items, next_cursor, page_size)
return MyListResponse(items=items, pagination=pagination)
```
"""
import base64
import json
from typing import TYPE_CHECKING, Final
if TYPE_CHECKING:
from app.models.response import PaginationMetadata
__all__ = ["PAGINATION_DEFAULTS", "get_offset", "compute_total_pages", "create_pagination_metadata"]
__all__ = [
"PAGINATION_DEFAULTS",
"get_offset",
"compute_total_pages",
"create_pagination_metadata",
"encode_cursor",
"decode_cursor",
"create_keyset_pagination_metadata",
]
# Standardized pagination defaults
PAGINATION_DEFAULTS: Final[dict[str, int]] = {
@@ -148,3 +194,112 @@ def create_pagination_metadata(total: int, page: int, page_size: int) -> "Pagina
has_prev_page=has_prev_page,
)
# ---------------------------------------------------------------------------
# Cursor-Based Pagination Functions
# ---------------------------------------------------------------------------
def encode_cursor(row_id: int) -> str:
    """Encode a row ID into an opaque cursor token.

    The cursor is the base64 encoding of a compact JSON object of the form
    ``{"id": <row_id>}``. Clients must treat it as opaque and pass it back
    unchanged.

    Args:
        row_id: The database row ID to encode. Must be a real integer >= 1.

    Returns:
        Base64-encoded cursor string that can be passed to decode_cursor().

    Raises:
        ValueError: If row_id is not an integer (bools included) or is < 1.

    Example:
        ```python
        cursor = encode_cursor(42)
        assert isinstance(cursor, str)
        assert decode_cursor(cursor) == 42
        ```
    """
    # bool is a subclass of int and floats compare fine against 1, so both
    # would slip through a bare range check and serialize to JSON values
    # (``true``, ``1.5``) that are not valid row IDs.
    if isinstance(row_id, bool) or not isinstance(row_id, int):
        raise ValueError(f"row_id must be an integer, got {row_id!r}")
    if row_id < 1:
        raise ValueError(f"row_id must be >= 1, got {row_id}")

    payload = json.dumps({"id": row_id}, separators=(",", ":"))
    return base64.b64encode(payload.encode()).decode("ascii")
def decode_cursor(cursor: str) -> int:
    """Decode an opaque cursor token to retrieve the row ID.

    Expects the base64 encoding of a JSON object with an integer ``id``
    field, i.e. the format produced by encode_cursor(). The token normally
    arrives from a client, so every malformed input is reported as
    ``ValueError`` rather than leaking an unrelated exception type.

    Args:
        cursor: Cursor string produced by encode_cursor().

    Returns:
        The row ID stored in the cursor.

    Raises:
        ValueError: If cursor is not a string, is not base64/UTF-8/JSON
            decodable, does not decode to a JSON object, or its 'id' field
            is missing, not an integer, or < 1.

    Example:
        ```python
        cursor = encode_cursor(42)
        assert decode_cursor(cursor) == 42
        ```
    """
    if not isinstance(cursor, str):
        raise ValueError(
            f"Invalid cursor format: expected a string, got {type(cursor).__name__}"
        )

    try:
        # binascii.Error, UnicodeError and json.JSONDecodeError are all
        # ValueError subclasses, so one handler covers every decoding step.
        raw = base64.b64decode(cursor.encode("ascii"))
        cursor_data = json.loads(raw.decode("utf-8"))
    except (ValueError, TypeError) as e:
        raise ValueError(f"Invalid cursor format: {e}") from e

    # Valid JSON that is not an object (e.g. a list or number) has no
    # ``.get``; reject it explicitly.
    if not isinstance(cursor_data, dict):
        raise ValueError("Invalid cursor format: payload must be a JSON object")

    row_id = cursor_data.get("id")
    # JSON ``true``/``false`` decode to bool, which is an int subclass.
    if isinstance(row_id, bool) or not isinstance(row_id, int) or row_id < 1:
        # Message wording kept identical to the previous behavior.
        raise ValueError(
            "Invalid cursor format: Invalid cursor: "
            f"'id' field must be an integer >= 1, got {row_id}"
        )
    return row_id
def create_keyset_pagination_metadata(
    items: list[dict[str, object]] | list[object],
    next_cursor: str | None,
    page_size: int,
) -> "PaginationMetadata":
    """Build the pagination metadata for a keyset (cursor-based) page.

    No total row count is queried for cursor pagination, so the count
    fields carry the sentinel ``-1``. Whether another page exists is derived
    solely from ``next_cursor``.

    Args:
        items: The items of the current page. Accepted for call-site
            symmetry; this function does not inspect it.
        next_cursor: Cursor for fetching the next page, or None if there
            are no more pages.
        page_size: The requested page size, echoed into the metadata.

    Returns:
        :class:`~app.models.response.PaginationMetadata` with ``page=1``,
        ``total=-1``, ``total_pages=-1``, ``has_prev_page=False``,
        ``has_next_page`` set to ``next_cursor is not None`` and ``cursor``
        set to ``next_cursor``.

    Example:
        ```python
        items, has_more = await repo.get_items_keyset(page_size=10, last_row_id=None)
        next_cursor = encode_cursor(items[-1]["id"]) if items and has_more else None
        metadata = create_keyset_pagination_metadata(items, next_cursor, page_size=10)
        assert metadata.total == -1  # Unknown in cursor pagination
        assert metadata.has_next_page == (next_cursor is not None)
        ```
    """
    # Imported lazily: at module level the model is only imported under
    # TYPE_CHECKING.
    from app.models.response import PaginationMetadata

    return PaginationMetadata(
        page=1,
        page_size=page_size,
        total=-1,
        total_pages=-1,
        has_next_page=next_cursor is not None,
        has_prev_page=False,
        cursor=next_cursor,
    )