Refactor pagination with cursor-based support and standardized response format

- Implement cursor-based pagination in pagination.py
- Update response models to standardize pagination structure
- Add cursor pagination utilities for repositories
- Update HistoryArchiveRepository and ImportLogRepository with new pagination
- Add comprehensive tests for cursor pagination
- Update documentation for backend development and task tracking

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-01 17:54:05 +02:00
parent be974b9b0d
commit 67b26a3ef7
8 changed files with 613 additions and 51 deletions

View File

@@ -2,6 +2,14 @@
Provides persistence APIs for the BanGUI archival history table in the
application database.
Supports both offset-based and cursor-based pagination:
- **Offset pagination** (legacy): ``get_archived_history(page=2, page_size=100)``
- convenient for small datasets but degrades on large offsets.
- **Cursor pagination** (recommended): ``get_archived_history_keyset(page_size=100, last_ban_id=None)``
- per-page cost does not grow with how deep the client has paged (unlike large offsets).
"""
from __future__ import annotations
@@ -164,3 +172,110 @@ async def purge_archived_history(db: aiosqlite.Connection, age_seconds: int) ->
deleted = cursor.rowcount
await db.commit()
return deleted
async def get_archived_history_keyset(
    db: aiosqlite.Connection,
    since: int | None = None,
    jail: str | None = None,
    ip_filter: str | list[str] | None = None,
    origin: BanOrigin | None = None,
    action: str | None = None,
    page_size: int = 100,
    last_ban_id: int | None = None,
) -> tuple[list[dict[str, Any]], bool]:
    """Return one page of archived history using keyset (cursor) pagination.

    Rows are ordered by ``id DESC`` and the cursor is applied as
    ``WHERE id < last_ban_id``, so the cost of fetching a page does not grow
    with how deep the caller has paged.

    NOTE(review): ordering is by ``id`` only. This yields newest-first output
    only if ids are assigned in chronological order - confirm against the
    code that inserts into ``history_archive``.

    All supplied filters are combined with ``AND``; passing both ``jail`` and
    ``origin`` therefore applies both jail conditions.

    Args:
        db: Active aiosqlite connection.
        since: If given, keep only events with ``timeofban >= since``.
        jail: If given, keep only events for this jail.
        ip_filter: If a list, keep only events whose IP is in the list (an
            empty list short-circuits to an empty result). If a string, it is
            treated as a ``LIKE`` prefix after passing through ``escape_like``.
        origin: ``"blocklist"`` keeps only rows in ``BLOCKLIST_JAIL``;
            ``"selfblock"`` keeps only rows outside it. Any other value adds
            no filter.
        action: If given, keep only events with this action value.
        page_size: Maximum number of records to return. One extra row is
            fetched internally to detect whether another page exists.
        last_ban_id: The ``id`` of the last record of the previous page, or
            ``None`` for the first page.

    Returns:
        A 2-tuple ``(records, has_more)`` where:
        - *records* is a list of at most ``page_size`` dicts with the keys
          ``id``, ``jail``, ``ip``, ``timeofban``, ``bancount``, ``data`` and
          ``action``. Pass the ``id`` of the last record as ``last_ban_id``
          to fetch the next page.
        - *has_more* is True if at least one more row exists after this page.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    # A negative LIMIT means "no limit" in SQLite, so a bad page_size could
    # otherwise pull the entire table into memory.
    if page_size < 1:
        raise ValueError(f"page_size must be >= 1, got {page_size}")

    # "ip IN ()" can never match, and is not valid SQL anyway.
    if isinstance(ip_filter, list) and not ip_filter:
        return [], False

    wheres: list[str] = []
    params: list[object] = []

    if since is not None:
        wheres.append("timeofban >= ?")
        params.append(since)

    if jail is not None:
        wheres.append("jail = ?")
        params.append(jail)

    if ip_filter is not None:
        if isinstance(ip_filter, list):
            placeholder = ", ".join("?" for _ in ip_filter)
            wheres.append(f"ip IN ({placeholder})")
            params.extend(ip_filter)
        else:
            wheres.append("ip LIKE ? ESCAPE '\\'")
            params.append(f"{escape_like(ip_filter)}%")

    if origin == "blocklist":
        wheres.append("jail = ?")
        params.append(BLOCKLIST_JAIL)
    elif origin == "selfblock":
        wheres.append("jail != ?")
        params.append(BLOCKLIST_JAIL)

    if action is not None:
        wheres.append("action = ?")
        params.append(action)

    # Keyset cursor: continue strictly after the last row already delivered.
    if last_ban_id is not None:
        wheres.append("id < ?")
        params.append(last_ban_id)

    where_sql = "WHERE " + " AND ".join(wheres) if wheres else ""

    # Fetch one row beyond the page so has_more needs no COUNT query.
    params.append(page_size + 1)

    async with db.execute(
        "SELECT id, jail, ip, timeofban, bancount, data, action "
        "FROM history_archive "
        f"{where_sql} "
        "ORDER BY id DESC "
        "LIMIT ?",  # noqa: S608
        params,
    ) as cur:
        rows = list(await cur.fetchall())

    records: list[dict[str, Any]] = [
        {
            # The id is the cursor value callers need for the next page.
            "id": int(r[0]),
            "jail": str(r[1]),
            "ip": str(r[2]),
            "timeofban": int(r[3]),
            "bancount": int(r[4]),
            "data": str(r[5]),
            "action": str(r[6]),
        }
        for r in rows[:page_size]
    ]
    has_more = len(rows) > page_size
    return records, has_more

View File

@@ -3,6 +3,14 @@
Persists and queries blocklist import run records in the ``import_log``
table. All methods are plain async functions that accept a
:class:`aiosqlite.Connection`.
Supports both offset-based and cursor-based pagination:
- **Offset pagination** (legacy): ``list_logs(page=2, page_size=50)`` - efficient for
small offsets, but degrades as the offset grows.
- **Cursor pagination** (recommended): ``list_logs_keyset(page_size=50, last_log_id=None)``
- per-page cost does not grow with how deep the client has paged (unlike large offsets).
"""
from __future__ import annotations
@@ -17,7 +25,6 @@ if TYPE_CHECKING:
from app.models.blocklist import ImportLogEntry
# Alias for backward compatibility with protocols
ImportLogRow = ImportLogEntry
async def add_log(
@@ -144,6 +151,66 @@ def compute_total_pages(total: int, page_size: int) -> int:
return math.ceil(total / page_size)
async def list_logs_keyset(
    db: aiosqlite.Connection,
    *,
    source_id: int | None = None,
    page_size: int = 50,
    last_log_id: int | None = None,
) -> tuple[list[ImportLogRow], bool]:
    """Return one page of import log entries using keyset (cursor) pagination.

    Rows are ordered by ``id DESC`` and the cursor is applied as
    ``WHERE id < last_log_id``, so the cost of fetching a page does not grow
    with how deep the caller has paged.

    Args:
        db: Active aiosqlite connection.
        source_id: If given, keep only logs for this source.
        page_size: Maximum number of items to return. One extra row is
            fetched internally to detect whether another page exists.
        last_log_id: The ``id`` of the last item of the previous page, or
            ``None`` for the first page.

    Returns:
        A 2-tuple ``(items, has_more)`` where:
        - *items* is a list of at most ``page_size`` entries, each produced
          by ``_row_to_dict`` from one fetched row.
        - *has_more* is True if at least one more row exists after this page.

    Raises:
        ValueError: If ``page_size`` is smaller than 1.
    """
    # A negative LIMIT means "no limit" in SQLite, so a bad page_size could
    # otherwise pull the entire table into memory.
    if page_size < 1:
        raise ValueError(f"page_size must be >= 1, got {page_size}")

    conditions: list[str] = []
    params: list[object] = []

    if source_id is not None:
        conditions.append("source_id = ?")
        params.append(source_id)

    # Keyset cursor: continue strictly after the last row already delivered.
    if last_log_id is not None:
        conditions.append("id < ?")
        params.append(last_log_id)

    where = " WHERE " + " AND ".join(conditions) if conditions else ""

    # Fetch one row beyond the page so has_more needs no COUNT query.
    params.append(page_size + 1)

    async with db.execute(
        f"""
        SELECT id, source_id, source_url, timestamp, ips_imported, ips_skipped, errors
        FROM import_log{where}
        ORDER BY id DESC
        LIMIT ?
        """,  # noqa: S608
        params,
    ) as cursor:
        rows = list(await cursor.fetchall())

    items = [_row_to_dict(r) for r in rows[:page_size]]
    has_more = len(rows) > page_size
    return items, has_more
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
@@ -158,5 +225,6 @@ def _row_to_dict(row: object) -> ImportLogRow:
Returns:
ImportLogEntry Pydantic model instance.
"""
mapping = cast("Mapping[str, object]", row)
return ImportLogEntry(**mapping)
from typing import Any as AnyType
mapping = cast("Mapping[str, AnyType]", row)
return ImportLogEntry.model_validate(dict(mapping))