Refactor pagination with cursor-based support and standardized response format

- Implement cursor-based pagination in pagination.py
- Update response models to standardize pagination structure
- Add cursor pagination utilities for repositories
- Update HistoryArchiveRepository and ImportLogRepository with new pagination
- Add comprehensive tests for cursor pagination
- Update documentation for backend development and task tracking

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-01 17:54:05 +02:00
parent be974b9b0d
commit 67b26a3ef7
8 changed files with 613 additions and 51 deletions
--- a/backend/app/repositories/history_archive_repo.py
+++ b/backend/app/repositories/history_archive_repo.py
@@ -2,6 +2,14 @@

 Provides persistence APIs for the BanGUI archival history table in the
 application database.
+
+Supports both offset-based and cursor-based pagination:
+
+- **Offset pagination** (legacy): ``get_archived_history(page=2, page_size=100)``
+  - convenient for small datasets, but slows down as the offset grows.
+
+- **Cursor pagination** (recommended): ``get_archived_history_keyset(page_size=100, last_ban_id=None)``
+  - seeks past ``last_ban_id`` instead of using OFFSET, so deep pages cost no more than the first.
 """

 from __future__ import annotations
@@ -164,3 +172,110 @@ async def purge_archived_history(db: aiosqlite.Connection, age_seconds: int) ->
        deleted = cursor.rowcount
    await db.commit()
    return deleted
+
+
+async def get_archived_history_keyset(
+    db: aiosqlite.Connection,
+    since: int | None = None,
+    jail: str | None = None,
+    ip_filter: str | list[str] | None = None,
+    origin: BanOrigin | None = None,
+    action: str | None = None,
+    page_size: int = 100,
+    last_ban_id: int | None = None,
+) -> tuple[list[dict[str, Any]], bool]:
+    """Return one keyset-paginated page of archived history rows.
+
+    Rows are read in ``id DESC`` order and the cursor predicate is
+    ``id < last_ban_id``, so deep pages need no OFFSET. ``timeofban`` is
+    returned but is not part of the sort key.
+
+    Args:
+        db: Active aiosqlite connection.
+        since: If given, keep only rows with ``timeofban >= since``.
+        jail: If given, keep only rows for this jail.
+        ip_filter: A list selects exact IPs (an empty list yields no rows);
+            a string selects IPs starting with that prefix.
+        origin: ``"blocklist"`` keeps rows whose jail is ``BLOCKLIST_JAIL``;
+            ``"selfblock"`` keeps rows of every other jail.
+        action: If given, keep only rows with this action value.
+        page_size: Maximum number of records to return; must be >= 1.
+            One extra row is queried (never returned) to compute *has_more*.
+        last_ban_id: ``id`` of the last record of the previous page, or
+            ``None`` for the first page.
+
+    Returns:
+        A 2-tuple ``(records, has_more)``: *records* holds at most
+        *page_size* dicts (``id`` is included so callers can build the next
+        cursor) and *has_more* is True when further rows exist.
+
+    Raises:
+        ValueError: If *page_size* is smaller than 1.
+    """
+    # SQLite treats a negative LIMIT as "no limit", and page_size <= 0 would
+    # report has_more without yielding a cursor, so reject both up front.
+    if page_size < 1:
+        raise ValueError(f"page_size must be >= 1, got {page_size}")
+
+    if isinstance(ip_filter, list) and not ip_filter:
+        return [], False
+
+    wheres: list[str] = []
+    params: list[object] = []
+
+    if since is not None:
+        wheres.append("timeofban >= ?")
+        params.append(since)
+    if jail is not None:
+        wheres.append("jail = ?")
+        params.append(jail)
+
+    if isinstance(ip_filter, list):
+        placeholder = ", ".join("?" for _ in ip_filter)
+        wheres.append(f"ip IN ({placeholder})")
+        params.extend(ip_filter)
+    elif ip_filter is not None:
+        wheres.append("ip LIKE ? ESCAPE '\\'")
+        params.append(f"{escape_like(ip_filter)}%")
+
+    if origin == "blocklist":
+        wheres.append("jail = ?")
+        params.append(BLOCKLIST_JAIL)
+    elif origin == "selfblock":
+        wheres.append("jail != ?")
+        params.append(BLOCKLIST_JAIL)
+
+    if action is not None:
+        wheres.append("action = ?")
+        params.append(action)
+    if last_ban_id is not None:
+        wheres.append("id < ?")
+        params.append(last_ban_id)
+    where_sql = "WHERE " + " AND ".join(wheres) if wheres else ""
+
+    # Ask for one row beyond the page so has_more needs no COUNT query.
+    params.append(page_size + 1)
+    async with db.execute(
+        "SELECT id, jail, ip, timeofban, bancount, data, action "
+        "FROM history_archive "
+        f"{where_sql} "
+        "ORDER BY id DESC "
+        "LIMIT ?",  # noqa: S608
+        params,
+    ) as cur:
+        rows = list(await cur.fetchall())
+
+    records = [
+        {
+            "id": int(r[0]),
+            "jail": str(r[1]),
+            "ip": str(r[2]),
+            "timeofban": int(r[3]),
+            "bancount": int(r[4]),
+            "data": str(r[5]),
+            "action": str(r[6]),
+        }
+        for r in rows[:page_size]
+    ]
+    return records, len(rows) > page_size
+
--- a/backend/app/repositories/import_log_repo.py
+++ b/backend/app/repositories/import_log_repo.py
@@ -3,6 +3,14 @@
 Persists and queries blocklist import run records in the ``import_log``
 table.  All methods are plain async functions that accept a
 :class:`aiosqlite.Connection`.
+
+Supports both offset-based and cursor-based pagination:
+
+- **Offset pagination** (legacy): ``list_logs(page=2, page_size=50)``
+  - convenient for small datasets, but slows down as the offset grows.
+
+- **Cursor pagination** (recommended): ``list_logs_keyset(page_size=50, last_log_id=None)``
+  - seeks past ``last_log_id`` instead of using OFFSET, so deep pages cost no more than the first.
 """

 from __future__ import annotations
@@ -17,7 +25,6 @@ if TYPE_CHECKING:

 from app.models.blocklist import ImportLogEntry

-
 # Alias for backward compatibility with protocols
 ImportLogRow = ImportLogEntry
 async def add_log(
@@ -144,6 +151,66 @@ def compute_total_pages(total: int, page_size: int) -> int:
    return math.ceil(total / page_size)


+async def list_logs_keyset(
+    db: aiosqlite.Connection,
+    *,
+    source_id: int | None = None,
+    page_size: int = 50,
+    last_log_id: int | None = None,
+) -> tuple[list[ImportLogRow], bool]:
+    """Return one keyset-paginated page of import log entries.
+
+    Rows are read in ``id DESC`` order and the cursor predicate is
+    ``id < last_log_id``, so deep pages need no OFFSET.
+
+    Args:
+        db: Active aiosqlite connection.
+        source_id: If given, keep only logs for this source.
+        page_size: Maximum number of items to return; must be >= 1.
+            One extra row is queried (never returned) to compute *has_more*.
+        last_log_id: ``id`` of the last item of the previous page, or
+            ``None`` for the first page.
+
+    Returns:
+        A 2-tuple ``(items, has_more)``: *items* holds at most *page_size*
+        entries and *has_more* is True when further rows exist.
+
+    Raises:
+        ValueError: If *page_size* is smaller than 1.
+    """
+    # SQLite treats a negative LIMIT as "no limit", and page_size <= 0 would
+    # report has_more without yielding a cursor, so reject both up front.
+    if page_size < 1:
+        raise ValueError(f"page_size must be >= 1, got {page_size}")
+
+    wheres: list[str] = []
+    params: list[object] = []
+    if source_id is not None:
+        wheres.append("source_id = ?")
+        params.append(source_id)
+    if last_log_id is not None:
+        wheres.append("id < ?")
+        params.append(last_log_id)
+    where = " WHERE " + " AND ".join(wheres) if wheres else ""
+
+    # Ask for one row beyond the page so has_more needs no COUNT query.
+    params.append(page_size + 1)
+
+    async with db.execute(
+        f"""
+        SELECT id, source_id, source_url, timestamp, ips_imported, ips_skipped, errors
+        FROM import_log{where}
+        ORDER BY id DESC
+        LIMIT ?
+        """,  # noqa: S608
+        params,
+    ) as cursor:
+        rows = list(await cursor.fetchall())
+        items = [_row_to_dict(r) for r in rows[:page_size]]
+
+    return items, len(rows) > page_size
+
+
 # ---------------------------------------------------------------------------
 # Internal helpers
 # ---------------------------------------------------------------------------
@@ -158,5 +225,6 @@ def _row_to_dict(row: object) -> ImportLogRow:
    Returns:
        ImportLogEntry Pydantic model instance.
    """
-    mapping = cast("Mapping[str, object]", row)
-    return ImportLogEntry(**mapping)
+    from typing import Any as AnyType
+    mapping = cast("Mapping[str, AnyType]", row)
+    return ImportLogEntry.model_validate(dict(mapping))