Refactor pagination with cursor-based support and standardized response format

- Implement cursor-based pagination in pagination.py
- Update response models to standardize pagination structure
- Add cursor pagination utilities for repositories
- Update HistoryArchiveRepository and ImportLogRepository with new pagination
- Add comprehensive tests for cursor pagination
- Update documentation for backend development and task tracking

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-01 17:54:05 +02:00
parent be974b9b0d
commit 67b26a3ef7
8 changed files with 613 additions and 51 deletions

View File

@@ -4,11 +4,21 @@ This module provides reusable utilities for implementing consistent pagination
across all endpoints. All paginated endpoints should use these utilities to
ensure a uniform API contract.
Standard Pagination Contract:
Query parameters: page (1-based), page_size (1-500)
Response: PaginatedListResponse[T] with items and pagination metadata
Supported Pagination Modes:
Usage in routers:
1. **Offset-Based (Legacy)** — Uses page number + page_size.
Query parameters: page (1-based), page_size (1-500)
⚠️ Performance degrades on large offsets (OFFSET requires scanning N rows).
Use for: Small datasets, where performance is not critical.
2. **Cursor-Based (Recommended for large tables)** — Uses keyset pagination.
Query parameters: cursor (opaque token for next/prev), page_size
✓ Per-page cost does not grow with page depth (index seek on the key instead of an OFFSET scan).
Use for: Large tables (>100K rows), paginated lists with sorting.
Usage Examples:
**Offset pagination (legacy):**
```python
from app.utils.pagination import PAGINATION_DEFAULTS, create_pagination_metadata
@@ -26,14 +36,50 @@ Usage in routers:
pagination = create_pagination_metadata(total, page, page_size)
return MyListResponse(items=items, pagination=pagination)
```
**Cursor pagination (recommended):**
```python
from app.utils.pagination import (
    PAGINATION_DEFAULTS,
    create_keyset_pagination_metadata,
    decode_cursor,
    encode_cursor,
)
@router.get("/items")
async def get_items(
cursor: str | None = Query(None),
page_size: int = Query(
default=PAGINATION_DEFAULTS["page_size"],
ge=1,
le=PAGINATION_DEFAULTS["max_page_size"],
),
):
# Decode cursor to get last_row_id
last_row_id = decode_cursor(cursor) if cursor else None
# Fetch items using keyset pagination (WHERE id > last_row_id)
items, has_more = await repo.get_items_keyset(page_size, last_row_id)
# Encode cursor for next page (last item's ID)
next_cursor = encode_cursor(items[-1]["id"]) if items and has_more else None
pagination = create_keyset_pagination_metadata(items, next_cursor, page_size)
return MyListResponse(items=items, pagination=pagination)
```
"""
import base64
import json
from typing import TYPE_CHECKING, Final
if TYPE_CHECKING:
from app.models.response import PaginationMetadata
# Names exported by ``from app.utils.pagination import *``.
# Declared once; an earlier single-line assignment was dead code because it
# was immediately overwritten by this list.
__all__ = [
    "PAGINATION_DEFAULTS",
    "get_offset",
    "compute_total_pages",
    "create_pagination_metadata",
    "encode_cursor",
    "decode_cursor",
    "create_keyset_pagination_metadata",
]
# Standardized pagination defaults
PAGINATION_DEFAULTS: Final[dict[str, int]] = {
@@ -148,3 +194,112 @@ def create_pagination_metadata(total: int, page: int, page_size: int) -> "Pagina
has_prev_page=has_prev_page,
)
# ---------------------------------------------------------------------------
# Cursor-Based Pagination Functions
# ---------------------------------------------------------------------------
def encode_cursor(row_id: int) -> str:
    """Encode a row ID into an opaque cursor token.

    The token is the base64 encoding of the compact JSON object
    ``{"id": <row_id>}``. Clients should treat it as opaque and pass it back
    unmodified.

    Args:
        row_id: The database row ID to encode. Must be an ``int`` (``bool`` is
            rejected) and >= 1.

    Returns:
        ASCII base64 string that can be passed to decode_cursor().

    Raises:
        TypeError: If row_id is not an integer (``bool`` counts as non-integer).
        ValueError: If row_id is < 1.

    Example:
        ```python
        cursor = encode_cursor(42)
        assert isinstance(cursor, str)
        assert decode_cursor(cursor) == 42
        ```
    """
    # bool is a subclass of int, so it must be excluded explicitly; otherwise
    # True would be serialized as {"id":true}, which is not a row ID.
    if isinstance(row_id, bool) or not isinstance(row_id, int):
        raise TypeError(f"row_id must be an int, got {type(row_id).__name__}")
    if row_id < 1:
        raise ValueError(f"row_id must be >= 1, got {row_id}")

    # Compact separators keep the token short and its form deterministic.
    payload = json.dumps({"id": row_id}, separators=(",", ":"))
    return base64.b64encode(payload.encode()).decode("ascii")
def decode_cursor(cursor: str) -> int:
    """Decode an opaque cursor token to retrieve the row ID.

    Inverse of encode_cursor(): base64-decodes the token, parses the result as
    JSON, and returns the integer stored under the ``"id"`` key.

    Args:
        cursor: Cursor string produced by encode_cursor().

    Returns:
        The row ID stored in the cursor (an ``int`` >= 1).

    Raises:
        ValueError: If the cursor is not valid ASCII/base64/UTF-8/JSON, if the
            decoded JSON is not an object, or if its ``"id"`` value is not an
            integer >= 1 (booleans are rejected).

    Example:
        ```python
        cursor = encode_cursor(42)
        assert decode_cursor(cursor) == 42
        ```
    """
    try:
        json_str = base64.b64decode(cursor.encode("ascii")).decode("utf-8")
        cursor_data = json.loads(json_str)
        # Valid JSON need not be an object; without this check a payload such
        # as `[1]` would fail on `.get` with an AttributeError instead of the
        # documented ValueError.
        if not isinstance(cursor_data, dict):
            raise ValueError(
                f"Invalid cursor: expected a JSON object, got {type(cursor_data).__name__}"
            )
        row_id = cursor_data.get("id")
        # bool is a subclass of int, so `{"id": true}` must be rejected explicitly.
        if isinstance(row_id, bool) or not isinstance(row_id, int) or row_id < 1:
            raise ValueError(f"Invalid cursor: 'id' field must be an integer >= 1, got {row_id}")
        return row_id
    except (ValueError, TypeError, json.JSONDecodeError) as e:
        # binascii.Error, UnicodeError and JSONDecodeError are all ValueError
        # subclasses; every failure is reported with one uniform message prefix.
        raise ValueError(f"Invalid cursor format: {e}") from e
def create_keyset_pagination_metadata(
    items: list[dict[str, object]] | list[object],
    next_cursor: str | None,
    page_size: int,
) -> "PaginationMetadata":
    """Build pagination metadata for a keyset (cursor-based) page.

    No total row count is needed: the presence of ``next_cursor`` alone decides
    whether another page exists.

    Args:
        items: The items of the current page. Accepted for call-site symmetry;
            this function does not inspect it.
        next_cursor: Cursor for fetching the next page, or None if there is no
            further page.
        page_size: The requested page size, copied into the metadata as-is.

    Returns:
        :class:`~app.models.response.PaginationMetadata` with ``page`` fixed to
        1, ``total`` and ``total_pages`` set to -1 (unknown), ``has_prev_page``
        always False, ``has_next_page`` true exactly when ``next_cursor`` is not
        None, and ``cursor`` set to ``next_cursor``.

    Example:
        ```python
        metadata = create_keyset_pagination_metadata(items, next_cursor, page_size=10)
        assert metadata.total == -1  # Unknown in cursor pagination
        assert metadata.has_next_page == (next_cursor is not None)
        ```
    """
    # Imported at call time: the module-level import of this name is guarded by
    # TYPE_CHECKING, so it is not available at runtime otherwise.
    from app.models.response import PaginationMetadata

    return PaginationMetadata(
        cursor=next_cursor,
        has_next_page=next_cursor is not None,
        has_prev_page=False,
        page=1,
        page_size=page_size,
        total=-1,
        total_pages=-1,
    )