refactor(backend): clean up jail service, add error handling service

- Extract jail status/processing to helper functions
- Add error_handling.py service for centralized error handling
- Update config.py with validation and defaults
- Update .env.example with all config options
- Remove obsolete Tasks.md, add Service-Development.md
- Minor fixes across routers and services

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-03 17:40:37 +02:00
parent 2df029f7e8
commit 2f9fc8076d
15 changed files with 332 additions and 154 deletions

View File

@@ -534,6 +534,35 @@ class Settings(BaseSettings):
ge=1,
description="Max config update requests per IP per minute.",
)
# -------------------------------------------------------------------------
# Pagination & display limits (configurable per deployment)
# -------------------------------------------------------------------------
max_page_size: int = Field(
default=500,
ge=1,
le=10000,
description=(
"Maximum number of records returned per paginated API response. "
"Individual endpoints may further limit this value. "
"Must be between 1 and 10000."
),
)
blocklist_preview_max_lines: int = Field(
default=100,
ge=1,
description=(
"Maximum number of IP lines returned in a blocklist source preview. "
"Must be at least 1."
),
)
history_retention_days: int = Field(
default=90,
ge=1,
description=(
"Number of days historical ban records are retained before being "
"archived or purged by the cleanup task. Must be at least 1."
),
)
@field_validator("elasticsearch_hosts", mode="before")
@classmethod

View File

@@ -433,6 +433,7 @@ async def preview_blocklist(
source_id: int,
http_session: HttpSessionDep,
blocklist_ctx: BlocklistServiceContextDep,
settings: SettingsDep,
_auth: AuthDep,
) -> PreviewResponse:
"""Download and preview a sample of a blocklist source.
@@ -455,7 +456,9 @@ async def preview_blocklist(
raise BlocklistSourceNotFoundError(source_id)
try:
domain_result = await blocklist_service.preview_source(source.url, http_session)
domain_result = await blocklist_service.preview_source(
source.url, http_session, sample_lines=settings.blocklist_preview_max_lines
)
return blocklist_mappers.map_domain_preview_result_to_response(domain_result)
except ValueError as exc:
raise BadRequestError(f"Could not fetch blocklist: {exc}") from exc

View File

@@ -24,6 +24,7 @@ from app.dependencies import (
GeoCacheDep,
HttpSessionDep,
ServerStatusDep,
SettingsDep,
)
from app.mappers import (
map_domain_ban_trend_to_response,
@@ -101,13 +102,14 @@ async def get_dashboard_bans(
socket_path: Fail2BanSocketDep,
http_session: HttpSessionDep,
geo_cache: GeoCacheDep,
settings: SettingsDep,
range: TimeRange = Query(default=_DEFAULT_RANGE, description="Time-range preset."),
source: Literal["fail2ban", "archive"] = Query(
default="fail2ban",
description="Data source: 'fail2ban' or 'archive'.",
),
page: int = Query(default=1, ge=1, description="1-based page number."),
page_size: int = Query(default=DEFAULT_PAGE_SIZE, ge=1, le=500, description="Items per page."),
page_size: int = Query(default=DEFAULT_PAGE_SIZE, ge=1, description="Items per page."),
origin: BanOrigin | None = Query(
default=None,
description="Filter by ban origin: 'blocklist' or 'selfblock'. Omit for all.",
@@ -143,6 +145,7 @@ async def get_dashboard_bans(
source=source,
page=page,
page_size=page_size,
max_page_size=settings.max_page_size,
http_session=http_session,
app_db=ban_ctx.db,
geo_cache=geo_cache,

View File

@@ -25,6 +25,7 @@ from app.dependencies import (
Fail2BanSocketDep,
HistoryServiceContextDep,
HttpSessionDep,
SettingsDep,
)
from app.exceptions import HistoryNotFoundError
from app.mappers import history_mappers
@@ -54,6 +55,7 @@ async def get_history(
socket_path: Fail2BanSocketDep,
http_session: HttpSessionDep,
fail2ban_metadata_service: Fail2BanMetadataServiceDep,
settings: SettingsDep,
range: TimeRange | None = Query(
default=None,
description="Optional time-range filter. Omit for all-time.",
@@ -78,8 +80,7 @@ async def get_history(
page_size: int = Query(
default=DEFAULT_PAGE_SIZE,
ge=1,
le=500,
description="Items per page (max 500).",
description="Items per page.",
),
) -> HistoryListResponse:
"""Return a paginated list of historical bans with optional filters.
@@ -114,6 +115,7 @@ async def get_history(
source=source,
page=page,
page_size=page_size,
max_page_size=settings.max_page_size,
http_session=http_session,
db=history_ctx.db,
fail2ban_metadata_service=fail2ban_metadata_service,
@@ -138,6 +140,7 @@ async def get_history_archive(
socket_path: Fail2BanSocketDep,
http_session: HttpSessionDep,
fail2ban_metadata_service: Fail2BanMetadataServiceDep,
settings: SettingsDep,
range: TimeRange | None = Query(
default=None,
description="Optional time-range filter. Omit for all-time.",
@@ -145,7 +148,7 @@ async def get_history_archive(
jail: str | None = Query(default=None, description="Restrict results to this jail name."),
ip: str | None = Query(default=None, description="Restrict results to IPs matching this prefix."),
page: int = Query(default=1, ge=1, description="1-based page number."),
page_size: int = Query(default=DEFAULT_PAGE_SIZE, ge=1, le=500, description="Items per page (max 500)."),
page_size: int = Query(default=DEFAULT_PAGE_SIZE, ge=1, description="Items per page."),
) -> HistoryListResponse:
domain_result = await history_service.list_history(
@@ -156,6 +159,7 @@ async def get_history_archive(
source="archive",
page=page,
page_size=page_size,
max_page_size=settings.max_page_size,
http_session=http_session,
db=history_ctx.db,
fail2ban_metadata_service=fail2ban_metadata_service,

View File

@@ -47,7 +47,6 @@ from app.utils.async_utils import logged_task
from app.utils.constants import (
DEFAULT_PAGE_SIZE,
FAIL2BAN_SOCKET_TIMEOUT,
MAX_PAGE_SIZE,
)
from app.utils.fail2ban_client import (
Fail2BanClient,
@@ -87,7 +86,11 @@ async def get_fail2ban_db_path(socket_path: str) -> str:
async def ban_ip(socket_path: str, jail: str, ip: str) -> None:
"""Ban an IP address in the specified jail."""
"""Ban an IP address in the specified jail.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError or JailOperationError.
Router converts to HTTP 404 or 409.
"""
try:
ipaddress.ip_address(ip)
except ValueError as exc:
@@ -348,6 +351,7 @@ async def list_bans(
source: str = "fail2ban",
page: int = 1,
page_size: int = DEFAULT_PAGE_SIZE,
max_page_size: int = 500,
http_session: aiohttp.ClientSession | None = None,
app_db: aiosqlite.Connection | None = None,
geo_cache: GeoCache | None = None,
@@ -375,8 +379,9 @@ async def list_bans(
range_: Time-range preset (``"24h"``, ``"7d"``, ``"30d"``, or
``"365d"``).
page: 1-based page number (default: ``1``).
page_size: Maximum items per page, capped at ``MAX_PAGE_SIZE``
page_size: Maximum items per page, capped at ``max_page_size``
(default: ``100``).
max_page_size: Deployment-configured maximum page size (default: ``500``).
http_session: Optional shared :class:`aiohttp.ClientSession`. When
provided, :meth:`GeoCache.lookup_batch` is used
for efficient bulk geo resolution.
@@ -393,7 +398,7 @@ async def list_bans(
"""
since: int = since_unix(range_)
effective_page_size: int = min(page_size, MAX_PAGE_SIZE)
effective_page_size: int = min(page_size, max_page_size)
offset: int = (page - 1) * effective_page_size
if source not in ("fail2ban", "archive"):

View File

@@ -0,0 +1,64 @@
"""Error handling contracts for services.
Defines the three allowed error handling patterns so callers know what to
expect from any service method.
Pattern Selection
=================
- ABORT_ON_ERROR: Operations where failure must propagate (auth, writes, config changes)
- RETURN_DEFAULT: Informational reads where partial data is acceptable
- PARTIAL_RESULT: Operations on collections where some items may fail independently
Switching patterns is a breaking change — document in changelog.
"""
from __future__ import annotations
# String identifiers for the three error-handling patterns a service method
# may follow. The bare string literal under each constant documents it.
ABORT_ON_ERROR = "abort_on_error"
"""Raise an exception. Router converts to HTTP. Use for auth, writes, state changes."""
RETURN_DEFAULT = "return_default"
"""Return empty result and log warning. Never raises. Use for informational reads."""
PARTIAL_RESULT = "partial_result"
"""Return (result, errors) tuple. Use for batch operations on collections."""
class ServiceErrorContract:
    """Namespace describing how a service or method reports failures.

    Exactly one of three patterns applies to any service method:

    ABORT_ON_ERROR
        The method raises; the router turns the exception into an HTTP
        response. Intended for authentication, authorization, writes,
        state changes, and anything where partial success is meaningless.

    RETURN_DEFAULT
        The method logs a warning and hands back an empty/None result, so
        the caller receives a valid result with no items instead of an
        error. Intended for informational reads (list, get) where
        unavailable infrastructure should not block the UI.

    PARTIAL_RESULT
        The method returns the successful items together with a list of
        errors and leaves the decision to the caller. Intended for batch
        operations and multi-item fetches where one failing item does not
        invalidate the rest.
    """

    # Re-export the module-level identifiers so callers can reach them
    # through the class as well.
    ABORT_ON_ERROR = ABORT_ON_ERROR
    RETURN_DEFAULT = RETURN_DEFAULT
    PARTIAL_RESULT = PARTIAL_RESULT

    @classmethod
    def doc(cls, pattern: str, *, since: str | None = None) -> str:
        """Build a one-line docstring fragment for *pattern*.

        Args:
            pattern: One of the three pattern identifiers.
            since: Optional version label; when truthy it is appended as
                ``(Since: ...)``.

        Returns:
            The description sentence(s) for the pattern, optionally followed
            by the ``since`` suffix.

        Raises:
            KeyError: If *pattern* is not one of the three known identifiers.
        """
        fragments = {
            ABORT_ON_ERROR: "Raises exceptions on error. Router handles conversion to HTTP.",
            RETURN_DEFAULT: "Returns empty result and logs warning on error. Never raises.",
            PARTIAL_RESULT: "Returns (result, errors) tuple. Errors collected, not raised.",
        }
        text = fragments[pattern]
        return f"{text} (Since: {since})" if since else text

View File

@@ -33,7 +33,7 @@ from app.models.history_domain import (
)
from app.repositories import fail2ban_db_repo
from app.repositories import history_archive_repo as default_history_archive_repo
from app.utils.constants import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
from app.utils.constants import DEFAULT_PAGE_SIZE
from app.utils.fail2ban_db_utils import parse_data_json, ts_to_iso
from app.utils.time_utils import since_unix
@@ -184,6 +184,7 @@ async def list_history(
source: str = "fail2ban",
page: int = 1,
page_size: int = DEFAULT_PAGE_SIZE,
max_page_size: int = 500,
http_session: aiohttp.ClientSession | None = None,
geo_enricher: GeoEnricher | None = None,
db: aiosqlite.Connection | None = None,
@@ -203,7 +204,8 @@ async def list_history(
ip_filter: If given, restrict results to bans for this exact IP
(or a prefix — the query uses ``LIKE ip_filter%``).
page: 1-based page number (default: ``1``).
page_size: Maximum items per page, capped at ``MAX_PAGE_SIZE``.
page_size: Maximum items per page, capped at ``max_page_size``.
max_page_size: Deployment-configured maximum page size (default: ``500``).
http_session: Optional shared :class:`aiohttp.ClientSession` (unused;
kept for backward compatibility).
geo_enricher: Optional async callable ``(ip: str) -> GeoInfo | None``.
@@ -216,7 +218,7 @@ async def list_history(
:class:`~app.models.history_domain.DomainHistoryList` with paginated items
and the total matching count.
"""
effective_page_size: int = min(page_size, MAX_PAGE_SIZE)
effective_page_size: int = min(page_size, max_page_size)
# Build WHERE clauses dynamically.
since: int | None = None

View File

@@ -24,12 +24,11 @@ import structlog
from app.exceptions import JailNotFoundError, JailOperationError
from app.models.ban_domain import DomainActiveBan
from app.models.config import BantimeEscalation
from app.models.geo import GeoDetail, IpLookupResponse
from app.models.jail_domain import (
DomainJailBannedIps,
DomainBantimeEscalation,
DomainJail,
DomainJailBannedIps,
DomainJailDetail,
DomainJailList,
DomainJailStatus,
@@ -50,7 +49,6 @@ from app.utils.fail2ban_response import (
to_dict,
)
from app.utils.jail_socket import reload_all
from app.utils.pagination import create_pagination_metadata
from app.utils.runtime_state import JailServiceState # noqa: TC001
if TYPE_CHECKING:
@@ -190,9 +188,8 @@ async def list_jails(socket_path: str, state: JailServiceState) -> DomainJailLis
Returns:
:class:`~app.models.jail_domain.DomainJailList` with all active jails.
Raises:
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises Fail2BanConnectionError when the
socket is unreachable. An empty jail list is not an error — an empty
DomainJailList is returned.
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
@@ -344,10 +341,8 @@ async def get_jail(socket_path: str, name: str) -> DomainJailDetail:
Returns:
:class:`~app.models.jail_domain.DomainJailDetail` with the full jail.
Raises:
JailNotFoundError: If *name* is not a known jail.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
@@ -460,11 +455,8 @@ async def start_jail(socket_path: str, name: str) -> None:
socket_path: Path to the fail2ban Unix domain socket.
name: Jail name to start.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
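Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (502).
NOTE(review): the start/stop/idle/reload docstrings give 502 for
Fail2BanConnectionError while get_jail and the ignore-list functions give
503 — confirm the status code the router actually returns and align them.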
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -486,10 +478,8 @@ async def stop_jail(socket_path: str, name: str) -> None:
socket_path: Path to the fail2ban Unix domain socket.
name: Jail name to stop.
Raises:
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (502).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -514,11 +504,8 @@ async def set_idle(socket_path: str, name: str, *, on: bool) -> None:
name: Jail name.
on: Pass ``True`` to enable idle, ``False`` to disable it.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (502).
"""
state = "on" if on else "off"
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
@@ -545,11 +532,8 @@ async def reload_jail(socket_path: str, name: str) -> None:
socket_path: Path to the fail2ban Unix domain socket.
name: Jail name to reload.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (502).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -879,10 +863,8 @@ async def get_ignore_list(socket_path: str, name: str) -> list[str]:
Returns:
List of IP addresses and CIDR networks on the jail's ignore list.
Raises:
JailNotFoundError: If *name* is not a known jail.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -932,11 +914,8 @@ async def del_ignore_ip(socket_path: str, name: str, ip: str) -> None:
name: Jail name.
ip: IP address or CIDR network to remove.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -958,10 +937,8 @@ async def get_ignore_self(socket_path: str, name: str) -> bool:
Returns:
``True`` when ``ignoreself`` is enabled for the jail.
Raises:
JailNotFoundError: If *name* is not a known jail.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -981,11 +958,8 @@ async def set_ignore_self(socket_path: str, name: str, *, on: bool) -> None:
name: Jail name.
on: ``True`` to enable ignoreself, ``False`` to disable.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (503).
"""
value = "true" if on else "false"
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)

View File

@@ -15,8 +15,8 @@ from typing import cast
import structlog
from app.exceptions import Fail2BanConnectionError, Fail2BanProtocolError, ServerOperationError
from app.models.server_domain import DomainServerSettings, DomainServerSettingsResult
from app.models.server import ServerSettingsUpdate
from app.models.server_domain import DomainServerSettings, DomainServerSettingsResult
from app.utils.constants import FAIL2BAN_SOCKET_TIMEOUT
from app.utils.fail2ban_client import Fail2BanClient, Fail2BanCommand, Fail2BanResponse
from app.utils.fail2ban_response import ok
@@ -103,6 +103,10 @@ async def get_settings(socket_path: str) -> DomainServerSettingsResult:
Raises:
~app.utils.fail2ban_client.Fail2BanConnectionError: Socket unreachable.
"""
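#: Error contract: ABORT_ON_ERROR. Raises on an unreachable socket or an
#: invalid response from fail2ban. Router converts Fail2BanConnectionError
#: to HTTP 503.
#: NOTE(review): this function was also annotated RETURN_DEFAULT ("fail2ban
#: socket may be unavailable on fresh boot; UI should still render with
#: empty/default values"). The two contracts are mutually exclusive and the
#: docstring lists Fail2BanConnectionError under Raises — confirm which one
#: the implementation follows.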
import asyncio
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
@@ -156,9 +160,8 @@ async def update_settings(socket_path: str, update: ServerSettingsUpdate) -> Non
socket_path: Path to the fail2ban Unix domain socket.
update: Partial update payload.
Raises:
ServerOperationError: If any ``set`` command is rejected.
~app.utils.fail2ban_client.Fail2BanConnectionError: Socket unreachable.
Error contract: ABORT_ON_ERROR. Raises ServerOperationError (400) or
Fail2BanConnectionError (503). Router converts to HTTP.
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)

View File

@@ -86,7 +86,7 @@ TIME_RANGE_SLACK_SECONDS: Final[int] = 60
# ---------------------------------------------------------------------------
DEFAULT_PAGE_SIZE: Final[int] = 100
MAX_PAGE_SIZE: Final[int] = 500
"""Default items per page for paginated endpoints."""
# ---------------------------------------------------------------------------
# Blocklist import
@@ -95,9 +95,6 @@ MAX_PAGE_SIZE: Final[int] = 500
BLOCKLIST_IMPORT_DEFAULT_HOUR: Final[int] = 3
"""Default hour (UTC) for the nightly blocklist import job."""
BLOCKLIST_PREVIEW_MAX_LINES: Final[int] = 100
"""Maximum number of IP lines returned by the blocklist preview endpoint."""
# ---------------------------------------------------------------------------
# Health check
# ---------------------------------------------------------------------------