refactor(backend): clean up jail service, add error handling service

- Extract jail status/processing to helper functions
- Add error_handling.py service for centralized error handling
- Update config.py with validation and defaults
- Update .env.example with all config options
- Remove obsolete Tasks.md, add Service-Development.md
- Minor fixes across routers and services

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-03 17:40:37 +02:00
parent 2df029f7e8
commit 2f9fc8076d
15 changed files with 332 additions and 154 deletions

View File

@@ -47,7 +47,6 @@ from app.utils.async_utils import logged_task
from app.utils.constants import (
DEFAULT_PAGE_SIZE,
FAIL2BAN_SOCKET_TIMEOUT,
MAX_PAGE_SIZE,
)
from app.utils.fail2ban_client import (
Fail2BanClient,
@@ -87,7 +86,11 @@ async def get_fail2ban_db_path(socket_path: str) -> str:
async def ban_ip(socket_path: str, jail: str, ip: str) -> None:
"""Ban an IP address in the specified jail."""
"""Ban an IP address in the specified jail.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError or JailOperationError.
Router converts to HTTP 404 or 409.
"""
try:
ipaddress.ip_address(ip)
except ValueError as exc:
@@ -348,6 +351,7 @@ async def list_bans(
source: str = "fail2ban",
page: int = 1,
page_size: int = DEFAULT_PAGE_SIZE,
max_page_size: int = 500,
http_session: aiohttp.ClientSession | None = None,
app_db: aiosqlite.Connection | None = None,
geo_cache: GeoCache | None = None,
@@ -375,8 +379,9 @@ async def list_bans(
range_: Time-range preset (``"24h"``, ``"7d"``, ``"30d"``, or
``"365d"``).
page: 1-based page number (default: ``1``).
page_size: Maximum items per page, capped at ``MAX_PAGE_SIZE``
page_size: Maximum items per page, capped at ``max_page_size``
(default: ``100``).
max_page_size: Deployment-configured maximum page size (default: ``500``).
http_session: Optional shared :class:`aiohttp.ClientSession`. When
provided, :meth:`GeoCache.lookup_batch` is used
for efficient bulk geo resolution.
@@ -393,7 +398,7 @@ async def list_bans(
"""
since: int = since_unix(range_)
effective_page_size: int = min(page_size, MAX_PAGE_SIZE)
effective_page_size: int = min(page_size, max_page_size)
offset: int = (page - 1) * effective_page_size
if source not in ("fail2ban", "archive"):

View File

@@ -0,0 +1,64 @@
"""Error handling contracts for services.
Defines the three allowed error handling patterns so callers know what to
expect from any service method.
Pattern Selection
=================
- ABORT_ON_ERROR: Operations where failure must propagate (auth, writes, config changes)
- RETURN_DEFAULT: Informational reads where partial data is acceptable
- PARTIAL_RESULT: Operations on collections where some items may fail independently
Switching patterns is a breaking change — document in changelog.
"""
from __future__ import annotations
ABORT_ON_ERROR = "abort_on_error"
"""Raise an exception. Router converts to HTTP. Use for auth, writes, state changes."""

RETURN_DEFAULT = "return_default"
"""Return empty result and log warning. Never raises. Use for informational reads."""

PARTIAL_RESULT = "partial_result"
"""Return (result, errors) tuple. Use for batch operations on collections."""


class ServiceErrorContract:
    """Documents the error handling pattern for a service or method.

    Callers use this to understand how errors affect the return value:

    ABORT_ON_ERROR
        Raise an exception. Router handles it, converts to HTTP response.
        Used for: authentication, authorization, write operations,
        state changes, and any operation where partial success is meaningless.

    RETURN_DEFAULT
        Return empty/None result and log a warning. Caller gets a valid
        result with no items, not an error.
        Used for: informational reads (list, get) where infrastructure
        unavailability should not block the UI.

    PARTIAL_RESULT
        Return a result that contains both successful items and a list
        of errors. Caller decides what to do with each.
        Used for: batch operations, multi-item fetches where one item
        failing does not invalidate the rest.
    """

    # Re-export the module-level pattern names as class attributes so callers
    # can write ``ServiceErrorContract.ABORT_ON_ERROR``.
    ABORT_ON_ERROR = ABORT_ON_ERROR
    RETURN_DEFAULT = RETURN_DEFAULT
    PARTIAL_RESULT = PARTIAL_RESULT

    # Pattern -> one-line description. Built once when the class is created
    # rather than on every ``doc()`` call.
    _DESCRIPTIONS = {
        ABORT_ON_ERROR: "Raises exceptions on error. Router handles conversion to HTTP.",
        RETURN_DEFAULT: "Returns empty result and logs warning on error. Never raises.",
        PARTIAL_RESULT: "Returns (result, errors) tuple. Errors collected, not raised.",
    }

    @classmethod
    def doc(cls, pattern: str, *, since: str | None = None) -> str:
        """Return a docstring fragment describing the error pattern.

        Args:
            pattern: One of ``ABORT_ON_ERROR``, ``RETURN_DEFAULT`` or
                ``PARTIAL_RESULT``.
            since: Optional version label. When truthy, ``" (Since: <since>)"``
                is appended to the description; ``None`` and ``""`` add nothing.

        Returns:
            The one-line description for *pattern*, optionally suffixed with
            the ``since`` label.

        Raises:
            KeyError: If *pattern* is not one of the three known patterns.
                The message names the offending value and the valid choices.
        """
        try:
            desc = cls._DESCRIPTIONS[pattern]
        except KeyError:
            valid = ", ".join(sorted(cls._DESCRIPTIONS))
            # Keep KeyError (the type the plain dict lookup raised) so existing
            # handlers still match, but say what went wrong.
            raise KeyError(
                f"Unknown error pattern {pattern!r}; expected one of: {valid}"
            ) from None
        if since:
            return f"{desc} (Since: {since})"
        return desc

View File

@@ -33,7 +33,7 @@ from app.models.history_domain import (
)
from app.repositories import fail2ban_db_repo
from app.repositories import history_archive_repo as default_history_archive_repo
from app.utils.constants import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE
from app.utils.constants import DEFAULT_PAGE_SIZE
from app.utils.fail2ban_db_utils import parse_data_json, ts_to_iso
from app.utils.time_utils import since_unix
@@ -184,6 +184,7 @@ async def list_history(
source: str = "fail2ban",
page: int = 1,
page_size: int = DEFAULT_PAGE_SIZE,
max_page_size: int = 500,
http_session: aiohttp.ClientSession | None = None,
geo_enricher: GeoEnricher | None = None,
db: aiosqlite.Connection | None = None,
@@ -203,7 +204,8 @@ async def list_history(
ip_filter: If given, restrict results to bans for this exact IP
(or a prefix — the query uses ``LIKE ip_filter%``).
page: 1-based page number (default: ``1``).
page_size: Maximum items per page, capped at ``MAX_PAGE_SIZE``.
page_size: Maximum items per page, capped at ``max_page_size``.
max_page_size: Deployment-configured maximum page size (default: ``500``).
http_session: Optional shared :class:`aiohttp.ClientSession` (unused;
kept for backward compatibility).
geo_enricher: Optional async callable ``(ip: str) -> GeoInfo | None``.
@@ -216,7 +218,7 @@ async def list_history(
:class:`~app.models.history_domain.DomainHistoryList` with paginated items
and the total matching count.
"""
effective_page_size: int = min(page_size, MAX_PAGE_SIZE)
effective_page_size: int = min(page_size, max_page_size)
# Build WHERE clauses dynamically.
since: int | None = None

View File

@@ -24,12 +24,11 @@ import structlog
from app.exceptions import JailNotFoundError, JailOperationError
from app.models.ban_domain import DomainActiveBan
from app.models.config import BantimeEscalation
from app.models.geo import GeoDetail, IpLookupResponse
from app.models.jail_domain import (
DomainJailBannedIps,
DomainBantimeEscalation,
DomainJail,
DomainJailBannedIps,
DomainJailDetail,
DomainJailList,
DomainJailStatus,
@@ -50,7 +49,6 @@ from app.utils.fail2ban_response import (
to_dict,
)
from app.utils.jail_socket import reload_all
from app.utils.pagination import create_pagination_metadata
from app.utils.runtime_state import JailServiceState # noqa: TC001
if TYPE_CHECKING:
@@ -190,9 +188,8 @@ async def list_jails(socket_path: str, state: JailServiceState) -> DomainJailLis
Returns:
:class:`~app.models.jail_domain.DomainJailList` with all active jails.
Raises:
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises Fail2BanConnectionError when the
socket is unreachable. An empty jail list is not an error — returns an empty
DomainJailList.
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
@@ -344,10 +341,8 @@ async def get_jail(socket_path: str, name: str) -> DomainJailDetail:
Returns:
:class:`~app.models.jail_domain.DomainJailDetail` with the full jail.
Raises:
JailNotFoundError: If *name* is not a known jail.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
@@ -460,11 +455,8 @@ async def start_jail(socket_path: str, name: str) -> None:
socket_path: Path to the fail2ban Unix domain socket.
name: Jail name to start.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -486,10 +478,8 @@ async def stop_jail(socket_path: str, name: str) -> None:
socket_path: Path to the fail2ban Unix domain socket.
name: Jail name to stop.
Raises:
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -514,11 +504,8 @@ async def set_idle(socket_path: str, name: str, *, on: bool) -> None:
name: Jail name.
on: Pass ``True`` to enable idle, ``False`` to disable it.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (503).
"""
state = "on" if on else "off"
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
@@ -545,11 +532,8 @@ async def reload_jail(socket_path: str, name: str) -> None:
socket_path: Path to the fail2ban Unix domain socket.
name: Jail name to reload.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -879,10 +863,8 @@ async def get_ignore_list(socket_path: str, name: str) -> list[str]:
Returns:
List of IP addresses and CIDR networks on the jail's ignore list.
Raises:
JailNotFoundError: If *name* is not a known jail.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -932,11 +914,8 @@ async def del_ignore_ip(socket_path: str, name: str, ip: str) -> None:
name: Jail name.
ip: IP address or CIDR network to remove.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -958,10 +937,8 @@ async def get_ignore_self(socket_path: str, name: str) -> bool:
Returns:
``True`` when ``ignoreself`` is enabled for the jail.
Raises:
JailNotFoundError: If *name* is not a known jail.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
Fail2BanConnectionError (503).
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
try:
@@ -981,11 +958,8 @@ async def set_ignore_self(socket_path: str, name: str, *, on: bool) -> None:
name: Jail name.
on: ``True`` to enable ignoreself, ``False`` to disable.
Raises:
JailNotFoundError: If *name* is not a known jail.
JailOperationError: If fail2ban reports the operation failed.
~app.utils.fail2ban_client.Fail2BanConnectionError: If the socket
cannot be reached.
Error contract: ABORT_ON_ERROR. Raises JailNotFoundError (404),
JailOperationError (409), Fail2BanConnectionError (503).
"""
value = "true" if on else "false"
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)

View File

@@ -15,8 +15,8 @@ from typing import cast
import structlog
from app.exceptions import Fail2BanConnectionError, Fail2BanProtocolError, ServerOperationError
from app.models.server_domain import DomainServerSettings, DomainServerSettingsResult
from app.models.server import ServerSettingsUpdate
from app.models.server_domain import DomainServerSettings, DomainServerSettingsResult
from app.utils.constants import FAIL2BAN_SOCKET_TIMEOUT
from app.utils.fail2ban_client import Fail2BanClient, Fail2BanCommand, Fail2BanResponse
from app.utils.fail2ban_response import ok
@@ -103,6 +103,10 @@ async def get_settings(socket_path: str) -> DomainServerSettingsResult:
Raises:
~app.utils.fail2ban_client.Fail2BanConnectionError: Socket unreachable.
"""
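#: Error contract: ABORT_ON_ERROR. Raises on an invalid response from fail2ban;
#: the router converts Fail2BanConnectionError to HTTP 503.
#: NOTE(review): this spot also declared RETURN_DEFAULT ("Fail2ban socket may be
#: unavailable on fresh boot; UI should still render with empty/default
#: values"), which contradicts the documented Raises in the docstring — confirm
#: which contract is actually intended.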
import asyncio
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)
@@ -156,9 +160,8 @@ async def update_settings(socket_path: str, update: ServerSettingsUpdate) -> Non
socket_path: Path to the fail2ban Unix domain socket.
update: Partial update payload.
Raises:
ServerOperationError: If any ``set`` command is rejected.
~app.utils.fail2ban_client.Fail2BanConnectionError: Socket unreachable.
Error contract: ABORT_ON_ERROR. Raises ServerOperationError (400) or
Fail2BanConnectionError (503). Router converts to HTTP.
"""
client = Fail2BanClient(socket_path=socket_path, timeout=FAIL2BAN_SOCKET_TIMEOUT)