This commit adds support for shipping logs to external centralized logging platforms, addressing the MEDIUM priority task for structured logging infrastructure.

## Key Changes:

### 1. New Documentation: Docs/Observability.md
- Comprehensive guide to logging architecture and configuration
- Covers all three supported platforms (Datadog, Papertrail, Elasticsearch)
- Includes best practices, security considerations, and troubleshooting
- Documents sensitive data handling and compliance requirements

### 2. Core Implementation: app/utils/external_logging.py
- ExternalLogHandler: Abstract base class for non-blocking log delivery
- DatadogLogHandler: HTTP API integration with JSON payloads
- PapertrailLogHandler: Syslog protocol over TCP
- ElasticsearchLogHandler: Bulk API integration with NDJSON format
- Features:
  - Async buffering with configurable batch size and flush interval
  - Exponential backoff retry logic
  - Non-blocking delivery (never blocks application logic)
  - Proper error handling and internal logging
  - Lifecycle management (start/shutdown)

### 3. Configuration: app/config.py
- New Settings fields for external logging:
  - external_logging_enabled (default: False)
  - external_logging_provider (datadog/papertrail/elasticsearch)
  - external_logging_buffer_size (default: 1000)
  - external_logging_flush_interval_seconds (default: 5.0)
  - Provider-specific configuration (API keys, hosts, batch sizes)
- All fields have sensible defaults
- Full field validation and normalization

### 4. Integration: app/main.py
- Global _external_log_handler for application lifecycle
- _external_logging_processor: structlog processor for handler integration
- Updated _configure_logging(): Add handler to processor chain when enabled
- Updated _lifespan(): Initialize handler before startup, shutdown on termination

### 5. Tests: backend/tests/test_external_logging.py
- 20 comprehensive tests covering all handlers and factory
- Configuration validation tests
- All tests passing

## Design Decisions:

1. **Non-blocking Delivery**: External logging never blocks request handling. Failures are logged locally but don't impact the application.
2. **Buffering Strategy**: In-memory buffer with configurable size prevents unbounded memory growth. When the buffer fills, the oldest logs are dropped with a warning.
3. **Retry Logic**: Transient failures (timeouts, 5xx errors) are retried with exponential backoff. Permanent failures (bad credentials) are logged and skipped.
4. **Disabled by Default**: External logging is opt-in via environment variables, maintaining backward compatibility with existing deployments.
5. **Provider Flexibility**: Support for multiple platforms allows users to choose based on their infrastructure (cloud-native, on-premise, etc.).

## Backward Compatibility:
- All new configuration fields have defaults
- External logging disabled by default
- No changes to existing logging behavior unless explicitly configured
- No new required dependencies

## Testing:
- All 20 new tests passing
- Existing tests unaffected (same count of passing tests)
- Configuration validation tested
- Handler creation and lifecycle management tested

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1012 lines
35 KiB
Python
1012 lines
35 KiB
Python
"""BanGUI FastAPI application factory.
|
|
|
|
Call :func:`create_app` to obtain a configured :class:`fastapi.FastAPI`
|
|
instance suitable for direct use with an ASGI server (e.g. ``uvicorn``) or
|
|
in tests via ``httpx.AsyncClient``.
|
|
|
|
The lifespan handler manages all shared resources — database connection, HTTP
|
|
session, and scheduler — so every component can rely on them being available
|
|
on ``app.state`` throughout the request lifecycle.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
import sys
|
|
from contextlib import asynccontextmanager
|
|
from typing import TYPE_CHECKING, Any
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import AsyncGenerator, Awaitable, Callable
|
|
|
|
from starlette.responses import Response as StarletteResponse
|
|
|
|
import structlog
|
|
from fastapi import FastAPI, HTTPException, Request, status
|
|
from fastapi.exceptions import RequestValidationError
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.responses import JSONResponse, RedirectResponse
|
|
from starlette.middleware.base import BaseHTTPMiddleware
|
|
|
|
from app import __version__
|
|
from app.config import Settings, get_settings
|
|
from app.exceptions import (
|
|
AuthenticationError,
|
|
BadRequestError,
|
|
ConflictError,
|
|
DomainError,
|
|
Fail2BanConnectionError,
|
|
Fail2BanProtocolError,
|
|
NotFoundError,
|
|
OperationError,
|
|
RateLimitError,
|
|
ServiceUnavailableError,
|
|
)
|
|
from app.middleware.correlation import CorrelationIdMiddleware
|
|
from app.middleware.csrf import CsrfMiddleware
|
|
from app.middleware.rate_limit import RateLimitMiddleware
|
|
from app.models.response import ErrorResponse
|
|
from app.routers import (
|
|
auth,
|
|
bans,
|
|
blocklist,
|
|
config,
|
|
dashboard,
|
|
file_config,
|
|
geo,
|
|
health,
|
|
history,
|
|
jails,
|
|
server,
|
|
setup,
|
|
)
|
|
from app.startup import startup_shared_resources
|
|
from app.utils.external_logging import (
|
|
ExternalLogHandler,
|
|
create_external_log_handler,
|
|
)
|
|
from app.utils.rate_limiter import GlobalRateLimiter, RateLimiter
|
|
from app.utils.runtime_state import ApplicationState, RuntimeState
|
|
from app.utils.scheduler_lock import release_scheduler_lock
|
|
from app.utils.session_cache import InMemorySessionCache, NoOpSessionCache
|
|
from app.utils.setup_state import is_setup_complete_cached, set_setup_complete_cache
|
|
|
|
# Module-level structlog logger shared by the lifespan handler and the
# exception handlers defined in this module.
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Logging configuration
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Handle to the active external log handler. It is assigned inside
# ``_lifespan`` and read by ``_external_logging_processor``; ``None`` means
# no handler is installed and log events are not queued for external delivery.
_external_log_handler: ExternalLogHandler | None = None
|
|
|
|
|
|
def _external_logging_processor(
    logger: logging.Logger, method_name: str, event_dict: dict[str, Any]
) -> dict[str, Any]:
    """Hand a copy of each log event to the external log handler, if any.

    This is a pass-through structlog processor: the event dictionary is
    returned untouched so that later processors can render it.

    Args:
        logger: The logger instance (unused).
        method_name: The name of the method called on the logger (unused).
        event_dict: The event dictionary from structlog.

    Returns:
        The same ``event_dict`` object that was passed in.
    """
    handler = _external_log_handler
    if handler is None:
        # No handler installed: nothing to queue.
        return event_dict

    # Queue a shallow copy so later processors that mutate ``event_dict`` do
    # not add or remove keys on the queued entry.
    snapshot = event_dict.copy()
    handler.queue_log(snapshot)
    return event_dict
|
|
|
|
|
|
def _configure_logging(log_level: str, settings: Settings | None = None) -> None:
    """Set up stdlib logging and structlog for JSON output on stdout.

    The external logging processor is inserted immediately before the JSON
    renderer when ``settings`` enables external logging and names a provider.

    Args:
        log_level: One of ``debug``, ``info``, ``warning``, ``error``, ``critical``.
        settings: Optional Settings object to configure external logging.
    """
    level: int = logging.getLevelName(log_level.upper())
    logging.basicConfig(level=level, stream=sys.stdout, format="%(message)s")

    base_chain = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.filter_by_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
    ]

    ship_externally = bool(
        settings and settings.external_logging_enabled and settings.external_logging_provider
    )
    # The external processor must see the structured dict, so it goes before
    # the renderer that turns the event into a JSON string.
    external_chain = [_external_logging_processor] if ship_externally else []

    # NOTE(review): with cache_logger_on_first_use=True, loggers that were
    # already used under an earlier configuration may keep their old processor
    # chain -- confirm the call order against the structlog docs.
    structlog.configure(
        processors=[*base_chain, *external_chain, structlog.processors.JSONRenderer()],
        wrapper_class=structlog.stdlib.BoundLogger,
        context_class=dict,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Lifespan
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _update_session_cache(app: FastAPI, settings: Settings) -> None:
    """Install the session cache backend that matches the given settings.

    An :class:`InMemorySessionCache` is used only when session caching is
    enabled *and* the configured TTL is greater than zero; in every other
    case a :class:`NoOpSessionCache` is installed.

    Args:
        app: The :class:`fastapi.FastAPI` instance.
        settings: The effective application settings.
    """
    if settings.session_cache_enabled and settings.session_cache_ttl_seconds > 0.0:
        backend = InMemorySessionCache()
    else:
        backend = NoOpSessionCache()
    app.state.session_cache = backend
|
|
|
|
|
|
@asynccontextmanager
async def _lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Manage the lifetime of all shared application resources.

    On startup the shared resources (HTTP session, scheduler, startup
    database connection) are created and stored on ``app.state``, the
    optional external log handler is created, logging is configured, and the
    session cache and rate limiters are installed.

    On shutdown the scheduler is stopped, the external log handler is shut
    down *while the shared HTTP session is still open*, the HTTP session is
    closed, the scheduler lock is released and the startup database
    connection is closed.

    The scheduler lock is released on shutdown to allow other instances to
    acquire it during rolling deployments or after a crash.

    Args:
        app: The :class:`fastapi.FastAPI` instance being started.
    """
    global _external_log_handler  # noqa: PLW0603

    settings: Settings = app.state.settings

    http_session, scheduler, startup_db = await startup_shared_resources(app, settings)
    app.state.http_session = http_session
    app.state.scheduler = scheduler
    app.state.startup_db = startup_db

    # Initialize external logging handler before configuring logging
    _external_log_handler = None
    if settings.external_logging_enabled and settings.external_logging_provider:
        try:
            _external_log_handler = create_external_log_handler(
                provider=settings.external_logging_provider,
                api_key=settings.datadog_api_key,
                datadog_site=settings.datadog_site,
                datadog_batch_size=settings.datadog_batch_size,
                papertrail_host=settings.papertrail_host,
                papertrail_port=settings.papertrail_port,
                papertrail_program_name=settings.papertrail_program_name,
                elasticsearch_hosts=settings.elasticsearch_hosts,
                elasticsearch_index_prefix=settings.elasticsearch_index_prefix,
                elasticsearch_batch_size=settings.elasticsearch_batch_size,
                flush_interval_seconds=settings.external_logging_flush_interval_seconds,
                buffer_size=settings.external_logging_buffer_size,
                http_session=http_session,
            )
            # Compare against None (as the structlog processor does) so a
            # handler object that happens to be falsy is still started.
            if _external_log_handler is not None:
                _external_log_handler.start_periodic_flush()
        except ValueError as exc:
            log.warning(
                "external_logging_initialization_failed",
                error=str(exc),
            )

    # Now configure logging with the handler in place
    _configure_logging(settings.log_level, settings)

    log.info("bangui_starting_up", database_path=settings.database_path)

    # Ensure session cache is initialized based on effective settings.
    # This cache is process-local and not cluster-safe. In multi-worker
    # deployments, it should be replaced with a shared backend.
    _update_session_cache(app, settings)

    # Initialize the login rate limiter (5 attempts per 60 seconds per IP).
    # This is process-local and not cluster-safe. In multi-worker deployments,
    # each worker has independent counters, limiting the blast radius of attacks.
    app.state.login_rate_limiter = RateLimiter(max_attempts=5, window_seconds=60)

    # Initialize the global rate limiter (200 requests per 60 seconds per IP).
    # Applied to all endpoints via middleware. Process-local implementation.
    app.state.global_rate_limiter = GlobalRateLimiter(max_requests=200, window_seconds=60)

    log.info("bangui_started")

    try:
        yield
    finally:
        log.info("bangui_shutting_down")
        scheduler.shutdown(wait=False)

        # Shut down the external logging handler BEFORE closing the HTTP
        # session: the handler was constructed with ``http_session`` and
        # presumably uses it for its final flush.  Detach the global first so
        # log events emitted from here on are not queued into a handler that
        # is being (or has been) torn down.
        handler = _external_log_handler
        _external_log_handler = None
        if handler is not None:
            try:
                await handler.shutdown()
            except Exception as exc:
                log.error("external_logging_shutdown_failed", error=str(exc))

        await http_session.close()

        # Release the scheduler lock to allow other instances to take over
        try:
            await release_scheduler_lock(startup_db)
        except Exception as e:
            log.error("scheduler_lock_release_failed", error=str(e))
        finally:
            await startup_db.close()
        log.info("bangui_shut_down")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Exception handlers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _get_error_code(exc: Exception) -> str:
|
|
"""Get the machine-readable error code from an exception.
|
|
|
|
First checks if the exception has an error_code class attribute.
|
|
Falls back to converting the exception class name to snake_case.
|
|
|
|
Args:
|
|
exc: The exception instance.
|
|
|
|
Returns:
|
|
A snake_case error code string.
|
|
"""
|
|
if hasattr(exc, "error_code"):
|
|
return exc.error_code
|
|
|
|
exc_name = exc.__class__.__name__
|
|
snake_case = re.sub(r"(?<!^)(?=[A-Z])", "_", exc_name).lower()
|
|
return snake_case
|
|
|
|
|
|
def _get_error_metadata(exc: Exception) -> dict[str, str | int | float | bool | None]:
|
|
"""Get structured metadata from an exception.
|
|
|
|
Calls the exception's get_error_metadata() method if available.
|
|
|
|
Args:
|
|
exc: The exception instance.
|
|
|
|
Returns:
|
|
A dictionary of metadata safe for API responses.
|
|
"""
|
|
if hasattr(exc, "get_error_metadata") and callable(exc.get_error_metadata):
|
|
return exc.get_error_metadata()
|
|
return {}
|
|
|
|
|
|
def _get_correlation_id(request: Request) -> str | None:
|
|
"""Extract correlation ID from request state if available.
|
|
|
|
The correlation ID is set by CorrelationIdMiddleware.
|
|
|
|
Args:
|
|
request: The incoming FastAPI request.
|
|
|
|
Returns:
|
|
The correlation ID string, or None if not present.
|
|
"""
|
|
return getattr(request.state, "correlation_id", None)
|
|
|
|
|
|
async def _unhandled_exception_handler(
    request: Request,
    exc: Exception,
) -> JSONResponse:
    """Turn any otherwise-unhandled exception into a generic 500 response.

    The exception is logged (passed as ``exc_info``) together with the
    request path and method.  The client receives a fixed message that does
    not include any detail from the exception.

    Args:
        request: The incoming FastAPI request.
        exc: The unhandled exception.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 500.
    """
    log.error(
        "unhandled_exception",
        path=request.url.path,
        method=request.method,
        exc_info=exc,
    )
    payload = ErrorResponse(
        code="internal_error",
        detail="An unexpected error occurred. Please try again later.",
        metadata={},
        correlation_id=_get_correlation_id(request),
    ).model_dump()
    return JSONResponse(status_code=500, content=payload)
|
|
|
|
|
|
async def _fail2ban_connection_handler(
    request: Request,
    exc: Fail2BanConnectionError,
) -> JSONResponse:
    """Answer with ``502 Bad Gateway`` when fail2ban cannot be reached.

    The response metadata carries the exception's ``socket_path``.

    Args:
        request: The incoming FastAPI request.
        exc: The :class:`~app.exceptions.Fail2BanConnectionError`.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 502.
    """
    log.warning(
        "fail2ban_connection_error",
        path=request.url.path,
        method=request.method,
        error=str(exc),
    )
    payload = ErrorResponse(
        code="fail2ban_unreachable",
        detail="Cannot reach the fail2ban service. Check the server status page.",
        metadata={"socket_path": exc.socket_path},
        correlation_id=_get_correlation_id(request),
    ).model_dump()
    return JSONResponse(status_code=502, content=payload)
|
|
|
|
|
|
async def _fail2ban_protocol_handler(
    request: Request,
    exc: Fail2BanProtocolError,
) -> JSONResponse:
    """Return a ``502 Bad Gateway`` response for fail2ban protocol errors.

    The client-facing detail describes an unexpected response rather than an
    unreachable service, so it can be told apart from the message used for
    connection failures.

    Args:
        request: The incoming FastAPI request.
        exc: The :class:`~app.exceptions.Fail2BanProtocolError`.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 502.
    """
    log.warning(
        "fail2ban_protocol_error",
        path=request.url.path,
        method=request.method,
        error=str(exc),
    )
    error_response = ErrorResponse(
        code="fail2ban_protocol_error",
        # Previously this reused the "Cannot reach the fail2ban service"
        # text from the connection handler, which misdescribes a protocol
        # error.
        detail=(
            "Received an unexpected response from the fail2ban service. "
            "Check the server status page."
        ),
        metadata={},
        correlation_id=_get_correlation_id(request),
    )
    return JSONResponse(
        status_code=502,
        content=error_response.model_dump(),
    )
|
|
|
|
|
|
async def _not_found_handler(
    request: Request,
    exc: NotFoundError,
) -> JSONResponse:
    """Answer with ``404 Not Found`` when a domain entity is missing.

    Args:
        request: The incoming FastAPI request.
        exc: The not-found exception.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 404 whose body
        carries the exception's error code, message and metadata.
    """
    message = str(exc)
    log.warning(
        "domain_not_found",
        path=request.url.path,
        method=request.method,
        error=message,
    )
    payload = ErrorResponse(
        code=_get_error_code(exc),
        detail=message,
        metadata=_get_error_metadata(exc),
        correlation_id=_get_correlation_id(request),
    ).model_dump()
    return JSONResponse(status_code=status.HTTP_404_NOT_FOUND, content=payload)
|
|
|
|
|
|
async def _bad_request_handler(
    request: Request,
    exc: BadRequestError,
) -> JSONResponse:
    """Answer with ``400 Bad Request`` for validation and domain contract errors.

    Args:
        request: The incoming FastAPI request.
        exc: The validation exception.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 400 whose body
        carries the exception's error code, message and metadata.
    """
    message = str(exc)
    log.warning(
        "domain_bad_request",
        path=request.url.path,
        method=request.method,
        error=message,
    )
    payload = ErrorResponse(
        code=_get_error_code(exc),
        detail=message,
        metadata=_get_error_metadata(exc),
        correlation_id=_get_correlation_id(request),
    ).model_dump()
    return JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content=payload)
|
|
|
|
|
|
async def _conflict_handler(
    request: Request,
    exc: ConflictError,
) -> JSONResponse:
    """Answer with ``409 Conflict`` for domain state conflicts.

    Args:
        request: The incoming FastAPI request.
        exc: The conflict exception.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 409 whose body
        carries the exception's error code, message and metadata.
    """
    message = str(exc)
    log.warning(
        "domain_conflict",
        path=request.url.path,
        method=request.method,
        error=message,
    )
    payload = ErrorResponse(
        code=_get_error_code(exc),
        detail=message,
        metadata=_get_error_metadata(exc),
        correlation_id=_get_correlation_id(request),
    ).model_dump()
    return JSONResponse(status_code=status.HTTP_409_CONFLICT, content=payload)
|
|
|
|
|
|
async def _domain_error_handler(
    request: Request,
    exc: DomainError,
) -> JSONResponse:
    """Answer with ``500 Internal Server Error`` for domain write failures.

    The exception is logged at error level with ``exc_info`` attached.

    Args:
        request: The incoming FastAPI request.
        exc: The domain exception.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 500 whose body
        carries the exception's error code, message and metadata.
    """
    message = str(exc)
    log.error(
        "domain_internal_error",
        path=request.url.path,
        method=request.method,
        error=message,
        exc_info=exc,
    )
    payload = ErrorResponse(
        code=_get_error_code(exc),
        detail=message,
        metadata=_get_error_metadata(exc),
        correlation_id=_get_correlation_id(request),
    ).model_dump()
    return JSONResponse(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        content=payload,
    )
|
|
|
|
|
|
async def _value_error_handler(
    request: Request,
    exc: ValueError,
) -> JSONResponse:
    """Answer with ``400 Bad Request`` for a :class:`ValueError`.

    The error code is always ``invalid_input`` and the exception message is
    used as the response detail.

    Args:
        request: The incoming FastAPI request.
        exc: The :class:`ValueError`.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 400.
    """
    message = str(exc)
    log.warning(
        "value_error",
        path=request.url.path,
        method=request.method,
        error=message,
    )
    payload = ErrorResponse(
        code="invalid_input",
        detail=message,
        metadata={},
        correlation_id=_get_correlation_id(request),
    ).model_dump()
    return JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content=payload)
|
|
|
|
|
|
async def _service_unavailable_handler(
    request: Request,
    exc: ServiceUnavailableError,
) -> JSONResponse:
    """Answer with ``503 Service Unavailable`` for infrastructure errors.

    Args:
        request: The incoming FastAPI request.
        exc: The infrastructure exception (e.g., ConfigDirError).

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 503 whose body
        carries the exception's error code, message and metadata.
    """
    message = str(exc)
    log.warning(
        "service_unavailable",
        path=request.url.path,
        method=request.method,
        error=message,
    )
    payload = ErrorResponse(
        code=_get_error_code(exc),
        detail=message,
        metadata=_get_error_metadata(exc),
        correlation_id=_get_correlation_id(request),
    ).model_dump()
    return JSONResponse(
        status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
        content=payload,
    )
|
|
|
|
|
|
async def _authentication_error_handler(
    request: Request,
    exc: AuthenticationError,
) -> JSONResponse:
    """Answer with ``401 Unauthorized`` for authentication failures.

    Args:
        request: The incoming FastAPI request.
        exc: The :class:`~app.exceptions.AuthenticationError`.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 401 whose body
        carries the exception's error code, message and metadata.
    """
    message = str(exc)
    log.warning(
        "authentication_error",
        path=request.url.path,
        method=request.method,
        error=message,
    )
    payload = ErrorResponse(
        code=_get_error_code(exc),
        detail=message,
        metadata=_get_error_metadata(exc),
        correlation_id=_get_correlation_id(request),
    ).model_dump()
    return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=payload)
|
|
|
|
|
|
async def _rate_limit_error_handler(
    request: Request,
    exc: RateLimitError,
) -> JSONResponse:
    """Return a ``429 Too Many Requests`` response for rate limit exceeded errors.

    The ``Retry-After`` header is derived from ``exc.retry_after_seconds``.
    The value is rounded *up* to a whole number of seconds and clamped at
    zero: truncating (as ``int()`` does) would turn e.g. ``0.9`` into ``0``
    and invite the client to retry before the limit has actually reset.

    Args:
        request: The incoming FastAPI request.
        exc: The :class:`~app.exceptions.RateLimitError`.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 429 and Retry-After header.
    """
    log.warning(
        "rate_limit_exceeded",
        path=request.url.path,
        method=request.method,
        error=str(exc),
        retry_after_seconds=exc.retry_after_seconds,
    )
    error_response = ErrorResponse(
        code=_get_error_code(exc),
        detail=str(exc),
        metadata=_get_error_metadata(exc),
        correlation_id=_get_correlation_id(request),
    )

    # Ceiling without importing ``math``: int() truncates toward zero, so bump
    # by one when a positive fractional part was dropped.
    raw_delay = exc.retry_after_seconds
    retry_after = int(raw_delay)
    if retry_after < raw_delay:
        retry_after += 1
    # Retry-After's delay-seconds form is a non-negative integer.
    retry_after = max(retry_after, 0)

    return JSONResponse(
        status_code=status.HTTP_429_TOO_MANY_REQUESTS,
        content=error_response.model_dump(),
        headers={"Retry-After": str(retry_after)},
    )
|
|
|
|
|
|
async def _http_exception_handler(
    request: Request,
    exc: HTTPException,
) -> JSONResponse:
    """Wrap a FastAPI ``HTTPException`` in the ``ErrorResponse`` envelope.

    The HTTP status code is mapped to a machine-readable error code; status
    codes without an entry in the mapping fall back to ``internal_error``.
    The original status code and any headers on the exception are preserved.

    Args:
        request: The incoming FastAPI request.
        exc: The :class:`fastapi.HTTPException`.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with the original status code.
    """
    log.warning(
        "http_exception",
        path=request.url.path,
        method=request.method,
        status_code=exc.status_code,
        error=exc.detail,
    )

    codes_by_status = {
        status.HTTP_400_BAD_REQUEST: "invalid_input",
        status.HTTP_401_UNAUTHORIZED: "authentication_required",
        status.HTTP_403_FORBIDDEN: "forbidden",
        status.HTTP_404_NOT_FOUND: "not_found",
        status.HTTP_409_CONFLICT: "conflict",
        status.HTTP_422_UNPROCESSABLE_ENTITY: "invalid_input",
        status.HTTP_429_TOO_MANY_REQUESTS: "rate_limit_exceeded",
        status.HTTP_500_INTERNAL_SERVER_ERROR: "internal_error",
        status.HTTP_503_SERVICE_UNAVAILABLE: "service_unavailable",
    }

    payload = ErrorResponse(
        code=codes_by_status.get(exc.status_code, "internal_error"),
        detail=exc.detail,
        metadata={},
        correlation_id=_get_correlation_id(request),
    ).model_dump()

    # Pass an empty mapping when the exception carries no headers.
    extra_headers = exc.headers if exc.headers else {}
    return JSONResponse(
        status_code=exc.status_code,
        content=payload,
        headers=extra_headers,
    )
|
|
|
|
|
|
async def _request_validation_error_handler(
    request: Request,
    exc: RequestValidationError,
) -> JSONResponse:
    """Return a standardized error response for Pydantic validation errors.

    Converts FastAPI's RequestValidationError to our unified ErrorResponse
    format.  The response metadata summarises the failure: the number of
    field errors and the dotted location of the first one.  The response
    status is 400.

    Args:
        request: The incoming FastAPI request.
        exc: The :class:`fastapi.exceptions.RequestValidationError`.

    Returns:
        A :class:`fastapi.responses.JSONResponse` with status 400.
    """
    # Fetch the error list once and reuse it for both logging and the
    # response body instead of calling ``exc.errors()`` twice.
    validation_errors = exc.errors()

    log.warning(
        "request_validation_error",
        path=request.url.path,
        method=request.method,
        error_count=len(validation_errors),
    )

    error_details: dict[str, str | int | float | bool | None] = {}
    if validation_errors:
        error_details["field_errors"] = len(validation_errors)
        first_error = validation_errors[0]
        # ``loc`` is a sequence of path segments; join it into a dotted path.
        error_details["first_field"] = ".".join(str(x) for x in first_error["loc"])

    error_response = ErrorResponse(
        code="invalid_input",
        detail="Request validation failed.",
        metadata=error_details,
        correlation_id=_get_correlation_id(request),
    )

    return JSONResponse(
        status_code=status.HTTP_400_BAD_REQUEST,
        content=error_response.model_dump(),
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Setup-redirect middleware
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Exact paths that are always reachable, even before setup is complete.
|
|
# Using exact matching prevents fragile prefix-based allowlists. For example,
|
|
# if we used startswith(), a future route like /api/setup-debug would bypass
|
|
# the guard without being explicitly allowed.
|
|
_EXACT_ALLOWED: frozenset[str] = frozenset(
|
|
{
|
|
"/api/setup", # GET/POST /api/setup
|
|
"/api/health", # Health check endpoint
|
|
"/api/docs", # Swagger UI
|
|
"/api/redoc", # ReDoc
|
|
"/api/openapi.json", # OpenAPI schema
|
|
},
|
|
)
|
|
|
|
# Prefix paths that are always reachable. These MUST end with "/" to prevent
|
|
# matching paths like "/api/setup-debug" while still matching nested routes
|
|
# like "/api/setup/timezone".
|
|
_PREFIX_ALLOWED: frozenset[str] = frozenset(
|
|
{
|
|
"/api/setup/", # Nested setup routes (e.g., /api/setup/timezone)
|
|
},
|
|
)
|
|
|
|
|
|
# Security headers constants
# These values are written verbatim onto every response by
# ``SecurityHeadersMiddleware``.

# Content-Security-Policy value: only same-origin resources are allowed.
# NOTE(review): if the Swagger UI / ReDoc pages load assets from a CDN, this
# policy may block them -- confirm when ``enable_docs`` is on.
_CSP_POLICY: str = "default-src 'self'"
# X-Frame-Options value: forbid rendering the app inside any frame.
_X_FRAME_OPTIONS: str = "DENY"
# X-Content-Type-Options value: disable MIME-type sniffing.
_X_CONTENT_TYPE_OPTIONS: str = "nosniff"
# X-XSS-Protection value (legacy header).
# NOTE(review): current browser guidance generally discourages enabling this
# header -- confirm whether "0" would be the preferred value here.
_X_XSS_PROTECTION: str = "1; mode=block"
|
|
|
|
|
|
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Attach a fixed set of security headers to every HTTP response.

    The following headers are set (overwriting any existing value):

    - Content-Security-Policy: restricts the origins resources may load from
    - X-Frame-Options: controls iframe embedding (clickjacking defence)
    - X-Content-Type-Options: disables MIME-sniffing
    - X-XSS-Protection: legacy browser XSS filter switch

    The header values come from the module-level ``_CSP_POLICY``,
    ``_X_FRAME_OPTIONS``, ``_X_CONTENT_TYPE_OPTIONS`` and
    ``_X_XSS_PROTECTION`` constants.
    """

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[StarletteResponse]],
    ) -> StarletteResponse:
        """Run the downstream handler, then add the security headers.

        Args:
            request: The incoming HTTP request.
            call_next: The next middleware / router handler.

        Returns:
            The downstream response with the security headers set.
        """
        response: StarletteResponse = await call_next(request)

        security_headers = (
            ("Content-Security-Policy", _CSP_POLICY),
            ("X-Frame-Options", _X_FRAME_OPTIONS),
            ("X-Content-Type-Options", _X_CONTENT_TYPE_OPTIONS),
            ("X-XSS-Protection", _X_XSS_PROTECTION),
        )
        for header_name, header_value in security_headers:
            response.headers[header_name] = header_value

        return response
|
|
|
|
|
|
class SetupRedirectMiddleware(BaseHTTPMiddleware):
    """Redirect API requests to ``/api/setup`` until setup is done.

    A request passes through untouched when its path (with trailing slashes
    removed) is listed in :data:`_EXACT_ALLOWED`, or when it matches an entry
    of :data:`_PREFIX_ALLOWED` -- either equal to the prefix without its
    trailing slash, or starting with the full slash-terminated prefix.
    Because the prefixes end in ``/``, a path such as ``/api/setup-debug``
    does not match ``/api/setup/``.

    Any other path beginning with ``/api`` is redirected while
    ``is_setup_complete_cached(request.app)`` is false.  Non-API paths are
    never redirected.
    """

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[StarletteResponse]],
    ) -> StarletteResponse:
        """Decide whether to forward the request or redirect it to setup.

        Args:
            request: The incoming HTTP request.
            call_next: The next middleware / router handler.

        Returns:
            Either a ``307 Temporary Redirect`` to ``/api/setup`` or the
            normal router response.
        """
        # Strip trailing slashes so "/api/setup/" and "/api/setup" compare
        # equal; an all-slash path collapses to "/".  ``request.url.path``
        # carries no query string.
        normalized: str = request.url.path.rstrip("/") or "/"

        # Exempt paths: exact allowlist first, then slash-terminated prefixes.
        exempt = normalized in _EXACT_ALLOWED or any(
            normalized == allowed.rstrip("/") or normalized.startswith(allowed)
            for allowed in _PREFIX_ALLOWED
        )
        if exempt:
            return await call_next(request)

        # Only API paths are guarded; the setup flag is consulted solely for
        # those.  NOTE(review): per its name the helper reads a cached flag
        # rather than querying the database -- confirm in
        # app.utils.setup_state.
        must_redirect = normalized.startswith("/api") and not is_setup_complete_cached(
            request.app
        )
        if must_redirect:
            return RedirectResponse(
                url="/api/setup",
                status_code=status.HTTP_307_TEMPORARY_REDIRECT,
            )

        return await call_next(request)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Application factory
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def create_app(settings: Settings | None = None) -> FastAPI:
    """Create and configure the BanGUI FastAPI application.

    This factory is the single entry point for creating the application.
    Tests can pass a custom ``settings`` object to override defaults
    without touching environment variables.

    The factory wires up, in order: API-docs URLs, application state
    (settings, session cache, rate limiters, setup-complete flag), CORS,
    the middleware stack, exception handlers and routers.

    Args:
        settings: Optional pre-built :class:`~app.config.Settings` instance.
            If ``None``, settings are loaded from the environment via
            :func:`~app.config.get_settings`.

    Returns:
        A fully configured :class:`fastapi.FastAPI` application ready for use.
    """
    resolved_settings: Settings = settings if settings is not None else get_settings()

    # Configure API docs based on the enable_docs setting.
    # When disabled the URLs are ``None`` and FastAPI serves no docs routes;
    # when enabled they are served under /api/*.
    docs_url = "/api/docs" if resolved_settings.enable_docs else None
    redoc_url = "/api/redoc" if resolved_settings.enable_docs else None
    openapi_url = "/api/openapi.json" if resolved_settings.enable_docs else None

    app: FastAPI = FastAPI(
        title="BanGUI",
        description="Web interface for monitoring, managing, and configuring fail2ban.",
        version=__version__,
        lifespan=_lifespan,
        docs_url=docs_url,
        redoc_url=redoc_url,
        openapi_url=openapi_url,
    )

    # Store immutable configuration and the dedicated runtime state manager on
    # app.state. Runtime state values are proxied through the wrapper so the
    # shared Starlette state bag itself does not hold mutable business state.
    app.state = ApplicationState(RuntimeState())
    app.state.settings = resolved_settings
    # Use the real cache only when it is enabled AND has a positive TTL;
    # otherwise fall back to the no-op implementation.
    app.state.session_cache = (
        InMemorySessionCache()
        if resolved_settings.session_cache_enabled and resolved_settings.session_cache_ttl_seconds > 0.0
        else NoOpSessionCache()
    )

    # Initialize the login rate limiter (5 attempts per 60 seconds per IP).
    # This is also re-initialized in the lifespan, but must be present here
    # for tests that bypass the lifespan via ASGITransport.
    app.state.login_rate_limiter = RateLimiter(max_attempts=5, window_seconds=60)

    # Initialize the global rate limiter (200 requests per 60 seconds per IP).
    # This is also re-initialized in the lifespan, but must be present here
    # for tests that bypass the lifespan via ASGITransport.
    app.state.global_rate_limiter = GlobalRateLimiter(max_requests=200, window_seconds=60)

    # Start from "setup not complete"; the cached flag is updated elsewhere.
    set_setup_complete_cache(app, False)

    # --- CORS ---
    # Allow origins configured by the runtime environment. In production,
    # this should be explicitly set to the frontend origin(s) or left empty
    # when the UI is served from the same origin as the API.
    if resolved_settings.cors_allowed_origins:
        app.add_middleware(
            CORSMiddleware,
            allow_origins=resolved_settings.cors_allowed_origins,
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

    # --- Middleware ---
    # Starlette wraps middleware in reverse registration order: the class
    # added LAST becomes the outermost layer and sees the request first.
    # Resulting request path (outermost -> innermost):
    #   CorrelationId -> RateLimit -> Csrf -> SetupRedirect -> SecurityHeaders
    #   -> CORS (only when configured) -> router
    # CorrelationIdMiddleware must run first so the correlation ID is
    # available to every other middleware, handler and logger; it is
    # therefore registered last.
    # NOTE(review): a response produced directly by a middleware that sits
    # outside SecurityHeadersMiddleware never passes through it (nor through
    # CORS) - confirm that is acceptable for those responses.
    app.add_middleware(SecurityHeadersMiddleware)
    app.add_middleware(SetupRedirectMiddleware)
    app.add_middleware(CsrfMiddleware)
    app.add_middleware(
        RateLimitMiddleware,
        rate_limiter=app.state.global_rate_limiter,
        settings=resolved_settings,
    )
    app.add_middleware(CorrelationIdMiddleware)

    # --- Exception handlers ---
    #
    # Starlette selects a handler by walking the raised exception's MRO and
    # taking the first class that has a registered handler, so the most
    # specific registered class always wins regardless of registration order.
    # The registrations below are nevertheless listed from most specific to
    # least specific so the hierarchy is easy to read:
    #
    #   1. Network-specific errors (Fail2BanConnectionError, Fail2BanProtocolError) -> HTTP 502
    #   2. Auth/rate-limit errors (AuthenticationError, RateLimitError) -> HTTP 401/429
    #   3. Category handlers (NotFoundError, BadRequestError, ConflictError) -> HTTP 404/400/409
    #   4. OperationError handler -> HTTP 500
    #   5. ServiceUnavailableError handler -> HTTP 503
    #   6. Generic DomainError handler (catch-all for any unregistered DomainError subclass) -> HTTP 500
    #   7. RequestValidationError handler (Pydantic validation errors) -> HTTP 400
    #   8. HTTPException (FastAPI built-ins) -> HTTP varies
    #   9. ValueError (Pydantic validation) -> HTTP 400
    #  10. Exception (absolute catch-all for unexpected errors) -> HTTP 500
    #
    # Any new DomainError subclass that inherits from a registered category
    # is automatically handled by that category's handler. A DomainError
    # subclass outside every category falls through to the generic
    # DomainError handler rather than the unhandled-exception handler.
    app.add_exception_handler(Fail2BanConnectionError, _fail2ban_connection_handler)  # type: ignore[arg-type]
    app.add_exception_handler(Fail2BanProtocolError, _fail2ban_protocol_handler)  # type: ignore[arg-type]
    app.add_exception_handler(AuthenticationError, _authentication_error_handler)  # type: ignore[arg-type]
    app.add_exception_handler(RateLimitError, _rate_limit_error_handler)  # type: ignore[arg-type]
    app.add_exception_handler(NotFoundError, _not_found_handler)  # type: ignore[arg-type]
    app.add_exception_handler(BadRequestError, _bad_request_handler)  # type: ignore[arg-type]
    app.add_exception_handler(ConflictError, _conflict_handler)  # type: ignore[arg-type]
    app.add_exception_handler(OperationError, _domain_error_handler)  # type: ignore[arg-type]
    app.add_exception_handler(ServiceUnavailableError, _service_unavailable_handler)  # type: ignore[arg-type]
    app.add_exception_handler(DomainError, _domain_error_handler)  # type: ignore[arg-type]
    app.add_exception_handler(RequestValidationError, _request_validation_error_handler)  # type: ignore[arg-type]
    app.add_exception_handler(HTTPException, _http_exception_handler)  # type: ignore[arg-type]
    app.add_exception_handler(ValueError, _value_error_handler)  # type: ignore[arg-type]
    app.add_exception_handler(Exception, _unhandled_exception_handler)

    # --- Routers ---
    app.include_router(health.router)
    app.include_router(setup.router)
    app.include_router(auth.router)
    app.include_router(dashboard.router)
    app.include_router(jails.router)
    app.include_router(bans.router)
    app.include_router(geo.router)
    app.include_router(config.router)
    app.include_router(file_config.router)
    app.include_router(server.router)
    app.include_router(history.router)
    app.include_router(blocklist.router)

    return app
|