- Split /health into /health/live (liveness) and /health/ready (readiness) following Kubernetes conventions. Combined /health retained for backward compatibility with existing Docker HEALTHCHECK definitions. - Add ReadyCheck and ReadyResponse models for structured readiness output. - Add _assert_middleware_order() startup check enforcing: RateLimit → Csrf → CorrelationId middleware chain. - Register CorrelationIdMiddleware, CsrfMiddleware, RateLimitMiddleware in create_app() with documented required order (reverse of processing). - Add correlation.py, csrf.py, rate_limit.py middleware modules. - Add health probe tests in test_health_probes.py. - Update test_main.py with middleware order assertion tests. - Update frontend useFetchData hook tests. - Docs: update Deployment.md with Kubernetes probe config examples.
127 lines
4.8 KiB
Python
127 lines
4.8 KiB
Python
"""Global rate limiting middleware.
|
||
|
||
Implements per-IP request rate limiting for all endpoints using a configurable
|
||
sliding window algorithm. Intercepts requests before they reach route handlers
|
||
and blocks those exceeding the per-IP limit with a 429 response.
|
||
|
||
Rate limits can be customized per endpoint or use a global default.
|
||
IP addresses are extracted using the same trusted-proxy-aware logic as
|
||
authentication to ensure consistent behavior across all rate limiting.
|
||
|
||
**Process-local implementation** — Each worker process maintains its own
|
||
independent counter store. In multi-worker deployments (N workers), an
|
||
attacker can send up to N × limit requests before any single worker triggers
|
||
the limit. This is a fundamental limitation of in-process stores.
|
||
|
||
**Short-term mitigation:** Deploy with a single worker (enforced by the
|
||
scheduler lock). The startup warning log documents this constraint.
|
||
|
||
**Long-term solution:** Replace the in-process GlobalRateLimiter with a
|
||
Redis-backed adapter that uses atomic INCR + EXPIRE semantics. The
|
||
check_allowed() and check_allowed_for_bucket() interfaces are designed
|
||
so that this swap can be made without touching middleware or router code.
|
||
|
||
Processing order
|
||
----------------
|
||
This middleware must be the innermost in the security-critical chain:
|
||
|
||
CorrelationIdMiddleware → CsrfMiddleware → RateLimitMiddleware
|
||
|
||
Rate limiting is last so that requests blocked by CsrfMiddleware do not
|
||
consume rate-limit budget, and so that rate-limit log entries (which are
|
||
unusual and potentially suspicious) always carry a correlation ID for tracing.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from collections.abc import Awaitable, Callable
|
||
from typing import TYPE_CHECKING
|
||
|
||
import structlog
|
||
from starlette.middleware.base import BaseHTTPMiddleware
|
||
from starlette.requests import Request
|
||
from starlette.responses import JSONResponse, Response
|
||
|
||
from app.exceptions import RateLimitError
|
||
from app.utils.client_ip import get_client_ip
|
||
|
||
if TYPE_CHECKING:
|
||
from app.config import Settings
|
||
from app.utils.rate_limiter import GlobalRateLimiter
|
||
|
||
# Module-level structlog logger; RateLimitMiddleware.dispatch() uses it to
# emit a warning for every request rejected by the rate limiter.
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||
|
||
|
||
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Enforce global per-IP request rate limiting on all endpoints.

    Each request is attributed to a client IP (resolved with the configured
    trusted proxies) and checked against the supplied GlobalRateLimiter.
    Requests over the limit are answered with a 429 JSON response and are not
    forwarded to the next middleware/handler.
    """

    def __init__(
        self,
        app: object,
        rate_limiter: GlobalRateLimiter,
        settings: Settings,
    ) -> None:
        """Initialize the rate limit middleware.

        Args:
            app: The ASGI application wrapped by this middleware; forwarded to
                ``BaseHTTPMiddleware``.
            rate_limiter: The GlobalRateLimiter instance to use for checking limits.
            settings: Application settings (``trusted_proxies`` is read when
                resolving the client IP).
        """
        super().__init__(app)  # type: ignore[arg-type]
        self.rate_limiter: GlobalRateLimiter = rate_limiter
        self.settings: Settings = settings

    @staticmethod
    def _retry_after_header(retry_after: float) -> str:
        """Format a wait time as a value for the ``Retry-After`` header.

        The header carries a non-negative whole number of seconds. The wait is
        rounded *up* so that a fractional remainder (e.g. 0.4s) is never
        advertised as ``0`` -- truncating would invite the client to retry
        while it is still being limited.

        Args:
            retry_after: Seconds until the client may retry. Assumed to be an
                int or float as returned by ``check_allowed()`` -- TODO confirm
                against GlobalRateLimiter.

        Returns:
            The number of whole seconds, as a decimal string.
        """
        whole_seconds = int(retry_after)
        if whole_seconds < retry_after:
            # int() truncates toward zero; bump to cover the fractional part.
            whole_seconds += 1
        # Never emit a negative delay, which is not a valid header value.
        return str(max(whole_seconds, 0))

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        """Check rate limit before passing request to next middleware/handler.

        If the client IP has exceeded the request limit, a warning is logged
        and a 429 JSON response is returned immediately. Otherwise the request
        is passed through unchanged.

        Args:
            request: The incoming HTTP request.
            call_next: Callable to pass the request to the next middleware/handler.

        Returns:
            Either the 429 rate-limit response or the downstream response.
        """
        client_ip = get_client_ip(request, trusted_proxies=self.settings.trusted_proxies)

        is_allowed, retry_after = self.rate_limiter.check_allowed(client_ip)
        if not is_allowed:
            log.warning(
                "global_rate_limit_exceeded",
                client_ip=client_ip,
                path=request.url.path,
                method=request.method,
                retry_after=retry_after,
            )
            # The exception is only built (never raised) so the response body
            # reuses its message and metadata formatting.
            rate_limit_error = RateLimitError(
                "Too many requests. Please try again later.",
                retry_after_seconds=retry_after,
            )
            return JSONResponse(
                status_code=429,
                content={
                    "code": "rate_limit_exceeded",
                    "detail": str(rate_limit_error),
                    "metadata": rate_limit_error.get_error_metadata(),
                    # request.state may not carry a correlation ID; fall back to None.
                    "correlation_id": getattr(request.state, "correlation_id", None),
                },
                headers={"Retry-After": self._retry_after_header(retry_after)},
            )

        # Request is allowed, continue to next handler
        response: Response = await call_next(request)
        return response
|