# BanGUI/backend/app/middleware/rate_limit.py

"""Global rate limiting middleware.

Implements per-IP request rate limiting for all endpoints using a configurable
sliding window algorithm. Intercepts requests before they reach route handlers
and blocks those exceeding the per-IP limit with a 429 response.

Rate limits can be customized per endpoint or use a global default.
IP addresses are extracted using the same trusted-proxy-aware logic as
authentication to ensure consistent behavior across all rate limiting.

**Process-local implementation** — Each worker process maintains its own
independent counter store. In multi-worker deployments (N workers), an
attacker can send up to N × limit requests before any single worker triggers
the limit. This is a fundamental limitation of in-process stores.

**Short-term mitigation:** Deploy with a single worker (enforced by the
scheduler lock). The startup warning log documents this constraint.

**Long-term solution:** Replace the in-process GlobalRateLimiter with a
Redis-backed adapter that uses atomic INCR + EXPIRE semantics. The
check_allowed() and check_allowed_for_bucket() interfaces are designed
so that this swap can be made without touching middleware or router code.

Processing order
----------------
This middleware must be the innermost in the security-critical chain:

    CorrelationIdMiddleware → CsrfMiddleware → RateLimitMiddleware

Rate limiting is last so that requests blocked by CsrfMiddleware do not
consume rate-limit budget, and so that rate-limit log entries (which are
unusual and potentially suspicious) always carry a correlation ID for tracing.
"""

from __future__ import annotations

import math
from collections.abc import Awaitable, Callable
from typing import TYPE_CHECKING

import structlog
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse, Response

from app.exceptions import RateLimitError
from app.utils.client_ip import get_client_ip

if TYPE_CHECKING:
    from app.config import Settings
    from app.utils.rate_limiter import GlobalRateLimiter

log: structlog.stdlib.BoundLogger = structlog.get_logger()


class RateLimitMiddleware(BaseHTTPMiddleware):
    """Enforce global per-IP request rate limiting on all endpoints.

    Each request is attributed to a client IP (resolved via ``get_client_ip``
    with the configured trusted proxies) and checked against the injected
    ``GlobalRateLimiter``. Requests the limiter rejects are answered with a
    429 JSON response and are never forwarded to the downstream handler.
    """

    def __init__(
        self,
        app: object,
        rate_limiter: GlobalRateLimiter,
        settings: Settings,
    ) -> None:
        """Initialize the rate limit middleware.

        Args:
            app: The ASGI application this middleware wraps.
            rate_limiter: The GlobalRateLimiter instance to use for checking limits.
            settings: Application settings; ``trusted_proxies`` is read from it
                on every request to resolve the client IP.
        """
        super().__init__(app)  # type: ignore[arg-type]
        self.rate_limiter: GlobalRateLimiter = rate_limiter
        self.settings: Settings = settings

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        """Check the rate limit before passing the request downstream.

        If the limiter rejects the client IP, a warning is logged and a 429
        JSON response with a ``Retry-After`` header is returned immediately.
        Otherwise the request is forwarded unchanged.

        Args:
            request: The incoming HTTP request.
            call_next: Callable to pass the request to the next middleware/handler.

        Returns:
            The 429 rate-limit response, or the downstream handler's response.
        """
        client_ip = get_client_ip(request, trusted_proxies=self.settings.trusted_proxies)

        is_allowed, retry_after = self.rate_limiter.check_allowed(client_ip)
        if not is_allowed:
            log.warning(
                "global_rate_limit_exceeded",
                client_ip=client_ip,
                path=request.url.path,
                method=request.method,
                retry_after=retry_after,
            )
            # The exception is only instantiated (never raised) so the body
            # reuses its message and metadata formatting.
            rate_limit_error = RateLimitError(
                "Too many requests. Please try again later.",
                retry_after_seconds=retry_after,
            )
            # Retry-After carries whole, non-negative seconds. Round up rather
            # than truncate so a fractional wait (e.g. 0.4 s) is advertised as
            # 1 instead of 0, which would invite a retry before the wait the
            # limiter reported has elapsed.
            retry_after_header = str(max(0, math.ceil(retry_after)))
            return JSONResponse(
                status_code=429,
                content={
                    "code": "rate_limit_exceeded",
                    "detail": str(rate_limit_error),
                    "metadata": rate_limit_error.get_error_metadata(),
                    # Absent when no upstream middleware set a correlation ID.
                    "correlation_id": getattr(request.state, "correlation_id", None),
                },
                headers={"Retry-After": retry_after_header},
            )

        # Request is allowed, continue to next handler
        response: Response = await call_next(request)
        return response