# BanGUI/backend/app/middleware/rate_limit.py

"""Global rate limiting middleware.

Implements per-IP request rate limiting for all endpoints using a configurable
sliding window algorithm. Intercepts requests before they reach route handlers
and blocks those exceeding the per-IP limit with a 429 response.

Rate limits can be customized per endpoint or use a global default.
IP addresses are extracted using the same trusted-proxy-aware logic as
authentication to ensure consistent behavior across all rate limiting.

**Process-local implementation** — Each worker process maintains its own
independent counter store. In multi-worker deployments (N workers), an
attacker can send up to N × limit requests before any single worker triggers
the limit. This is a fundamental limitation of in-process stores.

**Short-term mitigation:** Deploy with a single worker (enforced by the
scheduler lock). The startup warning log documents this constraint.

**Long-term solution:** Replace the in-process GlobalRateLimiter with a
Redis-backed adapter that uses atomic INCR + EXPIRE semantics. The
check_allowed() and check_allowed_for_bucket() interfaces are designed so
that such an adapter can be swapped in without touching middleware or
router code.

Processing order
----------------
This middleware must be the innermost in the security-critical chain:

    CorrelationIdMiddleware → CsrfMiddleware → RateLimitMiddleware

Rate limiting is last so that requests blocked by CsrfMiddleware do not
consume rate-limit budget, and so that rate-limit log entries (which are
unusual and potentially suspicious) always carry a correlation ID for tracing.
"""

from __future__ import annotations

import math
from collections.abc import Awaitable, Callable
from typing import TYPE_CHECKING

from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request
from starlette.responses import JSONResponse, Response

from app.exceptions import RateLimitError
from app.utils.client_ip import get_client_ip
from app.utils.logging_compat import get_logger

if TYPE_CHECKING:
    from app.config import Settings
    from app.utils.rate_limiter import GlobalRateLimiter

log = get_logger(__name__)


class RateLimitMiddleware(BaseHTTPMiddleware):
    """Enforce per-IP request rate limiting in front of every endpoint.

    Each request's client IP is resolved with ``get_client_ip`` (honouring the
    configured trusted proxies) and checked against the supplied
    ``GlobalRateLimiter``. Requests over the limit are answered with a 429
    JSON response and never reach the downstream handler.

    When a bucket override is fully configured (name, max requests and window
    all truthy), requests whose path starts with one of
    ``_BUCKET_PATH_PREFIXES`` are counted against that named bucket instead of
    the default per-IP counter.
    """

    # Path prefixes that are counted against the named bucket override.
    # NOTE(review): history and auth (login/setup) paths share the single
    # configured bucket; separate limits per group would need separate
    # bucket settings.
    _BUCKET_PATH_PREFIXES = (
        "/api/v1/history",
        "/api/v1/login",
        "/api/v1/setup",
    )

    def __init__(
        self,
        app: object,
        rate_limiter: GlobalRateLimiter,
        settings: Settings,
        bucket_override: str | None = None,
        bucket_max_requests: int | None = None,
        bucket_window_seconds: int | None = None,
    ) -> None:
        """Initialize the rate limit middleware.

        Args:
            app: The ASGI application to wrap.
            rate_limiter: The GlobalRateLimiter instance used to check limits.
            settings: Application settings (read for ``trusted_proxies``).
            bucket_override: Optional named bucket used for the paths listed
                in ``_BUCKET_PATH_PREFIXES`` instead of the default limiter.
            bucket_max_requests: Max requests for the bucket override.
            bucket_window_seconds: Window for the bucket override.
        """
        super().__init__(app)  # type: ignore[arg-type]
        self.rate_limiter: GlobalRateLimiter = rate_limiter
        self.settings: Settings = settings
        self.bucket_override = bucket_override
        self.bucket_max_requests = bucket_max_requests
        self.bucket_window_seconds = bucket_window_seconds

    async def dispatch(
        self,
        request: Request,
        call_next: Callable[[Request], Awaitable[Response]],
    ) -> Response:
        """Check the rate limit before handing the request downstream.

        Args:
            request: The incoming HTTP request.
            call_next: Callable that passes the request to the next
                middleware/handler.

        Returns:
            The downstream response when the request is allowed, otherwise a
            429 ``JSONResponse`` carrying a ``Retry-After`` header.
        """
        client_ip = get_client_ip(request, trusted_proxies=self.settings.trusted_proxies)
        path = request.url.path

        # The bucket is only used when all three override settings are truthy
        # AND the path belongs to one of the bucketed prefixes; everything
        # else falls back to the default per-IP limiter.
        if (
            self.bucket_override
            and self.bucket_max_requests
            and self.bucket_window_seconds
            and path.startswith(self._BUCKET_PATH_PREFIXES)
        ):
            is_allowed, retry_after = self.rate_limiter.check_allowed_for_bucket(
                self.bucket_override,
                client_ip,
                self.bucket_max_requests,
                self.bucket_window_seconds,
            )
        else:
            is_allowed, retry_after = self.rate_limiter.check_allowed(client_ip)

        if is_allowed:
            # Request is within budget, continue to the next handler.
            response: Response = await call_next(request)
            return response

        log.warning(
            "global_rate_limit_exceeded",
            client_ip=client_ip,
            path=path,
            method=request.method,
            retry_after=retry_after,
        )
        # The exception object is only built to reuse its message and
        # metadata formatting; it is not raised.
        rate_limit_error = RateLimitError(
            "Too many requests. Please try again later.",
            retry_after_seconds=retry_after,
        )
        # Retry-After must be a non-negative whole number of seconds. Round
        # UP so a fractional wait (e.g. 0.4s) is not advertised as 0, which
        # would invite an immediate retry that is still blocked.
        retry_after_header = str(max(0, math.ceil(retry_after)))
        return JSONResponse(
            status_code=429,
            content={
                "code": "rate_limit_exceeded",
                "detail": str(rate_limit_error),
                "metadata": rate_limit_error.get_error_metadata(),
                "correlation_id": getattr(request.state, "correlation_id", None),
            },
            headers={"Retry-After": retry_after_header},
        )