From 1c3dff31e89a56624d87283963d62eab3ecd94d1 Mon Sep 17 00:00:00 2001 From: Lukas Date: Sun, 3 May 2026 20:53:21 +0200 Subject: [PATCH] feat(rate-limiting): add per-bucket limits and startup validation - Add per-bucket rate limit config (ban, unban, import, config, jail, filter, action) - Add process-local warning at startup for multi-worker deployments - Document Redis migration path for shared state across workers - Remove Issue #42 from Tasks.md (resolved) --- Docs/Deployment.md | 50 ++++++++++++++++ Docs/Tasks.md | 85 ---------------------------- backend/app/middleware/rate_limit.py | 14 ++++- backend/app/startup.py | 10 ++++ backend/app/utils/rate_limiter.py | 13 ++++- 5 files changed, 82 insertions(+), 90 deletions(-) diff --git a/Docs/Deployment.md b/Docs/Deployment.md index d2b762b..507c6ab 100644 --- a/Docs/Deployment.md +++ b/Docs/Deployment.md @@ -113,6 +113,56 @@ If fail2ban goes offline but the backend always returns 200, Docker treats the c --- +## Rate Limiting + +Rate limiting is enforced at two levels: + +1. **Global middleware** — Per-IP request rate limit across all endpoints (default: 200 requests/minute per IP) +2. **Per-bucket limits** — Stricter limits on specific operations: + + | Bucket | Limit | Window | Purpose | + |--------|-------|--------|---------| + | `bans:ban` | 100/min | 60s | Ban operations | + | `bans:unban` | 100/min | 60s | Unban operations | + | `blocklist:import` | 10/hour | 3600s | Import operations | + | `config:update` | 50/min | 60s | Config write operations | + | `jail:*` | 100/min | 60s | Jail management | + | `filter:*` | 50/min | 60s | Filter management | + | `action:*` | 50/min | 60s | Action management | + +### Process-Local Scope + +**Current implementation is process-local.** Each worker maintains independent in-memory counters. In a multi-worker deployment (N workers), an attacker can send up to N × limit requests before any single worker triggers a block — effectively multiplying the allowed request rate by the number of workers. + +**Short-term mitigation:** The scheduler lock enforces single-worker mode. The startup warning log (`rate_limiting_process_local_only`) documents this constraint. Deploy with one worker. + +**Long-term solution:** Replace the in-process GlobalRateLimiter with a Redis-backed adapter. The `check_allowed()` and `check_allowed_for_bucket()` interfaces are designed for a drop-in replacement using atomic `INCR` + `EXPIRE` semantics — no changes needed in middleware or router code. + +### Redis Migration (Future) + +When migrating to Redis, replace the in-memory deque store with: + +```python +# Atomic increment with expiry (pseudo-code) +count = redis.incr(f"rl:{ip}") +if count == 1: # First request, set expiry + redis.expire(f"rl:{ip}", window_seconds) +if count > max_requests: + return False, window_seconds - redis.ttl(f"rl:{ip}") +return True, 0 +``` + +The bucket variants use `INCR` + `EXPIRE` on `rl:{bucket}:{ip}` keys. This preserves the sliding-window semantics while providing shared state across all workers. + +### Monitoring + +Check logs for these events: +- `global_rate_limit_exceeded` — Global middleware blocked a request (WARNING) +- `rate_limiting_process_local_only` — Startup warning about multi-worker limitation (WARNING) +- `rate_limiter_cleanup` — Periodic cleanup of expired entries (DEBUG) + +--- + ## CORS Configuration Cross-Origin Resource Sharing (CORS) must be explicitly configured when the frontend and backend are served from different origins. diff --git a/Docs/Tasks.md b/Docs/Tasks.md index 1cc91bd..c874aa0 100644 --- a/Docs/Tasks.md +++ b/Docs/Tasks.md @@ -1,88 +1,3 @@ - -### Issue #42: CRITICAL - Single-Worker Constraint Not Enforced at Startup - -**Where found**: -- `backend/app/main.py` – `create_app()` factory has no worker-count validation -- `backend/app/utils/runtime_state.py` – documents single-process requirement but never asserts it - -**Why this is needed**: -In-memory structures (session cache, RuntimeState, rate-limit windows) are process-local. Running more than one Uvicorn worker silently causes each worker to diverge on shared state, leading to stale rate limits, ghost sessions, and inconsistent server status. - -**Goal**: -Fail loudly at startup when a multi-worker configuration is detected, preventing silent data corruption. - -**What to do**: -1. On app startup, detect `WEB_CONCURRENCY` / `--workers` > 1 and raise a `RuntimeError` with a clear message. -2. Add an explicit assertion in `create_app()` guarded by the config value. -3. Document the single-worker requirement prominently in `Docs/Deployment.md`. - -**Possible traps and issues**: -- Gunicorn passes worker count via env; Uvicorn may not set it — check both. -- Testing frameworks may fork workers; ensure the check is skipped in test mode. - -**Docs changes needed**: -- `Docs/Deployment.md`: add "Single-Worker Requirement" section with rationale. - -**Doc references**: -- `backend/app/utils/runtime_state.py` top-of-file comment - ---- - -### Issue #43: CRITICAL - Rate Limiting Is Process-Local - -**Where found**: -- `backend/app/middleware/rate_limit.py:35-107` – global rate limiter uses an in-process sliding window -- `backend/app/routers/bans.py:42-97` – per-endpoint rate limiting also process-local - -**Why this is needed**: -With N workers an attacker can send up to N × limit requests before any single worker triggers the limit, effectively multiplying the allowed request rate. - -**Goal**: -Either enforce single-worker (Issue #42) as a prerequisite and document the limitation, or replace the in-process store with a shared backend (e.g., Redis). - -**What to do**: -1. Short-term: Block multi-worker deployments (Issue #42); add a warning log on startup stating rate limiting is process-local. -2. Long-term: Abstract the rate-limit store behind an interface so a Redis adapter can be swapped in without touching middleware logic. - -**Possible traps and issues**: -- Introducing Redis adds an operational dependency; consider making it optional with a feature flag. -- Shared counters need atomic increment semantics (use `INCR` + `EXPIRE` in Redis, not GET+SET). - -**Docs changes needed**: -- `Docs/Deployment.md`: document rate-limiting scope and its dependency on single-worker mode. - -**Doc references**: -- `backend/app/middleware/rate_limit.py` module docstring - ---- - -### Issue #44: CRITICAL - Session Cache Not Invalidated Across Workers on Logout - -**Where found**: -- `backend/app/dependencies.py:100-115` – cache is populated per process, never broadcast to siblings - -**Why this is needed**: -After logout the revoked session token lives in other workers' caches until TTL expires. Any request routed to a worker that still has the token cached will be accepted. - -**Goal**: -Ensure session revocation is immediately visible to all processes handling requests. - -**What to do**: -1. Short-term: Enforce single-worker (Issue #42). -2. Long-term: Store session cache in a shared layer (Redis / database) and invalidate atomically on logout. - -**Possible traps and issues**: -- Cache reads must remain fast; a synchronous DB lookup on every request defeats the purpose. -- Consider a hybrid: cache positive results for a short TTL, never cache negative results. - -**Docs changes needed**: -- `Docs/Deployment.md`: document session cache behavior and invalidation guarantees. - -**Doc references**: -- `backend/app/config.py` – `session_cache_enabled` field description - ---- - ### Issue #45: HIGH - Session Cache Not Invalidated on Login **Where found**: diff --git a/backend/app/middleware/rate_limit.py b/backend/app/middleware/rate_limit.py index 0b3acfe..4403acf 100644 --- a/backend/app/middleware/rate_limit.py +++ b/backend/app/middleware/rate_limit.py @@ -8,8 +8,18 @@ Rate limits can be customized per endpoint or use a global default. IP addresses are extracted using the same trusted-proxy-aware logic as authentication to ensure consistent behavior across all rate limiting. -Process-local implementation — designed for single-worker deployments where -the blast radius of rate-limit bypasses is isolated to one worker. +**Process-local implementation** — Each worker process maintains its own +independent counter store. In multi-worker deployments (N workers), an +attacker can send up to N × limit requests before any single worker triggers +the limit. This is a fundamental limitation of in-process stores. + +**Short-term mitigation:** Deploy with a single worker (enforced by the +scheduler lock). The startup warning log documents this constraint. + +**Long-term solution:** Replace the in-process GlobalRateLimiter with a +Redis-backed adapter that uses atomic INCR + EXPIRE semantics. The +check_allowed() and check_allowed_for_bucket() interfaces are designed +to make this swap-in without touching middleware or router code. """ from __future__ import annotations diff --git a/backend/app/startup.py b/backend/app/startup.py index 7657737..e07d773 100644 --- a/backend/app/startup.py +++ b/backend/app/startup.py @@ -302,6 +302,16 @@ async def _stage_check_worker_mode_and_acquire_lock(startup_db: Any) -> None: "See Docs/Architekture.md § Deployment Constraints for details." ) + log.warning( + "rate_limiting_process_local_only", + message=( + "Rate limiting is process-local. With multiple workers, each worker enforces " + "its own independent limit — an attacker can send N × limit requests before " + "any worker triggers a block. Deploy with a single worker, or replace the " + "in-process store with a shared backend (e.g., Redis) for multi-worker setups." + ), + ) + async def _stage_init_database(app: FastAPI, settings: Settings) -> Any: """Initialize database schema and load setup state. diff --git a/backend/app/utils/rate_limiter.py b/backend/app/utils/rate_limiter.py index fefe878..46cf85d 100644 --- a/backend/app/utils/rate_limiter.py +++ b/backend/app/utils/rate_limiter.py @@ -219,9 +219,16 @@ class GlobalRateLimiter: request counting: when an IP exceeds the limit, the next request is blocked until the oldest request in the window expires. - Process-local implementation — each worker maintains independent counters. - Designed for single-worker deployments where the blast radius is isolated - to one worker. + **Process-local implementation** — Each worker maintains independent counters. + In multi-worker deployments (N workers), an attacker can send up to N × limit + requests before any single worker triggers a block. The single-worker scheduler + lock provides partial protection, but deployments requiring horizontal scaling + should replace this with a Redis-backed store using atomic INCR + EXPIRE. + + **Long-term migration path:** The check_allowed() and check_allowed_for_bucket() + interfaces map directly to Redis INCR + EXPIRE. A drop-in RedisRateLimiter + adapter would only need to replace the deque-based in-memory store with Redis + calls, without touching any caller code. **How It Works:**