refactoring-backend #3
@@ -41,6 +41,7 @@ from app.tasks import (
|
||||
geo_re_resolve,
|
||||
health_check,
|
||||
history_sync,
|
||||
rate_limiter_cleanup,
|
||||
session_cleanup,
|
||||
)
|
||||
from app.utils.async_utils import run_blocking
|
||||
@@ -395,6 +396,7 @@ async def _stage_register_tasks(app: FastAPI, scheduler: AsyncIOScheduler) -> No
|
||||
- geo_re_resolve: Periodic re-resolution of stale records
|
||||
- history_sync: Periodic synchronization of ban history
|
||||
- session_cleanup: Periodic cleanup of expired sessions
|
||||
- rate_limiter_cleanup: Periodic cleanup of expired rate-limiter entries
|
||||
|
||||
Args:
|
||||
app: The FastAPI application instance.
|
||||
@@ -407,5 +409,6 @@ async def _stage_register_tasks(app: FastAPI, scheduler: AsyncIOScheduler) -> No
|
||||
geo_re_resolve.register(app)
|
||||
history_sync.register(app)
|
||||
session_cleanup.register(app)
|
||||
rate_limiter_cleanup.register(app)
|
||||
|
||||
log.info("startup_tasks_registered", count=7)
|
||||
log.info("startup_tasks_registered", count=8)
|
||||
|
||||
71
backend/app/tasks/rate_limiter_cleanup.py
Normal file
71
backend/app/tasks/rate_limiter_cleanup.py
Normal file
@@ -0,0 +1,71 @@
|
||||
"""Rate limiter cleanup background task.
|
||||
|
||||
Registers an APScheduler job that periodically removes expired rate-limit
|
||||
entries from the in-memory rate limiter. Without this cleanup, the
|
||||
rate-limiter state dictionary grows unbounded over long runtimes, eventually
|
||||
consuming excessive memory.
|
||||
|
||||
The cleanup is conservative: it only removes IPs with no recent attempts
|
||||
(all timestamps outside the rate-limit window), so active or recently-active
|
||||
IPs are preserved.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import structlog
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fastapi import FastAPI
|
||||
|
||||
log: structlog.stdlib.BoundLogger = structlog.get_logger()
|
||||
|
||||
#: Seconds between two consecutive runs of the cleanup job (30 minutes).
#: The interval trades memory reclamation against CPU overhead: it is short
#: enough to cope with typical brute-force attack patterns, yet long enough
#: to keep the job lightweight.
RATE_LIMITER_CLEANUP_INTERVAL: int = 30 * 60  # 30 minutes

#: Fixed APScheduler job ID. Registering again under the same ID replaces the
#: existing job instead of adding a duplicate.
JOB_ID: str = "rate_limiter_cleanup"
|
||||
|
||||
|
||||
def _run_cleanup(app: FastAPI) -> None:
    """Purge expired entries from the login rate limiter, if one is installed.

    Looks up ``login_rate_limiter`` on ``app.state`` and invokes its
    ``cleanup_expired()`` method. When the attribute is absent (or ``None``)
    the run is skipped and a warning is logged instead.

    Args:
        app: The FastAPI application instance whose ``state`` is expected to
            carry the rate limiter.
    """
    limiter = getattr(app.state, "login_rate_limiter", None)

    if limiter is not None:
        limiter.cleanup_expired()
        return

    # Nothing to clean: report it so a missing limiter does not go unnoticed.
    log.warning(
        "rate_limiter_cleanup_skipped",
        reason="rate_limiter not found on app.state",
    )
|
||||
|
||||
|
||||
def register(app: FastAPI) -> None:
    """Schedule the periodic rate-limiter cleanup on the application scheduler.

    The job is added under the stable ``JOB_ID`` with ``replace_existing=True``,
    so calling this function again replaces the job rather than duplicating it.

    Must be called after the scheduler has been started (i.e., inside the
    lifespan handler, after ``scheduler.start()``).

    Args:
        app: The :class:`fastapi.FastAPI` application instance. Its
            ``app.state.scheduler`` receives the job, and the instance itself
            is handed to the job as the ``app`` keyword argument.
    """
    interval = RATE_LIMITER_CLEANUP_INTERVAL
    scheduler = app.state.scheduler

    scheduler.add_job(
        _run_cleanup,
        trigger="interval",
        seconds=interval,
        kwargs={"app": app},
        id=JOB_ID,
        replace_existing=True,
    )

    log.info("rate_limiter_cleanup_scheduled", interval_seconds=interval)
|
||||
@@ -11,6 +11,23 @@ attacks to a single worker.
|
||||
The penalty strategy for failed login attempts is also managed here:
|
||||
record_failure() records a failure timestamp and returns the penalty delay
|
||||
to apply, enabling progressive back-off without exhausting request capacity.
|
||||
|
||||
Operational Notes
|
||||
-----------------
|
||||
|
||||
**Cleanup Lifecycle**: The rate limiter state (_attempts, _failures, _lock_counts)
|
||||
grows as IPs interact with the system. To prevent unbounded memory growth during
|
||||
long runtimes, a scheduled background task (rate_limiter_cleanup) calls the
|
||||
cleanup_expired() method every 30 minutes. This is safe because:
|
||||
|
||||
- cleanup_expired() only removes IPs with no recent attempts (all timestamps
|
||||
outside the rate-limit window), so active IPs are never disrupted.
|
||||
- The cleanup is non-blocking and logged for observability.
|
||||
- Individual requests already prune old timestamps from each IP's deque during
|
||||
is_allowed() and record_failure(), so cleanup primarily handles dormant IPs.
|
||||
|
||||
For monitoring, check logs for "rate_limiter_cleanup" events to observe how
|
||||
many IPs are being retired from memory each cleanup cycle.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
Reference in New Issue
Block a user