refactoring-backend #3

Merged
lukas.pupkalipinski merged 403 commits from refactoring-backend into main 2026-05-20 20:23:46 +02:00
3 changed files with 92 additions and 1 deletions
Showing only changes of commit c2dd9f5f55 - Show all commits

View File

@@ -41,6 +41,7 @@ from app.tasks import (
geo_re_resolve,
health_check,
history_sync,
rate_limiter_cleanup,
session_cleanup,
)
from app.utils.async_utils import run_blocking
@@ -395,6 +396,7 @@ async def _stage_register_tasks(app: FastAPI, scheduler: AsyncIOScheduler) -> No
- geo_re_resolve: Periodic re-resolution of stale records
- history_sync: Periodic synchronization of ban history
- session_cleanup: Periodic cleanup of expired sessions
- rate_limiter_cleanup: Periodic cleanup of expired rate-limiter entries
Args:
app: The FastAPI application instance.
@@ -407,5 +409,6 @@ async def _stage_register_tasks(app: FastAPI, scheduler: AsyncIOScheduler) -> No
geo_re_resolve.register(app)
history_sync.register(app)
session_cleanup.register(app)
rate_limiter_cleanup.register(app)
log.info("startup_tasks_registered", count=7)
log.info("startup_tasks_registered", count=8)

View File

@@ -0,0 +1,71 @@
"""Rate limiter cleanup background task.
Registers an APScheduler job that periodically removes expired rate-limit
entries from the in-memory rate limiter. Without this cleanup, the
rate-limiter state dictionary grows unbounded over long runtimes, eventually
consuming excessive memory.
The cleanup is conservative: it only removes IPs with no recent attempts
(all timestamps outside the rate-limit window), so active or recently-active
IPs are preserved.
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import structlog
if TYPE_CHECKING:
from fastapi import FastAPI
# Module-level structlog logger used by both the job body and register().
log: structlog.stdlib.BoundLogger = structlog.get_logger()

#: How often the cleanup job fires (seconds). Chosen to balance memory
#: management against CPU overhead. A 30-minute interval handles typical
#: brute-force attack patterns while staying lightweight. Passed as the
#: ``seconds`` argument of the interval trigger in :func:`register`.
RATE_LIMITER_CLEANUP_INTERVAL: int = 30 * 60 # 30 minutes

#: Stable APScheduler job ID. :func:`register` passes it together with
#: ``replace_existing=True`` so re-registration replaces the job rather than
#: adding a duplicate.
JOB_ID: str = "rate_limiter_cleanup"
def _run_cleanup(app: FastAPI) -> None:
    """Purge expired entries from the application's login rate limiter.

    The limiter is looked up as ``login_rate_limiter`` on ``app.state``. If
    it is missing, the run is skipped and a warning is logged instead of
    raising.

    Args:
        app: The FastAPI application instance whose state holds the limiter.
    """
    limiter = getattr(app.state, "login_rate_limiter", None)

    # Happy path first: a limiter is present, so delegate and finish.
    if limiter is not None:
        limiter.cleanup_expired()
        return

    log.warning(
        "rate_limiter_cleanup_skipped",
        reason="rate_limiter not found on app.state",
    )
def register(app: FastAPI) -> None:
    """Schedule the periodic rate-limiter cleanup on the application scheduler.

    The job is added under the fixed ID ``JOB_ID`` with
    ``replace_existing=True``, so calling this again replaces the existing
    job rather than adding a second one.

    Must be called after the scheduler has been started (i.e., inside the
    lifespan handler, after ``scheduler.start()``).

    Args:
        app: The :class:`fastapi.FastAPI` application instance whose
            ``app.state.scheduler`` will receive the job.
    """
    interval_seconds = RATE_LIMITER_CLEANUP_INTERVAL
    scheduler = app.state.scheduler

    scheduler.add_job(
        _run_cleanup,
        trigger="interval",
        seconds=interval_seconds,
        # The job receives the app so it can reach the limiter on app.state.
        kwargs={"app": app},
        id=JOB_ID,
        replace_existing=True,
    )

    log.info("rate_limiter_cleanup_scheduled", interval_seconds=interval_seconds)

View File

@@ -11,6 +11,23 @@ attacks to a single worker.
The penalty strategy for failed login attempts is also managed here:
record_failure() records a failure timestamp and returns the penalty delay
to apply, enabling progressive back-off without exhausting request capacity.
Operational Notes
-----------------
**Cleanup Lifecycle**: The rate limiter state (_attempts, _failures, _lock_counts)
grows as IPs interact with the system. To prevent unbounded memory growth during
long runtimes, a scheduled background task (rate_limiter_cleanup) calls the
cleanup_expired() method every 30 minutes. This is safe because:
- cleanup_expired() only removes IPs with no recent attempts (all timestamps
outside the rate-limit window), so active IPs are never disrupted.
- The cleanup is non-blocking and logged for observability.
- Individual requests already prune old timestamps from each IP's deque during
is_allowed() and record_failure(), so cleanup primarily handles dormant IPs.
For monitoring, check logs for "rate_limiter_cleanup" events to observe how
many IPs are being retired from memory each cleanup cycle.
"""
from __future__ import annotations