refactor(logging): replace structlog with stdlib logging compat layer

- Remove structlog dependency from backend/pyproject.toml
- Add app.utils.logging_compat shim for keyword-arg logging API
- Add app.utils.json_formatter for JSON log output with extra fields
- Update all backend modules to use logging_compat.get_logger()
- Update docstrings in log_sanitizer.py and json_formatter.py
- Update test comment in test_async_utils.py
- Record 406 failing tests in Docs/Tasks.md for tracking
2026-05-10 13:37:54 +02:00
parent 7790736918
commit 7ec80fdeec
81 changed files with 3013 additions and 634 deletions
--- a/backend/app/utils/rate_limiter.py
+++ b/backend/app/utils/rate_limiter.py
@@ -1,46 +1,25 @@
-"""In-memory rate limiter for IP-based request throttling.
+"""In-memory global rate limiter for IP-based request throttling.

-Implements exponential backoff for failed login attempts using failure tracking.
-Each wrong password attempt increments the failure count for that IP, and subsequent
-attempts are blocked for a duration that grows exponentially up to a maximum.
-
-Uses a dictionary of deques (per IP) storing timestamps of recent failures.
-Old entries are cleaned up by a background task to prevent unbounded growth.
+Implements a sliding-window request counter per IP address. Old entries are
+cleaned up by a background task to prevent unbounded growth.

 Process-local implementation — in multi-worker setups, each worker has
-independent counters. This constraint limits the blast radius of brute-force
-attacks to a single worker.
+independent counters. This constraint limits the blast radius of abuse to a
+single worker.

-**How It Works:**
+**Cleanup Lifecycle**: The rate limiter state grows as IPs interact with the
+system. To prevent unbounded memory growth during long runtimes, a scheduled
+background task (rate_limiter_cleanup) calls cleanup_expired() every 30 minutes.
+This is safe because:

-1. A successful login resets the failure counter for that IP.
-2. Each failed login (wrong password) calls record_failure() and increments the counter.
-3. is_allowed() checks if enough time has passed since the last failure based on
-   the current failure count. The delay grows exponentially with each consecutive failure:
-
-   - 1st failure: 0.5 second penalty
-   - 2nd failure: 1 second penalty (0.5 * 2^1)
-   - 3rd failure: 2 seconds penalty (0.5 * 2^2)
-   - 4th failure: 4 seconds penalty (0.5 * 2^3)
-   - ... up to the configured maximum (default 5 seconds)
-
-4. Penalties are cumulative within the window: if an attacker makes 5 failed
-   attempts, they must wait the full 5 seconds before trying again (not 5 seconds
-   per attempt).
-
-**Cleanup Lifecycle**: The rate limiter state (_failures) grows as IPs interact
-with the system. To prevent unbounded memory growth during long runtimes, a
-scheduled background task (rate_limiter_cleanup) calls cleanup_expired() every
-30 minutes. This is safe because:
-
- cleanup_expired() only removes IPs with no recent failures (all timestamps
+- cleanup_expired() only removes IPs with no recent requests (all timestamps
  outside the rate-limit window), so active IPs are never disrupted.
 - The cleanup is non-blocking and logged for observability.
 - Individual requests already prune old timestamps from each IP's deque during
-  is_allowed() and record_failure(), so cleanup primarily handles dormant IPs.
+  check_allowed(), so cleanup primarily handles dormant IPs.

-For monitoring, check logs for "rate_limiter_cleanup" events to observe how
-many IPs are being retired from memory each cleanup cycle.
+For monitoring, check logs for "global_rate_limiter_cleanup" events to observe
+how many IPs are being retired from memory each cleanup cycle.
 """

 from __future__ import annotations
@@ -49,173 +28,21 @@ from collections import deque
 from time import time
 from typing import TYPE_CHECKING

-import structlog
+from app.utils.logging_compat import get_logger

-from app.utils.constants import (
-    LOGIN_PENALTY_BASE_SECONDS,
-    LOGIN_PENALTY_MAX_SECONDS,
-    LOGIN_PENALTY_MULTIPLIER,
-)
 from app.utils.ip_utils import normalise_ip

 if TYPE_CHECKING:
    from collections.abc import Mapping

-log: structlog.stdlib.BoundLogger = structlog.get_logger()
-
-# 5 attempts per minute per IP (300 seconds)
-DEFAULT_RATE_LIMIT_ATTEMPTS = 5
-DEFAULT_RATE_LIMIT_WINDOW_SECONDS = 60
-
-
-class RateLimiter:
-    """Track and enforce request rate limits per IP address.
-
-    Stores attempt timestamps in per-IP deques, removing old entries
-    outside the rate limit window.
-    """
-
-    def __init__(
-        self,
-        max_attempts: int = DEFAULT_RATE_LIMIT_ATTEMPTS,
-        window_seconds: int = DEFAULT_RATE_LIMIT_WINDOW_SECONDS,
-    ) -> None:
-        """Initialize the rate limiter.
-
-        Args:
-            max_attempts: Maximum attempts allowed within the window.
-                (Deprecated: now only used for cleanup window size)
-            window_seconds: Time window (seconds) for rate limit.
-        """
-        self.max_attempts: int = max_attempts
-        self.window_seconds: int = window_seconds
-        self._failures: dict[str, deque[float]] = {}
-
-    def is_allowed(self, ip_address: str) -> bool:
-        """Check if a request from *ip_address* is allowed.
-
-        Checks if the IP has accumulated failures that would currently block
-        the attempt due to penalty backoff. Does NOT record a new attempt —
-        that happens only on successful password verification.
-
-        Args:
-            ip_address: The client IP address to rate-limit.
-
-        Returns:
-            ``True`` if the request is allowed (past penalty period), ``False``
-            if currently blocked by exponential backoff.
-        """
-        ip_address = normalise_ip(ip_address)
-        now = time()
-
-        if ip_address not in self._failures:
-            self._failures[ip_address] = deque()
-
-        failures = self._failures[ip_address]
-        cutoff = now - self.window_seconds
-
-        # Remove old failures outside the window
-        while failures and failures[0] < cutoff:
-            failures.popleft()
-
-        # If no recent failures, request is allowed
-        if not failures:
-            return True
-
-        # Calculate accumulated penalty: how much time must pass before
-        # the next attempt is allowed, based on failure count
-        failure_count = len(failures)
-        penalty = min(
-            LOGIN_PENALTY_BASE_SECONDS * (LOGIN_PENALTY_MULTIPLIER ** failure_count),
-            LOGIN_PENALTY_MAX_SECONDS,
-        )
-
-        # Check if enough time has passed since the last failure
-        time_since_last_failure = now - failures[-1]
-        return time_since_last_failure >= penalty
-
-    def cleanup_expired(self) -> None:
-        """Remove all IPs with no recent failures (cleanup task).
-
-        Called periodically by the background task to prevent unbounded
-        growth of the tracking dictionary.
-        """
-        now = time()
-        cutoff = now - self.window_seconds
-
-        ips_to_remove = []
-        for ip_address, failures in self._failures.items():
-            # Remove old failures
-            while failures and failures[0] < cutoff:
-                failures.popleft()
-            # Mark IP for removal if no failures remain
-            if not failures:
-                ips_to_remove.append(ip_address)
-
-        for ip_address in ips_to_remove:
-            del self._failures[ip_address]
-
-        if ips_to_remove:
-            log.debug("rate_limiter_cleanup", removed_ips=len(ips_to_remove))
-
-    def get_state(self) -> Mapping[str, int]:
-        """Return a read-only view of current failure counts per IP.
-
-        For debugging and monitoring.
-
-        Returns:
-            A mapping of IP addresses to their failure counts.
-        """
-        now = time()
-        cutoff = now - self.window_seconds
-        result = {}
-        for ip_address, failures in self._failures.items():
-            # Count non-expired failures
-            count = sum(1 for ts in failures if ts >= cutoff)
-            if count > 0:
-                result[ip_address] = count
-        return result
-
-    def reset(self) -> None:
-        """Clear all tracked failures (for testing)."""
-        self._failures.clear()
-
-    # ---------------------------------------------------------------------------
-    # Penalty strategy for failed login attempts
-    # ---------------------------------------------------------------------------
-
-    def record_failure(self, ip_address: str) -> None:
-        """Record a failed login attempt.
-
-        Tracks failures per IP to enable exponential backoff in is_allowed().
-        The penalty delay is automatically calculated in is_allowed() based on
-        the failure count, providing transparent brute-force resistance.
-
-        Args:
-            ip_address: The client IP address whose login attempt failed.
-        """
-        ip_address = normalise_ip(ip_address)
-        now = time()
-
-        if ip_address not in self._failures:
-            self._failures[ip_address] = deque()
-
-        failures = self._failures[ip_address]
-        cutoff = now - self.window_seconds
-
-        # Remove old failures outside the window
-        while failures and failures[0] < cutoff:
-            failures.popleft()
-
-        # Record this failure
-        failures.append(now)
+log = get_logger(__name__)


 class GlobalRateLimiter:
    """Global per-IP request rate limiter using sliding window algorithm.

    Tracks total request count within a configurable time window per IP address.
-    Unlike RateLimiter (which uses exponential backoff), this implements simple
+    This implements simple
    request counting: when an IP exceeds the limit, the next request is blocked
    until the oldest request in the window expires.