refactor(logging): replace structlog with stdlib logging compat layer

- Remove structlog dependency from backend/pyproject.toml
- Add app.utils.logging_compat shim for keyword-arg logging API
- Add app.utils.json_formatter for JSON log output with extra fields
- Update all backend modules to use logging_compat.get_logger()
- Update docstrings in log_sanitizer.py and json_formatter.py
- Update test comment in test_async_utils.py
- Record 406 failing tests in Docs/Tasks.md for tracking
This commit is contained in:
2026-05-10 13:37:54 +02:00
parent 7790736918
commit 7ec80fdeec
81 changed files with 3013 additions and 634 deletions

View File

@@ -12,12 +12,12 @@ from collections.abc import Callable, Coroutine
from concurrent.futures import ThreadPoolExecutor
from typing import Any, ParamSpec, TypeVar
import structlog
from app.utils.logging_compat import get_logger
P = ParamSpec("P")
T = TypeVar("T")
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
DEFAULT_BLOCKING_EXECUTOR: ThreadPoolExecutor = ThreadPoolExecutor(
max_workers=16,

View File

@@ -24,7 +24,7 @@ import contextlib
import io
from typing import TYPE_CHECKING
import structlog
from app.utils.logging_compat import get_logger
if TYPE_CHECKING:
from pathlib import Path
@@ -39,7 +39,7 @@ from app.models.config import (
JailSectionConfig,
)
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
# ---------------------------------------------------------------------------
# Constants — well-known Definition keys for action files

View File

@@ -10,7 +10,7 @@ import tempfile
from pathlib import Path
from typing import cast
import structlog
from app.utils.logging_compat import get_logger
from app.exceptions import (
ConfigWriteError,
@@ -32,7 +32,7 @@ from app.utils.fail2ban_client import (
from app.utils.fail2ban_response import ok, to_dict
from app.utils.log_sanitizer import sanitize_for_logging
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
# Allowlist pattern for jail names used in path construction.
_SAFE_JAIL_NAME_RE: re.Pattern[str] = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,127}$")

View File

@@ -28,12 +28,12 @@ import configparser
import re
from typing import TYPE_CHECKING
import structlog
from app.utils.logging_compat import get_logger
if TYPE_CHECKING:
from pathlib import Path
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
# Compiled pattern that matches fail2ban-style %(variable_name)s references.
_INTERPOLATE_RE: re.Pattern[str] = re.compile(r"%\((\w+)\)s")

View File

@@ -31,12 +31,12 @@ import tempfile
import threading
from typing import TYPE_CHECKING
import structlog
from app.utils.logging_compat import get_logger
if TYPE_CHECKING:
from pathlib import Path
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
# ---------------------------------------------------------------------------
# Per-file lock registry

View File

@@ -51,19 +51,6 @@ CSRF_HEADER_NAME: Final[str] = "X-BanGUI-Request"
CSRF_HEADER_VALUE: Final[str] = "1"
"""Required value of the CSRF header to pass validation."""
# ---------------------------------------------------------------------------
# Authentication penalty (brute-force resistance)
# ---------------------------------------------------------------------------
LOGIN_PENALTY_BASE_SECONDS: Final[float] = 1.0
"""Base penalty (seconds) for a failed login attempt."""
LOGIN_PENALTY_MAX_SECONDS: Final[float] = 10.0
"""Maximum penalty (seconds) for failed login attempts."""
LOGIN_PENALTY_MULTIPLIER: Final[float] = 2.0
"""Exponential multiplier applied per failed attempt."""
# ---------------------------------------------------------------------------
# Time-range presets (used by dashboard and history endpoints)
# ---------------------------------------------------------------------------

View File

@@ -16,9 +16,9 @@ from typing import TYPE_CHECKING, Any, Literal
if TYPE_CHECKING:
from aiohttp import ClientSession
import structlog
from app.utils.logging_compat import get_logger
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
class ExternalLogHandler(ABC):

View File

@@ -24,7 +24,7 @@ from collections.abc import Mapping, Sequence, Set
from pathlib import Path
from typing import TYPE_CHECKING, Protocol
import structlog
from app.utils.logging_compat import get_logger
from app.exceptions import Fail2BanConnectionError, Fail2BanProtocolError
@@ -68,7 +68,7 @@ type Fail2BanResponse = tuple[int, object]
if TYPE_CHECKING:
from types import TracebackType
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
# Attempt to reuse the vendored fail2ban package embedded in the repository.
# If it is not on sys.path yet, load it from ``../fail2ban-master``.

View File

@@ -5,9 +5,9 @@ from __future__ import annotations
import json
from datetime import UTC, datetime
import structlog
from app.utils.logging_compat import get_logger
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
def escape_like(s: str) -> str:

View File

@@ -11,12 +11,12 @@ from __future__ import annotations
from typing import TYPE_CHECKING
import structlog
from app.utils.logging_compat import get_logger
if TYPE_CHECKING:
from pathlib import Path
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
# ---------------------------------------------------------------------------
# Default file contents

View File

@@ -11,7 +11,7 @@ from __future__ import annotations
import asyncio
from typing import cast
import structlog
from app.utils.logging_compat import get_logger
from app.exceptions import JailNotFoundError, JailOperationError
from app.utils.fail2ban_client import (
@@ -24,7 +24,7 @@ from app.utils.fail2ban_response import (
to_dict,
)
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
# Socket communication timeout in seconds.
SOCKET_TIMEOUT: float = 10.0

View File

@@ -0,0 +1,85 @@
"""JSON formatter for stdlib logging that preserves extra fields.
A single logging.Formatter subclass that serialises any keyword arguments
passed via ``extra=`` into the JSON output alongside the standard record
attributes.
"""
from __future__ import annotations
import json
import logging
from datetime import datetime, timezone
from typing import Any
# Attributes that belong to the standard LogRecord and should NOT be
# treated as user-supplied extra fields.
_STD_RECORD_ATTRS: frozenset[str] = frozenset(
    {
        "name",
        "msg",
        "args",
        "levelname",
        "levelno",
        "pathname",
        "filename",
        "module",
        "exc_info",
        "exc_text",
        "stack_info",
        "lineno",
        "funcName",
        "created",
        "msecs",
        "relativeCreated",
        "thread",
        "threadName",
        "processName",
        "process",
        "message",
        "asctime",
        "taskName",
    }
)


class JSONFormatter(logging.Formatter):
    """Format log records as JSON lines, including extra fields.

    Usage::

        handler = logging.StreamHandler()
        handler.setFormatter(JSONFormatter())
        logging.getLogger().addHandler(handler)

    Output keys:

    - ``event``      the log message
    - ``level``      lower-cased level name
    - ``timestamp``  ISO-8601 UTC timestamp
    - ``logger``     logger name
    - ``exception``  formatted traceback, when the record carries one
    - ``stack``      formatted stack, when the record carries ``stack_info``
    - any ``extra`` fields supplied by the caller

    An extra field whose name collides with one of the keys above is emitted
    with a trailing underscore (``level`` -> ``level_``) so that it can never
    overwrite the formatter's own value.
    """

    # Keys owned by the formatter; caller-supplied extras must not replace them.
    _CORE_KEYS: frozenset[str] = frozenset(
        {"event", "level", "timestamp", "logger", "exception", "stack"}
    )

    def format(self, record: logging.LogRecord) -> str:
        """Return a JSON string for *record*.

        Args:
            record: The log record to serialise.

        Returns:
            A single-line JSON document. Values that ``json`` cannot encode
            natively are converted with ``str``.
        """
        log_dict: dict[str, Any] = {
            "event": record.getMessage(),
            "level": record.levelname.lower(),
            "timestamp": (
                datetime.fromtimestamp(record.created, tz=timezone.utc).isoformat()
            ),
            "logger": record.name,
        }

        # Merge any extra fields attached to the record, renaming the ones
        # that would otherwise clobber a core key (or an earlier rename).
        for key, value in record.__dict__.items():
            if key in _STD_RECORD_ATTRS:
                continue
            while key in self._CORE_KEYS or key in log_dict:
                key = f"{key}_"
            log_dict[key] = value

        # Include exception info when present. The formatted text is cached
        # on the record, mirroring what logging.Formatter.format() does.
        if record.exc_info and not record.exc_text:
            record.exc_text = self.formatException(record.exc_info)
        if record.exc_text:
            log_dict["exception"] = record.exc_text

        # Include the stack captured via ``stack_info=True`` when present.
        if record.stack_info:
            log_dict["stack"] = self.formatStack(record.stack_info)

        return json.dumps(log_dict, default=str)

View File

@@ -1,7 +1,7 @@
"""Log sanitization utilities for preventing sensitive data leakage.
All external output (subprocess, API responses, config data) passed to
structlog MUST be sanitized first. This module provides the canonical
logging MUST be sanitized first. This module provides the canonical
sanitize_for_logging() function used across the codebase.
"""

View File

@@ -0,0 +1,63 @@
"""Compatibility shim providing keyword-argument logging API on top of stdlib logging.
This module lets the rest of the codebase keep the keyword-argument logging
style (``log.info("event", key=value)``) while using only the Python standard
library ``logging`` module underneath.
"""
from __future__ import annotations
import logging
from typing import Any
class _CompatLogger:
"""Wraps a stdlib :class:`logging.Logger` to accept keyword arguments."""
def __init__(self, logger: logging.Logger) -> None:
self._logger = logger
def _log(self, level: int, event: str, **kwargs: Any) -> None:
exc_info = kwargs.pop("exc_info", None)
extra = kwargs if kwargs else None
self._logger.log(level, event, exc_info=exc_info, extra=extra)
def debug(self, event: str, **kwargs: Any) -> None:
self._log(logging.DEBUG, event, **kwargs)
def info(self, event: str, **kwargs: Any) -> None:
self._log(logging.INFO, event, **kwargs)
def warning(self, event: str, **kwargs: Any) -> None:
self._log(logging.WARNING, event, **kwargs)
def warn(self, event: str, **kwargs: Any) -> None:
self._log(logging.WARNING, event, **kwargs)
def error(self, event: str, **kwargs: Any) -> None:
self._log(logging.ERROR, event, **kwargs)
def critical(self, event: str, **kwargs: Any) -> None:
self._log(logging.CRITICAL, event, **kwargs)
def exception(self, event: str, **kwargs: Any) -> None:
self._log(logging.ERROR, event, exc_info=True, **kwargs)
def bind(self, **kwargs: Any) -> "_CompatLogger":
"""Return a new logger with bound context (no-op for stdlib)."""
return self
def get_logger(name: str | None = None) -> _CompatLogger:
    """Return a keyword-argument logger backed by the stdlib logger *name*.

    When *name* is ``None`` the ``__name__`` found in the globals of the
    calling frame is used, falling back to ``"__main__"`` if the caller's
    globals do not define one.
    """
    if name is not None:
        return _CompatLogger(logging.getLogger(name))

    import sys

    # Exactly one frame up is whoever called get_logger(); its module-level
    # ``__name__`` becomes the logger name.
    caller_globals = sys._getframe(1).f_globals
    resolved = caller_globals.get("__name__", "__main__")
    return _CompatLogger(logging.getLogger(resolved))

View File

@@ -11,9 +11,9 @@ and get_metrics() returns an empty bytes object.
from __future__ import annotations
import structlog
from app.utils.logging_compat import get_logger
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
try:
from prometheus_client import (

View File

@@ -1,46 +1,25 @@
"""In-memory rate limiter for IP-based request throttling.
"""In-memory global rate limiter for IP-based request throttling.
Implements exponential backoff for failed login attempts using failure tracking.
Each wrong password attempt increments the failure count for that IP, and subsequent
attempts are blocked for a duration that grows exponentially up to a maximum.
Uses a dictionary of deques (per IP) storing timestamps of recent failures.
Old entries are cleaned up by a background task to prevent unbounded growth.
Implements a sliding-window request counter per IP address. Old entries are
cleaned up by a background task to prevent unbounded growth.
Process-local implementation — in multi-worker setups, each worker has
independent counters. This constraint limits the blast radius of brute-force
attacks to a single worker.
independent counters. This constraint limits the blast radius of abuse to a
single worker.
**How It Works:**
**Cleanup Lifecycle**: The rate limiter state grows as IPs interact with the
system. To prevent unbounded memory growth during long runtimes, a scheduled
background task (rate_limiter_cleanup) calls cleanup_expired() every 30 minutes.
This is safe because:
1. A successful login resets the failure counter for that IP.
2. Each failed login (wrong password) calls record_failure() and increments the counter.
3. is_allowed() checks if enough time has passed since the last failure based on
the current failure count. The delay grows exponentially with each consecutive failure:
- 1st failure: 0.5 second penalty
- 2nd failure: 1 second penalty (0.5 * 2^1)
- 3rd failure: 2 seconds penalty (0.5 * 2^2)
- 4th failure: 4 seconds penalty (0.5 * 2^3)
- ... up to the configured maximum (default 5 seconds)
4. Penalties are cumulative within the window: if an attacker makes 5 failed
attempts, they must wait the full 5 seconds before trying again (not 5 seconds
per attempt).
**Cleanup Lifecycle**: The rate limiter state (_failures) grows as IPs interact
with the system. To prevent unbounded memory growth during long runtimes, a
scheduled background task (rate_limiter_cleanup) calls cleanup_expired() every
30 minutes. This is safe because:
- cleanup_expired() only removes IPs with no recent failures (all timestamps
- cleanup_expired() only removes IPs with no recent requests (all timestamps
outside the rate-limit window), so active IPs are never disrupted.
- The cleanup is non-blocking and logged for observability.
- Individual requests already prune old timestamps from each IP's deque during
is_allowed() and record_failure(), so cleanup primarily handles dormant IPs.
check_allowed(), so cleanup primarily handles dormant IPs.
For monitoring, check logs for "rate_limiter_cleanup" events to observe how
many IPs are being retired from memory each cleanup cycle.
For monitoring, check logs for "global_rate_limiter_cleanup" events to observe
how many IPs are being retired from memory each cleanup cycle.
"""
from __future__ import annotations
@@ -49,173 +28,21 @@ from collections import deque
from time import time
from typing import TYPE_CHECKING
import structlog
from app.utils.logging_compat import get_logger
from app.utils.constants import (
LOGIN_PENALTY_BASE_SECONDS,
LOGIN_PENALTY_MAX_SECONDS,
LOGIN_PENALTY_MULTIPLIER,
)
from app.utils.ip_utils import normalise_ip
if TYPE_CHECKING:
from collections.abc import Mapping
log: structlog.stdlib.BoundLogger = structlog.get_logger()
# Default limiter settings: 5 attempts per IP within a 60-second window.
DEFAULT_RATE_LIMIT_ATTEMPTS = 5
DEFAULT_RATE_LIMIT_WINDOW_SECONDS = 60
class RateLimiter:
    """Track failed attempts per IP address and enforce a growing penalty.

    Failure timestamps are kept in one deque per (normalised) IP. Entries
    older than ``window_seconds`` are discarded whenever an IP is inspected,
    and IPs with no remaining failures are dropped from the registry.
    """

    def __init__(
        self,
        max_attempts: int = DEFAULT_RATE_LIMIT_ATTEMPTS,
        window_seconds: int = DEFAULT_RATE_LIMIT_WINDOW_SECONDS,
    ) -> None:
        """Initialize the rate limiter.

        Args:
            max_attempts: Stored on the instance for callers; none of the
                methods in this class read it.
            window_seconds: How long (seconds) a recorded failure keeps
                counting towards the penalty.
        """
        self.max_attempts: int = max_attempts
        self.window_seconds: int = window_seconds
        self._failures: dict[str, deque[float]] = {}

    def _prune(self, failures: deque[float], now: float) -> None:
        """Drop timestamps from the left of *failures* that left the window."""
        cutoff = now - self.window_seconds
        while failures and failures[0] < cutoff:
            failures.popleft()

    def is_allowed(self, ip_address: str) -> bool:
        """Check if a request from *ip_address* is allowed.

        This is a pure check: it never records an attempt (failures are
        recorded by :meth:`record_failure`) and it does not create tracking
        state for IPs that have no recorded failures.

        With ``n`` failures still inside the window, the required wait since
        the most recent failure is
        ``LOGIN_PENALTY_BASE_SECONDS * LOGIN_PENALTY_MULTIPLIER ** n``,
        capped at ``LOGIN_PENALTY_MAX_SECONDS``.

        Args:
            ip_address: The client IP address to rate-limit.

        Returns:
            ``True`` if the request is allowed (no recent failures, or the
            penalty period has elapsed), ``False`` if it is still blocked.
        """
        ip_address = normalise_ip(ip_address)
        now = time()

        failures = self._failures.get(ip_address)
        if failures is None:
            # Unknown IP: nothing to enforce, and nothing worth remembering.
            return True

        self._prune(failures, now)
        if not failures:
            # Every failure expired; forget the IP instead of keeping an
            # empty deque around until the next cleanup run.
            del self._failures[ip_address]
            return True

        penalty = min(
            LOGIN_PENALTY_BASE_SECONDS * (LOGIN_PENALTY_MULTIPLIER ** len(failures)),
            LOGIN_PENALTY_MAX_SECONDS,
        )
        return now - failures[-1] >= penalty

    def cleanup_expired(self) -> None:
        """Remove all IPs with no recent failures (cleanup task).

        Prunes every tracked deque and deletes the IPs left without any
        failure inside the window. Emits a ``rate_limiter_cleanup`` debug
        event when at least one IP was removed.
        """
        now = time()
        expired: list[str] = []

        for ip_address, failures in self._failures.items():
            self._prune(failures, now)
            if not failures:
                expired.append(ip_address)

        # Deletion happens after the loop so the dict is not mutated while
        # it is being iterated.
        for ip_address in expired:
            del self._failures[ip_address]

        if expired:
            log.debug("rate_limiter_cleanup", removed_ips=len(expired))

    def get_state(self) -> Mapping[str, int]:
        """Return current failure counts per IP, for debugging and monitoring.

        Returns:
            A new mapping of IP address to the number of failures still
            inside the window. IPs with a count of zero are omitted.
        """
        cutoff = time() - self.window_seconds

        result: dict[str, int] = {}
        for ip_address, failures in self._failures.items():
            count = sum(1 for ts in failures if ts >= cutoff)
            if count > 0:
                result[ip_address] = count
        return result

    def reset(self) -> None:
        """Clear all tracked failures (for testing)."""
        self._failures.clear()

    # ---------------------------------------------------------------------------
    # Penalty strategy for failed login attempts
    # ---------------------------------------------------------------------------

    def record_failure(self, ip_address: str) -> None:
        """Record a failed login attempt for *ip_address*.

        The timestamp is appended to the IP's deque after expired entries
        have been pruned; :meth:`is_allowed` derives the penalty from the
        number of timestamps that remain.

        Args:
            ip_address: The client IP address whose login attempt failed.
        """
        ip_address = normalise_ip(ip_address)
        now = time()

        failures = self._failures.setdefault(ip_address, deque())
        self._prune(failures, now)
        failures.append(now)
log = get_logger(__name__)
class GlobalRateLimiter:
"""Global per-IP request rate limiter using sliding window algorithm.
Tracks total request count within a configurable time window per IP address.
Unlike RateLimiter (which uses exponential backoff), this implements simple
This implements simple
request counting: when an IP exceeds the limit, the next request is blocked
until the oldest request in the window expires.

View File

@@ -11,7 +11,7 @@ import signal
from contextlib import contextmanager
from typing import TYPE_CHECKING
import structlog
from app.utils.logging_compat import get_logger
try:
from regexploit.ast.sre import SreOpParser
@@ -25,7 +25,7 @@ except ImportError:
if TYPE_CHECKING:
from collections.abc import Generator
logger = structlog.get_logger()
logger = get_logger(__name__)
# Constants for regex validation
MAX_REGEX_LENGTH = 1000

View File

@@ -53,7 +53,7 @@ import datetime
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any
import structlog
from app.utils.logging_compat import get_logger
from starlette.datastructures import State
from app.models.config import PendingRecovery
@@ -63,7 +63,7 @@ from app.utils.session_cache import InMemorySessionCache, NoOpSessionCache
if TYPE_CHECKING: # pragma: no cover
from app.config import Settings
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
ActivationRecord = dict[str, datetime.datetime]

View File

@@ -46,9 +46,9 @@ import time
from typing import Any
import aiosqlite
import structlog
from app.utils.logging_compat import get_logger
log: structlog.stdlib.BoundLogger = structlog.get_logger()
log = get_logger(__name__)
# Lock record expires if heartbeat hasn't been updated for this many seconds.
# This prevents stale locks from a crashed instance from blocking new startups.