- Remove structlog dependency from backend/pyproject.toml - Add app.utils.logging_compat shim for keyword-arg logging API - Add app.utils.json_formatter for JSON log output with extra fields - Update all backend modules to use logging_compat.get_logger() - Update docstrings in log_sanitizer.py and json_formatter.py - Update test comment in test_async_utils.py - Record 406 failing tests in Docs/Tasks.md for tracking
272 lines
8.8 KiB
Python
272 lines
8.8 KiB
Python
"""Prometheus metrics collection for BanGUI backend.
|
|
|
|
This module provides metrics collection for:
|
|
- HTTP request count and latency per endpoint
|
|
- Active concurrent requests
|
|
- Custom application metrics (bans, jails, etc.)
|
|
|
|
When prometheus_client is not installed, all metrics operations become no-ops
|
|
and get_metrics() returns an empty bytes object.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from app.utils.logging_compat import get_logger
|
|
|
|
log = get_logger(__name__)
|
|
|
|
try:
|
|
from prometheus_client import (
|
|
CONTENT_TYPE_LATEST,
|
|
CollectorRegistry,
|
|
Counter,
|
|
Gauge,
|
|
Histogram,
|
|
Summary,
|
|
generate_latest,
|
|
)
|
|
from prometheus_client import CollectorRegistry as _CR
|
|
|
|
_PROMETHEUS_AVAILABLE = True
|
|
except ImportError:
|
|
_PROMETHEUS_AVAILABLE = False
|
|
CONTENT_TYPE_LATEST = "text/plain; charset=utf-8"
|
|
Counter = Gauge = Histogram = Summary = object # dummy types for type hints
|
|
CollectorRegistry = None
|
|
generate_latest = lambda r: b""
|
|
|
|
# Public API of this module: the registry/exposition helpers plus every
# module-level metric object that callers are expected to import.
__all__ = [
    "get_metrics_registry",
    "get_metrics",
    "get_metrics_content_type",
    "http_request_count",
    "http_request_latency",
    "http_active_requests",
    "bans_total",
    "jails_total",
    "fail2ban_connection_errors",
    "external_logging_init_failures",
    "app_uptime",
]
|
|
|
|
# Process-wide registry; remains ``None`` until the first successful lookup.
_registry: "CollectorRegistry | None" = None


def get_metrics_registry() -> "CollectorRegistry":
    """Return the shared metrics registry, creating it on first use.

    Returns:
        The single ``CollectorRegistry`` instance cached in ``_registry``.

    Raises:
        RuntimeError: If no registry has been created yet and
            prometheus_client is not installed.
    """
    global _registry

    # Fast path: the registry was already built by an earlier call.
    if _registry is not None:
        return _registry

    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError(
            "prometheus_client is not installed — cannot create metrics registry"
        )

    _registry = CollectorRegistry()
    return _registry
|
|
|
|
|
|
# HTTP metric singletons. Each one stays ``None`` until its ``_get_*``
# accessor builds it, so this module can be imported even when
# prometheus_client is missing.

_http_request_count: "Counter | None" = None
_http_request_latency: "Histogram | None" = None
_http_active_requests: "Gauge | None" = None
|
|
|
|
|
|
def _get_http_request_count() -> "Counter":
    """Return the HTTP request counter, building it on the first call.

    The counter is labelled by ``method``, ``endpoint`` and ``status_code``
    and is registered on the shared metrics registry.

    Raises:
        RuntimeError: If the counter does not exist yet and
            prometheus_client is not installed.
    """
    global _http_request_count

    # Already built: hand back the cached instance.
    if _http_request_count is not None:
        return _http_request_count

    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError("prometheus_client not installed")

    label_names = ["method", "endpoint", "status_code"]
    _http_request_count = Counter(
        "bangui_http_requests_total",
        "Total HTTP requests by method, endpoint, and status code",
        label_names,
        registry=get_metrics_registry(),
    )
    return _http_request_count
|
|
|
|
|
|
def _get_http_request_latency() -> "Histogram":
    """Return the HTTP latency histogram, building it on the first call.

    The histogram is labelled by ``method`` and ``endpoint``, uses explicit
    bucket bounds from 5 ms to 10 s, and is registered on the shared
    metrics registry.

    Raises:
        RuntimeError: If the histogram does not exist yet and
            prometheus_client is not installed.
    """
    global _http_request_latency

    # Already built: hand back the cached instance.
    if _http_request_latency is not None:
        return _http_request_latency

    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError("prometheus_client not installed")

    # Upper bounds, in seconds, of the latency buckets.
    latency_buckets = (
        0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25,
        0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0,
    )
    label_names = ["method", "endpoint"]
    _http_request_latency = Histogram(
        "bangui_http_request_duration_seconds",
        "HTTP request latency in seconds by method and endpoint",
        label_names,
        buckets=latency_buckets,
        registry=get_metrics_registry(),
    )
    return _http_request_latency
|
|
|
|
|
|
def _get_http_active_requests() -> "Gauge":
    """Return the in-flight HTTP request gauge, building it on the first call.

    The gauge is labelled by ``method`` and ``endpoint`` and is registered
    on the shared metrics registry.

    Raises:
        RuntimeError: If the gauge does not exist yet and prometheus_client
            is not installed.
    """
    global _http_active_requests

    # Already built: hand back the cached instance.
    if _http_active_requests is not None:
        return _http_active_requests

    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError("prometheus_client not installed")

    label_names = ["method", "endpoint"]
    _http_active_requests = Gauge(
        "bangui_http_active_requests",
        "Current number of active HTTP requests by method and endpoint",
        label_names,
        registry=get_metrics_registry(),
    )
    return _http_active_requests
|
|
|
|
|
|
# No-op stand-ins used when prometheus_client is not installed. They accept
# the same call shapes as the real metric objects (optional amounts,
# positional or keyword label values) so instrumented code behaves the same
# whether or not the dependency is present.


class _NoOpCounter:
    """Counter child that silently discards every update."""

    def inc(self, *_args, **_kwargs) -> None:
        """Ignore an increment, with or without an explicit amount."""

    def dec(self, *_args, **_kwargs) -> None:
        """Ignore a decrement; retained for callers that already use it."""


class _NoOpHistogram:
    """Histogram child that silently discards every observation."""

    def observe(self, *_args, **_kwargs) -> None:
        """Ignore an observed value."""


class _NoOpGauge:
    """Gauge child that silently discards every update."""

    def inc(self, *_args, **_kwargs) -> None:
        """Ignore an increment, with or without an explicit amount."""

    def dec(self, *_args, **_kwargs) -> None:
        """Ignore a decrement, with or without an explicit amount."""

    def set(self, *_args, **_kwargs) -> None:
        """Ignore an absolute value assignment."""


class _NoOpRequestCountProxy:
    """Labelled-counter facade whose children do nothing."""

    def labels(self, *_labelvalues, **_labelkwargs) -> _NoOpCounter:
        """Return a fresh no-op counter for any label set."""
        return _NoOpCounter()


class _NoOpRequestLatencyProxy:
    """Labelled-histogram facade whose children do nothing."""

    def labels(self, *_labelvalues, **_labelkwargs) -> _NoOpHistogram:
        """Return a fresh no-op histogram for any label set."""
        return _NoOpHistogram()


class _NoOpActiveRequestsProxy:
    """Labelled-gauge facade whose children do nothing."""

    def labels(self, *_labelvalues, **_labelkwargs) -> _NoOpGauge:
        """Return a fresh no-op gauge for any label set."""
        return _NoOpGauge()


# Defaults exported by the module; these bindings are the no-op versions.
http_request_count = _NoOpRequestCountProxy()
http_request_latency = _NoOpRequestLatencyProxy()
http_active_requests = _NoOpActiveRequestsProxy()
|
|
|
|
# Replace with real implementations if prometheus is available
if _PROMETHEUS_AVAILABLE:

    class _RealHttpRequestCount:
        """Facade that resolves the request counter lazily on each call."""

        def labels(self, *labelvalues, **labelkwargs):
            """Return the counter child for the given label values.

            Label values may be passed positionally or by keyword; both
            forms are forwarded unchanged to the underlying metric.
            """
            return _get_http_request_count().labels(*labelvalues, **labelkwargs)

    class _RealHttpRequestLatency:
        """Facade that resolves the latency histogram lazily on each call."""

        def labels(self, *labelvalues, **labelkwargs):
            """Return the histogram child for the given label values.

            Label values may be passed positionally or by keyword; both
            forms are forwarded unchanged to the underlying metric.
            """
            return _get_http_request_latency().labels(*labelvalues, **labelkwargs)

    class _RealHttpActiveRequests:
        """Facade that resolves the active-requests gauge lazily on each call."""

        def labels(self, *labelvalues, **labelkwargs):
            """Return the gauge child for the given label values.

            Label values may be passed positionally or by keyword; both
            forms are forwarded unchanged to the underlying metric.
            """
            return _get_http_active_requests().labels(*labelvalues, **labelkwargs)

    http_request_count = _RealHttpRequestCount()
    http_request_latency = _RealHttpRequestLatency()
    http_active_requests = _RealHttpActiveRequests()
|
|
|
|
|
|
# Application metric singletons. Like the HTTP metrics, each one stays
# ``None`` until its ``_get_*`` accessor constructs it.

_bans_total: "Gauge | None" = None
_jails_total: "Gauge | None" = None
_fail2ban_connection_errors: "Counter | None" = None
_external_logging_init_failures: "Counter | None" = None
_app_uptime: "Summary | None" = None
|
|
|
|
|
|
def _get_bans_total() -> "Gauge":
    """Return the banned-IP gauge, building it on the first call.

    Raises:
        RuntimeError: If the gauge does not exist yet and prometheus_client
            is not installed.
    """
    global _bans_total

    # Already built: hand back the cached instance.
    if _bans_total is not None:
        return _bans_total

    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError("prometheus_client not installed")

    _bans_total = Gauge(
        "bangui_bans_total",
        "Total number of banned IPs across all jails",
        registry=get_metrics_registry(),
    )
    return _bans_total
|
|
|
|
|
|
def _get_jails_total() -> "Gauge":
    """Return the jail-count gauge, building it on the first call.

    Raises:
        RuntimeError: If the gauge does not exist yet and prometheus_client
            is not installed.
    """
    global _jails_total

    # Already built: hand back the cached instance.
    if _jails_total is not None:
        return _jails_total

    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError("prometheus_client not installed")

    _jails_total = Gauge(
        "bangui_jails_total",
        "Total number of fail2ban jails",
        registry=get_metrics_registry(),
    )
    return _jails_total
|
|
|
|
|
|
def _get_fail2ban_connection_errors() -> "Counter":
    """Return the fail2ban connection-error counter, building it on first call.

    Raises:
        RuntimeError: If the counter does not exist yet and
            prometheus_client is not installed.
    """
    global _fail2ban_connection_errors

    # Already built: hand back the cached instance.
    if _fail2ban_connection_errors is not None:
        return _fail2ban_connection_errors

    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError("prometheus_client not installed")

    _fail2ban_connection_errors = Counter(
        "bangui_fail2ban_connection_errors_total",
        "Total number of fail2ban connection errors",
        registry=get_metrics_registry(),
    )
    return _fail2ban_connection_errors
|
|
|
|
|
|
def _get_external_logging_init_failures() -> "Counter":
    """Return the external-logging init-failure counter, building it on first call.

    Raises:
        RuntimeError: If the counter does not exist yet and
            prometheus_client is not installed.
    """
    global _external_logging_init_failures

    # Already built: hand back the cached instance.
    if _external_logging_init_failures is not None:
        return _external_logging_init_failures

    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError("prometheus_client not installed")

    _external_logging_init_failures = Counter(
        "bangui_external_logging_init_failures_total",
        "Total number of external logging handler initialization failures",
        registry=get_metrics_registry(),
    )
    return _external_logging_init_failures
|
|
|
|
|
|
def _get_app_uptime() -> "Summary":
    """Return the uptime summary metric, building it on the first call.

    Raises:
        RuntimeError: If the summary does not exist yet and
            prometheus_client is not installed.
    """
    global _app_uptime

    # Already built: hand back the cached instance.
    if _app_uptime is not None:
        return _app_uptime

    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError("prometheus_client not installed")

    _app_uptime = Summary(
        "bangui_uptime_seconds",
        "Application uptime in seconds",
        registry=get_metrics_registry(),
    )
    return _app_uptime
|
|
|
|
|
|
# No-op defaults when prometheus unavailable. They accept the same call
# shapes as the real metrics (optional amounts, a usable timer) so callers
# do not need to special-case a missing prometheus_client.


class _NoOpAppGauge:
    """Gauge stand-in that silently discards every update."""

    def inc(self, *_args, **_kwargs) -> None:
        """Ignore an increment, with or without an explicit amount."""

    def dec(self, *_args, **_kwargs) -> None:
        """Ignore a decrement, with or without an explicit amount."""

    def set(self, *_args, **_kwargs) -> None:
        """Ignore an absolute value assignment."""


class _NoOpAppCounter:
    """Counter stand-in that silently discards every update."""

    def inc(self, *_args, **_kwargs) -> None:
        """Ignore an increment, with or without an explicit amount."""


class _NoOpAppSummary:
    """Summary stand-in whose timer measures nothing."""

    def time(self):
        """Return a do-nothing context manager so ``with x.time():`` works."""
        # Imported locally so this block adds no module-level dependency.
        from contextlib import nullcontext

        return nullcontext()


bans_total = _NoOpAppGauge()
jails_total = _NoOpAppGauge()
fail2ban_connection_errors = _NoOpAppCounter()
external_logging_init_failures = _NoOpAppCounter()
app_uptime = _NoOpAppSummary()
|
|
|
|
if _PROMETHEUS_AVAILABLE:

    class _RealBansTotal:
        """Facade over the lazily created banned-IP gauge."""

        def inc(self, amount=1) -> None:
            """Increase the gauge by ``amount`` (default 1)."""
            _get_bans_total().inc(amount)

        def dec(self, amount=1) -> None:
            """Decrease the gauge by ``amount`` (default 1)."""
            _get_bans_total().dec(amount)

        def set(self, x) -> None:
            """Set the gauge to the absolute value ``x``."""
            _get_bans_total().set(x)

    class _RealJailsTotal:
        """Facade over the lazily created jail-count gauge."""

        def inc(self, amount=1) -> None:
            """Increase the gauge by ``amount`` (default 1)."""
            _get_jails_total().inc(amount)

        def dec(self, amount=1) -> None:
            """Decrease the gauge by ``amount`` (default 1)."""
            _get_jails_total().dec(amount)

        def set(self, x) -> None:
            """Set the gauge to the absolute value ``x``."""
            _get_jails_total().set(x)

    class _RealFail2BanConnErrors:
        """Facade over the lazily created fail2ban connection-error counter."""

        def inc(self, amount=1) -> None:
            """Increase the counter by ``amount`` (default 1)."""
            _get_fail2ban_connection_errors().inc(amount)

    class _RealExtLogFailures:
        """Facade over the lazily created external-logging failure counter."""

        def inc(self, amount=1) -> None:
            """Increase the counter by ``amount`` (default 1)."""
            _get_external_logging_init_failures().inc(amount)

    class _RealAppUptime:
        """Facade over the lazily created uptime summary."""

        def time(self):
            """Return the summary's timer object.

            The timer must be returned, not discarded, so that callers can
            use it as ``with app_uptime.time():``.
            """
            return _get_app_uptime().time()

    bans_total = _RealBansTotal()
    jails_total = _RealJailsTotal()
    fail2ban_connection_errors = _RealFail2BanConnErrors()
    external_logging_init_failures = _RealExtLogFailures()
    app_uptime = _RealAppUptime()
|
|
|
|
|
|
def get_metrics() -> bytes:
    """Get all collected metrics in Prometheus text format.

    Returns:
        The serialized contents of the shared metrics registry, or an empty
        bytes object when prometheus_client is not installed.
    """
    if not _PROMETHEUS_AVAILABLE:
        # An empty payload is a valid (empty) exposition; a free-form
        # message here would not parse as Prometheus text format.
        return b""
    return generate_latest(get_metrics_registry())
|
|
|
|
|
|
def get_metrics_content_type() -> str:
    """Return the Content-Type value to send alongside the metrics payload."""
    content_type: str = CONTENT_TYPE_LATEST
    return content_type
|