"""Prometheus metrics collection for BanGUI backend.

This module provides metrics collection for:

- HTTP request count and latency per endpoint
- Active concurrent requests
- Custom application metrics (bans, jails, etc.)

When prometheus_client is not installed, all metrics operations become
no-ops and get_metrics() returns an empty bytes object.

Every metric is created lazily on first use, so importing this module never
registers anything and never fails because prometheus_client is missing.
"""

from __future__ import annotations

from app.utils.logging_compat import get_logger

log = get_logger(__name__)

try:
    from prometheus_client import (
        CONTENT_TYPE_LATEST,
        CollectorRegistry,
        Counter,
        Gauge,
        Histogram,
        Summary,
        generate_latest,
    )
    from prometheus_client import CollectorRegistry as _CR

    _PROMETHEUS_AVAILABLE = True
except ImportError:
    _PROMETHEUS_AVAILABLE = False
    CONTENT_TYPE_LATEST = "text/plain; charset=utf-8"
    Counter = Gauge = Histogram = Summary = object  # dummy types for type hints
    CollectorRegistry = None

    def generate_latest(registry=None):
        """Fallback used without prometheus_client: there is nothing to render."""
        return b""


__all__ = [
    "get_metrics_registry",
    "get_metrics",
    "get_metrics_content_type",
    "http_request_count",
    "http_request_latency",
    "http_active_requests",
    "bans_total",
    "jails_total",
    "fail2ban_connection_errors",
    "external_logging_init_failures",
    "app_uptime",
]

# Upper bounds (in seconds) of the HTTP latency histogram buckets.
_LATENCY_BUCKETS = (
    0.005, 0.01, 0.025, 0.05, 0.075, 0.1, 0.25,
    0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0,
)

# Global registry
_registry: CollectorRegistry | None = None


def _require_prometheus(message: str = "prometheus_client not installed") -> None:
    """Raise ``RuntimeError(message)`` when prometheus_client is unavailable."""
    if not _PROMETHEUS_AVAILABLE:
        raise RuntimeError(message)


def get_metrics_registry() -> CollectorRegistry:
    """Get or create the global metrics registry.

    Returns:
        The module-wide ``CollectorRegistry``; created on the first call and
        reused afterwards.

    Raises:
        RuntimeError: If prometheus_client is not installed.
    """
    global _registry
    if _registry is None:
        _require_prometheus(
            "prometheus_client is not installed — cannot create metrics registry"
        )
        _registry = CollectorRegistry()
    return _registry


# ---------------------------------------------------------------------------
# HTTP metrics — created lazily so the module loads even without
# prometheus_client.
# ---------------------------------------------------------------------------

_http_request_count: Counter | None = None
_http_request_latency: Histogram | None = None
_http_active_requests: Gauge | None = None


def _get_http_request_count() -> Counter:
    """Return the HTTP request counter, creating and registering it on first use."""
    global _http_request_count
    if _http_request_count is None:
        _require_prometheus()
        _http_request_count = Counter(
            "bangui_http_requests_total",
            "Total HTTP requests by method, endpoint, and status code",
            ["method", "endpoint", "status_code"],
            registry=get_metrics_registry(),
        )
    return _http_request_count


def _get_http_request_latency() -> Histogram:
    """Return the HTTP latency histogram, creating and registering it on first use."""
    global _http_request_latency
    if _http_request_latency is None:
        _require_prometheus()
        _http_request_latency = Histogram(
            "bangui_http_request_duration_seconds",
            "HTTP request latency in seconds by method and endpoint",
            ["method", "endpoint"],
            buckets=_LATENCY_BUCKETS,
            registry=get_metrics_registry(),
        )
    return _http_request_latency


def _get_http_active_requests() -> Gauge:
    """Return the active-requests gauge, creating and registering it on first use."""
    global _http_active_requests
    if _http_active_requests is None:
        _require_prometheus()
        _http_active_requests = Gauge(
            "bangui_http_active_requests",
            "Current number of active HTTP requests by method and endpoint",
            ["method", "endpoint"],
            registry=get_metrics_registry(),
        )
    return _http_active_requests


# ---------------------------------------------------------------------------
# No-op stand-ins used when prometheus_client is not installed. They accept
# the same call shapes as the real objects so call sites need no guards.
# ---------------------------------------------------------------------------


class _NoOpCounter:
    """Counter stand-in that ignores every update."""

    def inc(self, amount=1):
        pass

    def dec(self, amount=1):
        # Real counters have no dec(); kept so existing callers keep working.
        pass


class _NoOpHistogram:
    """Histogram stand-in that ignores every observation."""

    def observe(self, amount):
        pass


class _NoOpGauge:
    """Gauge stand-in that ignores every update."""

    def inc(self, amount=1):
        pass

    def dec(self, amount=1):
        pass

    def set(self, x):
        pass


class _NoOpTimer:
    """Timer stand-in usable both as a context manager and as a decorator."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returning a falsy value lets exceptions propagate unchanged.
        return False

    def __call__(self, func):
        return func


class _NoOpSummary:
    """Summary stand-in whose ``time()`` yields a timer that records nothing."""

    def time(self):
        return _NoOpTimer()


class _NoOpRequestCountProxy:
    """Labelled-counter stand-in; label values are accepted and discarded."""

    def labels(self, *args, **kwargs):
        return _NoOpCounter()


class _NoOpRequestLatencyProxy:
    """Labelled-histogram stand-in; label values are accepted and discarded."""

    def labels(self, *args, **kwargs):
        return _NoOpHistogram()


class _NoOpActiveRequestsProxy:
    """Labelled-gauge stand-in; label values are accepted and discarded."""

    def labels(self, *args, **kwargs):
        return _NoOpGauge()


# ---------------------------------------------------------------------------
# Real proxies: thin wrappers that resolve the underlying metric on each call,
# which defers metric creation until the first actual use.
# ---------------------------------------------------------------------------


class _RealHttpRequestCount:
    """Forward ``labels()`` to the lazily created request counter."""

    def labels(self, *args, **kw):
        return _get_http_request_count().labels(*args, **kw)


class _RealHttpRequestLatency:
    """Forward ``labels()`` to the lazily created latency histogram."""

    def labels(self, *args, **kw):
        return _get_http_request_latency().labels(*args, **kw)


class _RealHttpActiveRequests:
    """Forward ``labels()`` to the lazily created active-requests gauge."""

    def labels(self, *args, **kw):
        return _get_http_active_requests().labels(*args, **kw)


# Public HTTP metric handles: real when prometheus is available, no-op otherwise.
if _PROMETHEUS_AVAILABLE:
    http_request_count = _RealHttpRequestCount()
    http_request_latency = _RealHttpRequestLatency()
    http_active_requests = _RealHttpActiveRequests()
else:
    http_request_count = _NoOpRequestCountProxy()
    http_request_latency = _NoOpRequestLatencyProxy()
    http_active_requests = _NoOpActiveRequestsProxy()


# ---------------------------------------------------------------------------
# Application metrics — also lazily initialized.
# ---------------------------------------------------------------------------

_bans_total: Gauge | None = None
_jails_total: Gauge | None = None
_fail2ban_connection_errors: Counter | None = None
_external_logging_init_failures: Counter | None = None
_app_uptime: Summary | None = None


def _get_bans_total() -> Gauge:
    """Return the banned-IP gauge, creating and registering it on first use."""
    global _bans_total
    if _bans_total is None:
        _require_prometheus()
        _bans_total = Gauge(
            "bangui_bans_total",
            "Total number of banned IPs across all jails",
            registry=get_metrics_registry(),
        )
    return _bans_total


def _get_jails_total() -> Gauge:
    """Return the jail-count gauge, creating and registering it on first use."""
    global _jails_total
    if _jails_total is None:
        _require_prometheus()
        _jails_total = Gauge(
            "bangui_jails_total",
            "Total number of fail2ban jails",
            registry=get_metrics_registry(),
        )
    return _jails_total


def _get_fail2ban_connection_errors() -> Counter:
    """Return the connection-error counter, creating it on first use."""
    global _fail2ban_connection_errors
    if _fail2ban_connection_errors is None:
        _require_prometheus()
        _fail2ban_connection_errors = Counter(
            "bangui_fail2ban_connection_errors_total",
            "Total number of fail2ban connection errors",
            registry=get_metrics_registry(),
        )
    return _fail2ban_connection_errors


def _get_external_logging_init_failures() -> Counter:
    """Return the logging-init-failure counter, creating it on first use."""
    global _external_logging_init_failures
    if _external_logging_init_failures is None:
        _require_prometheus()
        _external_logging_init_failures = Counter(
            "bangui_external_logging_init_failures_total",
            "Total number of external logging handler initialization failures",
            registry=get_metrics_registry(),
        )
    return _external_logging_init_failures


def _get_app_uptime() -> Summary:
    """Return the uptime summary, creating and registering it on first use."""
    global _app_uptime
    if _app_uptime is None:
        _require_prometheus()
        _app_uptime = Summary(
            "bangui_uptime_seconds",
            "Application uptime in seconds",
            registry=get_metrics_registry(),
        )
    return _app_uptime


class _RealBansTotal:
    """Forward gauge updates to the lazily created banned-IP gauge."""

    def inc(self, amount=1):
        _get_bans_total().inc(amount)

    def dec(self, amount=1):
        _get_bans_total().dec(amount)

    def set(self, x):
        _get_bans_total().set(x)


class _RealJailsTotal:
    """Forward gauge updates to the lazily created jail-count gauge."""

    def inc(self, amount=1):
        _get_jails_total().inc(amount)

    def dec(self, amount=1):
        _get_jails_total().dec(amount)

    def set(self, x):
        _get_jails_total().set(x)


class _RealFail2BanConnErrors:
    """Forward increments to the lazily created connection-error counter."""

    def inc(self, amount=1):
        _get_fail2ban_connection_errors().inc(amount)


class _RealExtLogFailures:
    """Forward increments to the lazily created logging-init-failure counter."""

    def inc(self, amount=1):
        _get_external_logging_init_failures().inc(amount)


class _RealAppUptime:
    """Expose ``time()`` of the lazily created uptime summary."""

    def time(self):
        # The timer must be returned: callers use it as a context manager or
        # decorator, which is impossible if this method yields None.
        return _get_app_uptime().time()


# Public application metric handles: real when prometheus is available,
# no-op otherwise.
if _PROMETHEUS_AVAILABLE:
    bans_total = _RealBansTotal()
    jails_total = _RealJailsTotal()
    fail2ban_connection_errors = _RealFail2BanConnErrors()
    external_logging_init_failures = _RealExtLogFailures()
    app_uptime = _RealAppUptime()
else:
    bans_total = _NoOpGauge()
    jails_total = _NoOpGauge()
    fail2ban_connection_errors = _NoOpCounter()
    external_logging_init_failures = _NoOpCounter()
    app_uptime = _NoOpSummary()


def get_metrics() -> bytes:
    """Get all collected metrics in Prometheus text format.

    Returns:
        The rendered contents of the global registry, or ``b""`` when
        prometheus_client is not installed.
    """
    if not _PROMETHEUS_AVAILABLE:
        # An empty body is a valid (empty) exposition; a free-text placeholder
        # would not parse as Prometheus text format on the scraper side.
        return b""
    return generate_latest(get_metrics_registry())


def get_metrics_content_type() -> str:
    """Get the correct Content-Type for Prometheus metrics."""
    return CONTENT_TYPE_LATEST