359 lines
10 KiB
Python
359 lines
10 KiB
Python
"""Metrics collection for Prometheus and custom business metrics."""
|
|
|
|
import logging
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from enum import Enum
|
|
from threading import Lock
|
|
from typing import Any, Dict, Optional
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class MetricType(Enum):
    """Enumerate the kinds of metric a ``MetricValue`` can carry.

    Each member's value is the lowercase type keyword as a string.
    """

    # Value that is incremented over time.
    COUNTER = "counter"
    # Value that is set directly and may go up or down.
    GAUGE = "gauge"
    # Observations of individual sample values.
    HISTOGRAM = "histogram"
    SUMMARY = "summary"
|
|
|
|
|
|
@dataclass
class MetricValue:
    """Snapshot of one named metric together with its descriptive metadata.

    Attributes:
        name: Metric name.
        value: Current numeric value of the metric.
        metric_type: Kind of metric (counter, gauge, histogram, summary).
        labels: Label key/value pairs; a fresh empty dict per instance
            when omitted.
        timestamp: Creation time; defaults to ``datetime.now()`` (naive
            local time) evaluated when the instance is built.
        help_text: Human-readable description; empty when omitted.
    """

    # Required fields, in positional order.
    name: str
    value: float
    metric_type: MetricType

    # Optional metadata; factories are used so instances never share state.
    labels: Dict[str, str] = field(default_factory=dict)
    timestamp: datetime = field(default_factory=datetime.now)
    help_text: str = ""
|
|
|
|
|
|
@dataclass
class HistogramBucket:
    """One histogram bucket used for latency tracking.

    Attributes:
        le: Upper bound of the bucket, in seconds.
        count: Number of observations in the bucket; starts at zero.
    """

    # Bucket upper bound in seconds.
    le: float
    # Observation tally for this bucket.
    count: int = 0
|
|
|
|
|
|
class MetricsCollector:
    """Collect and export metrics for monitoring.

    All state lives in memory and is guarded by a single non-reentrant
    ``threading.Lock``. Public methods therefore never call another
    lock-taking public method while holding the lock; shared work is done
    by private helpers that expect the caller to already hold it.

    Metrics are keyed by name only: the labels and help text passed on the
    first call for a given counter or histogram name are the ones kept.
    """

    def __init__(self) -> None:
        """Initialize metrics collector."""
        self._metrics: Dict[str, MetricValue] = {}
        # Raw observations per histogram name. NOTE(review): these lists
        # grow without bound until reset_metrics() is called.
        self._request_timings: Dict[str, list[float]] = {}
        self._download_stats: Dict[str, int] = self._empty_download_stats()
        self._lock = Lock()
        # Start instants (time.monotonic()) of timers currently running.
        self._timers: Dict[str, float] = {}

    @staticmethod
    def _empty_download_stats() -> Dict[str, int]:
        """Return a fresh, zeroed download statistics mapping."""
        return {
            "completed": 0,
            "failed": 0,
            "total_size_bytes": 0,
        }

    @staticmethod
    def _summarize_timings(timings: list) -> Dict[str, float]:
        """Compute summary statistics for a non-empty list of samples.

        The samples are sorted once and the sum is computed once. ``p50``
        and ``p99`` are the elements at index ``n // 2`` and
        ``int(n * 0.99)`` of the sorted samples.
        """
        ordered = sorted(timings)
        count = len(ordered)
        total = sum(timings)
        return {
            "count": count,
            "sum": total,
            "mean": total / count,
            "min": ordered[0],
            "max": ordered[-1],
            "p50": ordered[count // 2],
            "p99": ordered[int(count * 0.99)],
        }

    @staticmethod
    def _escape_label_value(value: Any) -> str:
        """Escape a label value for the Prometheus text format."""
        # Backslash must be escaped first so later escapes are not doubled.
        return (
            str(value)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    @staticmethod
    def _escape_help(text: str) -> str:
        """Escape HELP text for the Prometheus text format."""
        return text.replace("\\", "\\\\").replace("\n", "\\n")

    def increment_counter(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Increment a counter metric.

        The counter is created with ``value`` on first use. On later calls
        only the value is increased; ``labels`` and ``help_text`` from the
        first call are kept.

        Args:
            name: Metric name.
            value: Amount to increment by.
            labels: Optional labels for the metric.
            help_text: Optional help text describing the metric.
        """
        with self._lock:
            existing = self._metrics.get(name)
            if existing is None:
                self._metrics[name] = MetricValue(
                    name=name,
                    value=value,
                    metric_type=MetricType.COUNTER,
                    labels=labels or {},
                    help_text=help_text,
                )
            else:
                existing.value += value

    def set_gauge(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Set a gauge metric, replacing any metric stored under ``name``.

        Args:
            name: Metric name.
            value: Gauge value.
            labels: Optional labels for the metric.
            help_text: Optional help text describing the metric.
        """
        with self._lock:
            self._metrics[name] = MetricValue(
                name=name,
                value=value,
                metric_type=MetricType.GAUGE,
                labels=labels or {},
                help_text=help_text,
            )

    def observe_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Observe a value for histogram.

        Every observation is appended to the raw sample list for ``name``.
        The entry in the metrics table is created on the first observation
        only, so its ``value`` stays at the first observed sample; use
        ``get_request_statistics`` for aggregate figures.

        Args:
            name: Metric name.
            value: Value to record.
            labels: Optional labels for the metric.
            help_text: Optional help text describing the metric.
        """
        with self._lock:
            self._request_timings.setdefault(name, []).append(value)

            # Register the histogram metric the first time it is seen.
            if name not in self._metrics:
                self._metrics[name] = MetricValue(
                    name=name,
                    value=value,
                    metric_type=MetricType.HISTOGRAM,
                    labels=labels or {},
                    help_text=help_text,
                )

    def start_timer(self, timer_name: str) -> None:
        """Start a timer for tracking operation duration.

        Starting a timer whose name is already running restarts it.

        Args:
            timer_name: Name of the timer.
        """
        with self._lock:
            # Monotonic clock: unaffected by wall-clock adjustments.
            self._timers[timer_name] = time.monotonic()

    def end_timer(
        self,
        timer_name: str,
        metric_name: str,
        labels: Optional[Dict[str, str]] = None,
    ) -> float:
        """End a timer and record the duration.

        Args:
            timer_name: Name of the timer to end.
            metric_name: Name of the metric to record.
            labels: Optional labels for the metric.

        Returns:
            Duration in seconds, or ``0.0`` (with a warning logged) when no
            timer with that name is running.
        """
        # Remove the timer atomically so two concurrent callers cannot both
        # pass an existence check and then race on the deletion.
        with self._lock:
            started_at = self._timers.pop(timer_name, None)

        if started_at is None:
            logger.warning("Timer %s not started", timer_name)
            return 0.0

        duration = time.monotonic() - started_at

        # observe_histogram takes the lock itself, so call it unlocked.
        self.observe_histogram(
            metric_name, duration, labels, "Request/operation duration"
        )
        return duration

    def record_download_success(self, size_bytes: int) -> None:
        """Record a successful download.

        Args:
            size_bytes: Size of downloaded file in bytes.
        """
        with self._lock:
            self._download_stats["completed"] += 1
            self._download_stats["total_size_bytes"] += size_bytes

        self.increment_counter(
            "downloads_completed_total",
            help_text="Total successful downloads",
        )

    def record_download_failure(self) -> None:
        """Record a failed download."""
        with self._lock:
            self._download_stats["failed"] += 1

        self.increment_counter(
            "downloads_failed_total", help_text="Total failed downloads"
        )

    def get_download_stats(self) -> Dict[str, int]:
        """Get download statistics.

        Returns:
            dict: A copy of the download statistics.
        """
        with self._lock:
            return self._download_stats.copy()

    def get_request_statistics(
        self, metric_name: str
    ) -> Optional[Dict[str, float]]:
        """Get statistics for a request timing metric.

        Args:
            metric_name: Name of the metric to analyze.

        Returns:
            Statistics including count, sum, mean, min, max, p50 and p99,
            or ``None`` when nothing has been observed for the metric.
        """
        with self._lock:
            timings = self._request_timings.get(metric_name)
            if not timings:
                return None
            return self._summarize_timings(timings)

    def export_prometheus_format(self) -> str:
        """Export metrics in Prometheus text format.

        Label values and help text are escaped (backslash, newline and,
        for label values, double quote) so that special characters cannot
        break the line-oriented output.

        Returns:
            str: Prometheus format metrics.
        """
        with self._lock:
            lines = []

            for name, metric in self._metrics.items():
                # HELP is optional; TYPE is always emitted.
                if metric.help_text:
                    lines.append(
                        f"# HELP {name} {self._escape_help(metric.help_text)}"
                    )
                lines.append(f"# TYPE {name} {metric.metric_type.value}")

                label_str = ""
                if metric.labels:
                    label_pairs = [
                        f'{key}="{self._escape_label_value(val)}"'
                        for key, val in metric.labels.items()
                    ]
                    label_str = "{" + ",".join(label_pairs) + "}"

                lines.append(f"{name}{label_str} {metric.value}")

            return "\n".join(lines)

    def export_json(self) -> Dict[str, Any]:
        """Export metrics as JSON.

        The returned structure contains copies of the internal label and
        download dictionaries, so mutating it does not affect the collector.

        Returns:
            dict: Metrics in JSON-serializable format.
        """
        with self._lock:
            metrics_dict = {
                name: {
                    "value": metric.value,
                    "type": metric.metric_type.value,
                    "labels": dict(metric.labels),
                    "timestamp": metric.timestamp.isoformat(),
                }
                for name, metric in self._metrics.items()
            }

            # Summarize in place: calling get_request_statistics() here
            # would try to re-acquire the non-reentrant lock and deadlock.
            timing_stats = {
                name: self._summarize_timings(samples) if samples else None
                for name, samples in self._request_timings.items()
            }

            return {
                "metrics": metrics_dict,
                "downloads": self._download_stats.copy(),
                "request_timings": timing_stats,
            }

    def reset_metrics(self) -> None:
        """Reset all collected metrics.

        Running timers are left untouched so in-flight operations can still
        be completed with ``end_timer``.
        """
        with self._lock:
            self._metrics.clear()
            self._request_timings.clear()
            self._download_stats = self._empty_download_stats()

    def get_all_metrics(self) -> Dict[str, MetricValue]:
        """Get all collected metrics.

        Returns:
            dict: Shallow copy of all metrics keyed by name; the
            ``MetricValue`` objects themselves are shared with the
            collector.
        """
        with self._lock:
            return self._metrics.copy()
|
|
|
|
|
|
# Global metrics collector instance, created lazily on first access.
_metrics_collector: Optional[MetricsCollector] = None
# Serializes first-time creation so concurrent callers share one instance.
_metrics_collector_lock = Lock()


def get_metrics_collector() -> MetricsCollector:
    """Get or create the global metrics collector instance.

    Creation is guarded by a module-level lock: without it, two threads
    that both see ``None`` would each build a collector, and metrics
    recorded on the one that gets overwritten would be lost.

    Returns:
        MetricsCollector: The metrics collector instance.
    """
    global _metrics_collector
    if _metrics_collector is None:
        with _metrics_collector_lock:
            # Re-check under the lock: another thread may have created the
            # instance between the first check and acquiring the lock.
            if _metrics_collector is None:
                _metrics_collector = MetricsCollector()
    return _metrics_collector
|
|
|
|
|
|
class TimerContext:
    """Context manager for timing operations.

    On entry a timer is started on the global metrics collector; on exit
    the timer is ended and the duration is recorded under ``metric_name``.
    Exceptions raised inside the ``with`` body are not suppressed.
    """

    def __init__(
        self,
        metric_name: str,
        timer_name: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
    ):
        """Initialize timer context.

        Args:
            metric_name: Name of the metric to record.
            timer_name: Optional name for the timer. When given, it is used
                verbatim as the collector's timer key.
            labels: Optional labels for the metric.
        """
        self.metric_name = metric_name
        self.timer_name = timer_name or metric_name
        self.labels = labels
        self.collector = get_metrics_collector()
        # The collector keeps running timers in one dict keyed by name. If
        # the key defaulted to the metric name, nested or concurrent
        # contexts for the same metric would overwrite each other's start
        # time and the later exit would find no timer. Use a per-instance
        # key unless the caller explicitly chose a timer name.
        self._timer_key = timer_name or f"{metric_name}#{id(self):x}"

    def __enter__(self) -> "TimerContext":
        """Start the timer."""
        self.collector.start_timer(self._timer_key)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """End the timer and record the metric."""
        # Returns None, so any exception from the body propagates.
        self.collector.end_timer(
            self._timer_key, self.metric_name, self.labels
        )
|