backup
This commit is contained in:
358
src/server/utils/metrics.py
Normal file
358
src/server/utils/metrics.py
Normal file
@@ -0,0 +1,358 @@
|
||||
"""Metrics collection for Prometheus and custom business metrics."""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from threading import Lock
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MetricType(Enum):
    """Closed set of metric kinds a stored metric can be tagged with.

    Each member's value is the lowercase string written out when a metric
    is exported (for example on a ``# TYPE`` line).
    """

    # Value that is accumulated by adding increments.
    COUNTER = "counter"
    # Value that is overwritten on every update.
    GAUGE = "gauge"
    # Series of individual observations.
    HISTOGRAM = "histogram"
    # Declared for completeness alongside the other kinds.
    SUMMARY = "summary"
|
||||
|
||||
|
||||
@dataclass
class MetricValue:
    """One named metric sample together with its descriptive metadata.

    ``name``, ``value`` and ``metric_type`` are required; the remaining
    fields have defaults. ``labels`` gets a fresh dict per instance and
    ``timestamp`` is taken from ``datetime.now`` (naive local time) when
    the instance is constructed.
    """

    # Identifier of the metric.
    name: str
    # Current numeric value.
    value: float
    # Kind of metric this sample represents.
    metric_type: MetricType
    # Label name -> label value; empty when no labels were supplied.
    labels: Dict[str, str] = field(default_factory=dict)
    # Creation time of this instance.
    timestamp: datetime = field(default_factory=datetime.now)
    # Human-readable description; empty string means "no description".
    help_text: str = ""
|
||||
|
||||
|
||||
@dataclass
class HistogramBucket:
    """A single histogram bucket used for latency tracking.

    Holds the bucket's upper bound and the number of observations counted
    in it; a new bucket starts with a count of zero.
    """

    # Upper bound of the bucket, in seconds.
    le: float
    # Number of observations recorded for this bucket.
    count: int = 0
|
||||
|
||||
|
||||
class MetricsCollector:
    """Collect in-process metrics and export them as Prometheus text or JSON.

    All state lives in plain dictionaries guarded by one non-reentrant
    ``threading.Lock``. Public methods acquire the lock themselves; the
    private static helpers never do. A public method must therefore not
    call another public method while it is holding the lock.

    Metrics are keyed by name only. For counters and histograms the labels
    and help text are captured when the metric is first created and later
    calls do not change them; ``set_gauge`` replaces the whole entry on
    every call.
    """

    def __init__(self):
        """Initialize an empty metrics collector."""
        # Metric name -> latest stored MetricValue.
        self._metrics: Dict[str, MetricValue] = {}
        # Histogram name -> every observed value, in observation order.
        # NOTE(review): these lists grow without bound until reset_metrics().
        self._request_timings: Dict[str, list[float]] = {}
        self._download_stats: Dict[str, int] = self._empty_download_stats()
        self._lock = Lock()
        # Timer name -> perf_counter() reading taken by start_timer().
        self._timers: Dict[str, float] = {}

    @staticmethod
    def _empty_download_stats() -> Dict[str, int]:
        """Return a fresh, zeroed download statistics dictionary."""
        return {
            "completed": 0,
            "failed": 0,
            "total_size_bytes": 0,
        }

    @staticmethod
    def _summarize(timings: list[float]) -> Dict[str, float]:
        """Compute summary statistics for a non-empty list of observations.

        Does not touch the lock; callers pass either a private copy or a
        list they are already protecting.
        """
        ordered = sorted(timings)  # sorted once, reused for min/max/percentiles
        count = len(ordered)
        # Sum in observation order so the floating-point result does not
        # depend on the sort.
        total = sum(timings)
        return {
            "count": count,
            "sum": total,
            "mean": total / count,
            "min": ordered[0],
            "max": ordered[-1],
            "p50": ordered[count // 2],
            # int(count * 0.99) is always < count, so the index is in range.
            "p99": ordered[int(count * 0.99)],
        }

    @staticmethod
    def _escape_label_value(value: Any) -> str:
        """Escape a label value for the Prometheus text format.

        Backslash, double quote and line feed are the characters that must
        be escaped inside a quoted label value.
        """
        return (
            str(value)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    @staticmethod
    def _escape_help(text: str) -> str:
        """Escape help text so it stays on a single ``# HELP`` line."""
        return text.replace("\\", "\\\\").replace("\n", "\\n")

    def increment_counter(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Increment a counter metric, creating it on first use.

        Args:
            name: Metric name.
            value: Amount to increment by.
            labels: Optional labels; only used when the counter is created.
            help_text: Optional description; only used when the counter is
                created.
        """
        with self._lock:
            existing = self._metrics.get(name)
            if existing is None:
                # First sighting: the initial value is the increment itself.
                self._metrics[name] = MetricValue(
                    name=name,
                    value=value,
                    metric_type=MetricType.COUNTER,
                    labels=labels or {},
                    help_text=help_text,
                )
            else:
                existing.value += value

    def set_gauge(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Set a gauge metric, replacing any metric stored under ``name``.

        Args:
            name: Metric name.
            value: Gauge value.
            labels: Optional labels for the metric.
            help_text: Optional help text describing the metric.
        """
        gauge = MetricValue(
            name=name,
            value=value,
            metric_type=MetricType.GAUGE,
            labels=labels or {},
            help_text=help_text,
        )
        with self._lock:
            self._metrics[name] = gauge

    def observe_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Record one observation for a histogram metric.

        The observation is appended to the per-name timing list used by
        ``get_request_statistics``. The ``MetricValue`` stored under
        ``name`` is created on the first observation only, so its ``value``
        stays at the first observed value.

        Args:
            name: Metric name.
            value: Value to record.
            labels: Optional labels; only used when the metric is created.
            help_text: Optional description; only used when the metric is
                created.
        """
        with self._lock:
            self._request_timings.setdefault(name, []).append(value)

            if name not in self._metrics:
                self._metrics[name] = MetricValue(
                    name=name,
                    value=value,
                    metric_type=MetricType.HISTOGRAM,
                    labels=labels or {},
                    help_text=help_text,
                )

    def start_timer(self, timer_name: str) -> None:
        """Start (or restart) a timer for tracking operation duration.

        Args:
            timer_name: Name of the timer. Starting a name that is already
                running overwrites its start time.
        """
        with self._lock:
            # perf_counter() is unaffected by wall-clock adjustments, so
            # the measured duration cannot go negative or jump.
            self._timers[timer_name] = time.perf_counter()

    def end_timer(
        self,
        timer_name: str,
        metric_name: str,
        labels: Optional[Dict[str, str]] = None,
    ) -> float:
        """End a timer and record its duration as a histogram observation.

        Args:
            timer_name: Name of the timer to end.
            metric_name: Name of the metric to record.
            labels: Optional labels for the metric.

        Returns:
            Duration in seconds, or ``0.0`` if the timer was never started
            (a warning is logged and nothing is recorded in that case).
        """
        finished = time.perf_counter()
        with self._lock:
            # pop() checks and removes in one step under the lock, so two
            # threads cannot both end the same timer.
            started = self._timers.pop(timer_name, None)

        if started is None:
            logger.warning("Timer %s not started", timer_name)
            return 0.0

        duration = finished - started
        # The lock is released here: observe_histogram() acquires it again.
        self.observe_histogram(
            metric_name, duration, labels, "Request/operation duration"
        )
        return duration

    def record_download_success(self, size_bytes: int) -> None:
        """Record a successful download.

        Args:
            size_bytes: Size of downloaded file in bytes.
        """
        with self._lock:
            self._download_stats["completed"] += 1
            self._download_stats["total_size_bytes"] += size_bytes

        self.increment_counter(
            "downloads_completed_total",
            help_text="Total successful downloads",
        )

    def record_download_failure(self) -> None:
        """Record a failed download."""
        with self._lock:
            self._download_stats["failed"] += 1

        self.increment_counter(
            "downloads_failed_total", help_text="Total failed downloads"
        )

    def get_download_stats(self) -> Dict[str, int]:
        """Get download statistics.

        Returns:
            dict: A copy of the download statistics.
        """
        with self._lock:
            return self._download_stats.copy()

    def get_request_statistics(
        self, metric_name: str
    ) -> Optional[Dict[str, float]]:
        """Get statistics for a request timing metric.

        Args:
            metric_name: Name of the metric to analyze.

        Returns:
            Statistics with keys ``count``, ``sum``, ``mean``, ``min``,
            ``max``, ``p50`` and ``p99``, or ``None`` when nothing has been
            observed under ``metric_name``.
        """
        with self._lock:
            recorded = self._request_timings.get(metric_name)
            if not recorded:
                return None
            # Snapshot under the lock; the sorting happens outside it.
            samples = list(recorded)

        return self._summarize(samples)

    def export_prometheus_format(self) -> str:
        """Export metrics in Prometheus text format.

        Each metric produces an optional ``# HELP`` line, a ``# TYPE`` line
        and one sample line. Help text and label values are escaped so that
        quotes, backslashes and newlines cannot break the output.

        NOTE(review): histogram metrics are emitted as a single sample, not
        as ``_bucket``/``_sum``/``_count`` series; confirm consumers accept
        that.

        Returns:
            str: Prometheus format metrics, lines joined by ``"\\n"``.
        """
        lines = []
        with self._lock:
            for name, metric in self._metrics.items():
                if metric.help_text:
                    lines.append(
                        f"# HELP {name} {self._escape_help(metric.help_text)}"
                    )
                lines.append(f"# TYPE {name} {metric.metric_type.value}")

                label_str = ""
                if metric.labels:
                    label_pairs = [
                        f'{key}="{self._escape_label_value(val)}"'
                        for key, val in metric.labels.items()
                    ]
                    label_str = "{" + ",".join(label_pairs) + "}"

                lines.append(f"{name}{label_str} {metric.value}")

        return "\n".join(lines)

    def export_json(self) -> Dict[str, Any]:
        """Export metrics as JSON.

        Everything is gathered in a single critical section and copied, so
        the result is a consistent snapshot that shares no mutable state
        with the collector.

        Returns:
            dict: JSON-serializable mapping with keys ``metrics``,
            ``downloads`` and ``request_timings``.
        """
        with self._lock:
            metrics_dict = {
                name: {
                    "value": metric.value,
                    "type": metric.metric_type.value,
                    "labels": dict(metric.labels),
                    "timestamp": metric.timestamp.isoformat(),
                }
                for name, metric in self._metrics.items()
            }
            downloads = self._download_stats.copy()
            # Summarize directly: calling get_request_statistics() here
            # would try to re-acquire the non-reentrant lock and deadlock.
            request_timings = {
                name: self._summarize(samples) if samples else None
                for name, samples in self._request_timings.items()
            }

        return {
            "metrics": metrics_dict,
            "downloads": downloads,
            "request_timings": request_timings,
        }

    def reset_metrics(self) -> None:
        """Reset all collected metrics.

        Clears stored metrics, timing observations and download statistics.
        Timers that are currently running are left untouched.
        """
        with self._lock:
            self._metrics.clear()
            self._request_timings.clear()
            self._download_stats = self._empty_download_stats()

    def get_all_metrics(self) -> Dict[str, MetricValue]:
        """Get all collected metrics.

        Returns:
            dict: Shallow copy of the metrics keyed by name; the
            ``MetricValue`` objects themselves are shared with the
            collector.
        """
        with self._lock:
            return self._metrics.copy()
|
||||
|
||||
|
||||
# Process-wide collector; stays None until the first accessor call.
_metrics_collector: Optional[MetricsCollector] = None


def get_metrics_collector() -> MetricsCollector:
    """Return the shared metrics collector, building it on first use.

    Returns:
        MetricsCollector: The module-level collector instance; every call
        after the first returns the same object.
    """
    global _metrics_collector

    collector = _metrics_collector
    if collector is None:
        # Lazily create the instance and remember it for later callers.
        collector = MetricsCollector()
        _metrics_collector = collector
    return collector
|
||||
|
||||
|
||||
class TimerContext:
    """Context manager that times its ``with`` body via the global collector.

    Entering starts a named timer on the collector; leaving ends it and
    records the elapsed time under ``metric_name``. Exceptions raised in
    the body are not suppressed.

    NOTE(review): the timer name falls back to the metric name, so
    overlapping contexts for the same metric appear to share one timer
    slot in the collector; confirm callers pass distinct ``timer_name``
    values when they overlap.
    """

    def __init__(
        self,
        metric_name: str,
        timer_name: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
    ):
        """Store the timing configuration and look up the collector.

        Args:
            metric_name: Name of the metric the duration is recorded under.
            timer_name: Name of the timer; a missing or empty value falls
                back to ``metric_name``.
            labels: Optional labels passed along when the duration is
                recorded.
        """
        self.collector = get_metrics_collector()
        self.labels = labels
        self.metric_name = metric_name
        self.timer_name = timer_name if timer_name else metric_name

    def __enter__(self):
        """Start the timer and hand back this context object."""
        collector = self.collector
        collector.start_timer(self.timer_name)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop the timer and record the elapsed time as a metric."""
        name, metric, labels = self.timer_name, self.metric_name, self.labels
        self.collector.end_timer(name, metric, labels)
|
||||
Reference in New Issue
Block a user