"""Metrics collection for Prometheus and custom business metrics.""" import logging import time from dataclasses import dataclass, field from datetime import datetime from enum import Enum from threading import Lock from typing import Any, Dict, Optional logger = logging.getLogger(__name__) class MetricType(Enum): """Types of metrics.""" COUNTER = "counter" GAUGE = "gauge" HISTOGRAM = "histogram" SUMMARY = "summary" @dataclass class MetricValue: """A single metric value with metadata.""" name: str value: float metric_type: MetricType labels: Dict[str, str] = field(default_factory=dict) timestamp: datetime = field(default_factory=datetime.now) help_text: str = "" @dataclass class HistogramBucket: """Histogram bucket for latency tracking.""" le: float # bucket upper bound in seconds count: int = 0 class MetricsCollector: """Collect and export metrics for monitoring.""" def __init__(self): """Initialize metrics collector.""" self._metrics: Dict[str, MetricValue] = {} self._request_timings: Dict[str, list[float]] = {} self._download_stats: Dict[str, int] = { "completed": 0, "failed": 0, "total_size_bytes": 0, } self._lock = Lock() self._timers: Dict[str, float] = {} def increment_counter( self, name: str, value: float = 1.0, labels: Optional[Dict[str, str]] = None, help_text: str = "", ) -> None: """Increment a counter metric. Args: name: Metric name. value: Amount to increment by. labels: Optional labels for the metric. help_text: Optional help text describing the metric. """ with self._lock: if name not in self._metrics: self._metrics[name] = MetricValue( name=name, value=value, metric_type=MetricType.COUNTER, labels=labels or {}, help_text=help_text, ) else: self._metrics[name].value += value def set_gauge( self, name: str, value: float, labels: Optional[Dict[str, str]] = None, help_text: str = "", ) -> None: """Set a gauge metric. Args: name: Metric name. value: Gauge value. labels: Optional labels for the metric. help_text: Optional help text describing the metric. """ with self._lock: self._metrics[name] = MetricValue( name=name, value=value, metric_type=MetricType.GAUGE, labels=labels or {}, help_text=help_text, ) def observe_histogram( self, name: str, value: float, labels: Optional[Dict[str, str]] = None, help_text: str = "", ) -> None: """Observe a value for histogram. Args: name: Metric name. value: Value to record. labels: Optional labels for the metric. help_text: Optional help text describing the metric. """ with self._lock: if name not in self._request_timings: self._request_timings[name] = [] self._request_timings[name].append(value) # Update histogram metric if name not in self._metrics: self._metrics[name] = MetricValue( name=name, value=value, metric_type=MetricType.HISTOGRAM, labels=labels or {}, help_text=help_text, ) def start_timer(self, timer_name: str) -> None: """Start a timer for tracking operation duration. Args: timer_name: Name of the timer. """ self._timers[timer_name] = time.time() def end_timer( self, timer_name: str, metric_name: str, labels: Optional[Dict[str, str]] = None, ) -> float: """End a timer and record the duration. Args: timer_name: Name of the timer to end. metric_name: Name of the metric to record. labels: Optional labels for the metric. Returns: Duration in seconds. """ if timer_name not in self._timers: logger.warning("Timer %s not started", timer_name) return 0.0 duration = time.time() - self._timers[timer_name] del self._timers[timer_name] self.observe_histogram( metric_name, duration, labels, "Request/operation duration" ) return duration def record_download_success(self, size_bytes: int) -> None: """Record a successful download. Args: size_bytes: Size of downloaded file in bytes. """ with self._lock: self._download_stats["completed"] += 1 self._download_stats["total_size_bytes"] += size_bytes self.increment_counter( "downloads_completed_total", help_text="Total successful downloads", ) def record_download_failure(self) -> None: """Record a failed download.""" with self._lock: self._download_stats["failed"] += 1 self.increment_counter( "downloads_failed_total", help_text="Total failed downloads" ) def get_download_stats(self) -> Dict[str, int]: """Get download statistics. Returns: dict: Download statistics. """ with self._lock: return self._download_stats.copy() def get_request_statistics( self, metric_name: str ) -> Optional[Dict[str, float]]: """Get statistics for a request timing metric. Args: metric_name: Name of the metric to analyze. Returns: Statistics including count, sum, mean, min, max. """ with self._lock: if metric_name not in self._request_timings: return None timings = self._request_timings[metric_name] if not timings: return None return { "count": len(timings), "sum": sum(timings), "mean": sum(timings) / len(timings), "min": min(timings), "max": max(timings), "p50": sorted(timings)[len(timings) // 2], "p99": sorted(timings)[int(len(timings) * 0.99)], } def export_prometheus_format(self) -> str: """Export metrics in Prometheus text format. Returns: str: Prometheus format metrics. """ with self._lock: lines = [] for name, metric in self._metrics.items(): # Add help text if available if metric.help_text: lines.append(f"# HELP {name} {metric.help_text}") lines.append(f"# TYPE {name} {metric.metric_type.value}") # Format labels label_str = "" if metric.labels: label_pairs = [ f'{k}="{v}"' for k, v in metric.labels.items() ] label_str = "{" + ",".join(label_pairs) + "}" # Add metric value lines.append(f"{name}{label_str} {metric.value}") return "\n".join(lines) def export_json(self) -> Dict[str, Any]: """Export metrics as JSON. Returns: dict: Metrics in JSON-serializable format. """ with self._lock: metrics_dict = {} for name, metric in self._metrics.items(): metrics_dict[name] = { "value": metric.value, "type": metric.metric_type.value, "labels": metric.labels, "timestamp": metric.timestamp.isoformat(), } return { "metrics": metrics_dict, "downloads": self._download_stats, "request_timings": { name: self.get_request_statistics(name) for name in self._request_timings }, } def reset_metrics(self) -> None: """Reset all collected metrics.""" with self._lock: self._metrics.clear() self._request_timings.clear() self._download_stats = { "completed": 0, "failed": 0, "total_size_bytes": 0, } def get_all_metrics(self) -> Dict[str, MetricValue]: """Get all collected metrics. Returns: dict: All metrics keyed by name. """ with self._lock: return self._metrics.copy() # Global metrics collector instance _metrics_collector: Optional[MetricsCollector] = None def get_metrics_collector() -> MetricsCollector: """Get or create the global metrics collector instance. Returns: MetricsCollector: The metrics collector instance. """ global _metrics_collector if _metrics_collector is None: _metrics_collector = MetricsCollector() return _metrics_collector class TimerContext: """Context manager for timing operations.""" def __init__( self, metric_name: str, timer_name: Optional[str] = None, labels: Optional[Dict[str, str]] = None, ): """Initialize timer context. Args: metric_name: Name of the metric to record. timer_name: Optional name for the timer. labels: Optional labels for the metric. """ self.metric_name = metric_name self.timer_name = timer_name or metric_name self.labels = labels self.collector = get_metrics_collector() def __enter__(self): """Start the timer.""" self.collector.start_timer(self.timer_name) return self def __exit__(self, exc_type, exc_val, exc_tb): """End the timer and record the metric.""" self.collector.end_timer( self.timer_name, self.metric_name, self.labels )