# Source: Aniworld/src/server/utils/metrics.py (Python; 359 lines, 10 KiB as listed by the repository file viewer)

"""Metrics collection for Prometheus and custom business metrics."""
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from threading import Lock
from typing import Any, Dict, Optional
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class MetricType(Enum):
    """Kinds of metric a ``MetricValue`` can hold.

    Each member's value is the lowercase type name; the Prometheus export
    writes it after ``# TYPE`` and the JSON export stores it under ``"type"``.
    """

    COUNTER = "counter"      # assigned by MetricsCollector.increment_counter
    GAUGE = "gauge"          # assigned by MetricsCollector.set_gauge
    HISTOGRAM = "histogram"  # assigned by MetricsCollector.observe_histogram
    SUMMARY = "summary"      # declared here; no code in this module assigns it
@dataclass
class MetricValue:
    """A single metric value with metadata.

    Attributes:
        name: Metric name.
        value: Current numeric value of the metric.
        metric_type: Kind of metric (counter, gauge, histogram, summary).
        labels: Label name/value pairs; every instance gets its own dict.
        timestamp: Creation time, taken from ``datetime.now()`` with no
            timezone (a naive local-time value).
        help_text: Human-readable description; empty when none was given.
    """

    name: str
    value: float
    metric_type: MetricType
    # default_factory gives each instance a fresh dict instead of a shared one.
    labels: Dict[str, str] = field(default_factory=dict)
    # Evaluated per instance at construction time, not once at import.
    timestamp: datetime = field(default_factory=datetime.now)
    help_text: str = ""
@dataclass
class HistogramBucket:
    """Histogram bucket for latency tracking.

    NOTE(review): nothing else in this module reads or updates buckets;
    confirm whether another module uses this type.

    Attributes:
        le: Bucket upper bound in seconds.
        count: Counter starting at 0 -- presumably the number of
            observations at or below ``le``; verify against callers.
    """

    le: float  # bucket upper bound in seconds
    count: int = 0
class MetricsCollector:
    """Collect and export metrics for monitoring.

    All mutable state (metrics, histogram observations, download statistics
    and running timers) is guarded by one non-reentrant ``threading.Lock``.
    Public methods acquire that lock themselves; the private ``_..._unlocked``
    helpers and the static helpers never acquire it, so they are safe to call
    while the lock is already held.

    Metrics are keyed by name only: labels are stored on the metric the first
    time it is created and do not create separate series.
    """

    def __init__(self) -> None:
        """Initialize metrics collector."""
        self._metrics: Dict[str, MetricValue] = {}
        # Raw histogram observations per metric name; grows with every
        # observe_histogram call until reset_metrics is called.
        self._request_timings: Dict[str, list[float]] = {}
        self._download_stats: Dict[str, int] = self._empty_download_stats()
        self._lock = Lock()
        # Start readings (time.perf_counter) of timers that are still running.
        self._timers: Dict[str, float] = {}

    @staticmethod
    def _empty_download_stats() -> Dict[str, int]:
        """Return a fresh, zeroed download-statistics mapping."""
        return {
            "completed": 0,
            "failed": 0,
            "total_size_bytes": 0,
        }

    def _increment_counter_unlocked(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]],
        help_text: str,
    ) -> None:
        """Create or bump a counter; the caller must already hold the lock."""
        metric = self._metrics.get(name)
        if metric is None:
            self._metrics[name] = MetricValue(
                name=name,
                value=value,
                metric_type=MetricType.COUNTER,
                labels=labels or {},
                help_text=help_text,
            )
        else:
            # Existing metric: only the value changes; labels and help text
            # from the first call are kept.
            metric.value += value

    def increment_counter(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Increment a counter metric.

        The first call for ``name`` creates the counter with ``value`` as its
        initial value; later calls add ``value`` to it.

        Args:
            name: Metric name.
            value: Amount to increment by.
            labels: Optional labels for the metric (used on creation only).
            help_text: Optional help text (used on creation only).
        """
        with self._lock:
            self._increment_counter_unlocked(name, value, labels, help_text)

    def set_gauge(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Set a gauge metric.

        Any metric already stored under ``name`` is replaced, so the labels,
        help text and timestamp always reflect the latest call.

        Args:
            name: Metric name.
            value: Gauge value.
            labels: Optional labels for the metric.
            help_text: Optional help text describing the metric.
        """
        with self._lock:
            self._metrics[name] = MetricValue(
                name=name,
                value=value,
                metric_type=MetricType.GAUGE,
                labels=labels or {},
                help_text=help_text,
            )

    def observe_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Observe a value for histogram.

        Every observation is appended to the raw timing list for ``name``.
        The metric entry itself is created on the first observation only and
        keeps that first value; use ``get_request_statistics`` for aggregates.

        Args:
            name: Metric name.
            value: Value to record.
            labels: Optional labels for the metric (used on creation only).
            help_text: Optional help text (used on creation only).
        """
        with self._lock:
            self._request_timings.setdefault(name, []).append(value)
            if name not in self._metrics:
                self._metrics[name] = MetricValue(
                    name=name,
                    value=value,
                    metric_type=MetricType.HISTOGRAM,
                    labels=labels or {},
                    help_text=help_text,
                )

    def start_timer(self, timer_name: str) -> None:
        """Start a timer for tracking operation duration.

        Starting a timer whose name is already running overwrites the earlier
        start reading.

        Args:
            timer_name: Name of the timer.
        """
        # perf_counter is monotonic, so durations are immune to wall-clock
        # adjustments between start and end.
        started = time.perf_counter()
        with self._lock:
            self._timers[timer_name] = started

    def end_timer(
        self,
        timer_name: str,
        metric_name: str,
        labels: Optional[Dict[str, str]] = None,
    ) -> float:
        """End a timer and record the duration.

        Args:
            timer_name: Name of the timer to end.
            metric_name: Name of the histogram metric to record.
            labels: Optional labels for the metric.

        Returns:
            Duration in seconds, or 0.0 (after logging a warning) when no
            timer with that name is running.
        """
        finished = time.perf_counter()
        # Look up and remove in one locked step so two threads cannot both
        # end the same timer.
        with self._lock:
            started = self._timers.pop(timer_name, None)
        if started is None:
            logger.warning("Timer %s not started", timer_name)
            return 0.0
        duration = finished - started
        # observe_histogram takes the lock itself, so call it after release.
        self.observe_histogram(
            metric_name, duration, labels, "Request/operation duration"
        )
        return duration

    def record_download_success(self, size_bytes: int) -> None:
        """Record a successful download.

        Updates the download statistics and the ``downloads_completed_total``
        counter in one critical section.

        Args:
            size_bytes: Size of downloaded file in bytes.
        """
        with self._lock:
            self._download_stats["completed"] += 1
            self._download_stats["total_size_bytes"] += size_bytes
            # Unlocked helper: the lock is not reentrant, so calling
            # increment_counter here would block forever.
            self._increment_counter_unlocked(
                "downloads_completed_total",
                1.0,
                None,
                "Total successful downloads",
            )

    def record_download_failure(self) -> None:
        """Record a failed download.

        Updates the download statistics and the ``downloads_failed_total``
        counter in one critical section.
        """
        with self._lock:
            self._download_stats["failed"] += 1
            self._increment_counter_unlocked(
                "downloads_failed_total", 1.0, None, "Total failed downloads"
            )

    def get_download_stats(self) -> Dict[str, int]:
        """Get download statistics.

        Returns:
            dict: A copy of the download statistics with the keys
            ``completed``, ``failed`` and ``total_size_bytes``.
        """
        with self._lock:
            return self._download_stats.copy()

    @staticmethod
    def _summarize_timings(timings: "list[float]") -> Dict[str, float]:
        """Compute summary statistics for a non-empty list of observations."""
        ordered = sorted(timings)  # sort once; reused for min/max/percentiles
        count = len(ordered)
        total = sum(timings)
        return {
            "count": count,
            "sum": total,
            "mean": total / count,
            "min": ordered[0],
            "max": ordered[-1],
            # Element at index count // 2 (the upper middle for even counts).
            "p50": ordered[count // 2],
            # int(count * 0.99) is always < count, so the index is in range.
            "p99": ordered[int(count * 0.99)],
        }

    def get_request_statistics(
        self, metric_name: str
    ) -> Optional[Dict[str, float]]:
        """Get statistics for a request timing metric.

        Args:
            metric_name: Name of the metric to analyze.

        Returns:
            Statistics with the keys ``count``, ``sum``, ``mean``, ``min``,
            ``max``, ``p50`` and ``p99``, or None when nothing has been
            observed for ``metric_name``.
        """
        with self._lock:
            timings = self._request_timings.get(metric_name)
            if not timings:
                return None
            return self._summarize_timings(timings)

    @staticmethod
    def _escape_help(text: str) -> str:
        """Escape backslashes and newlines for a Prometheus ``# HELP`` line."""
        return text.replace("\\", "\\\\").replace("\n", "\\n")

    @staticmethod
    def _escape_label_value(value: Any) -> str:
        """Escape backslash, double quote and newline in a label value."""
        return (
            str(value)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    def export_prometheus_format(self) -> str:
        """Export metrics in Prometheus text format.

        Each metric produces an optional ``# HELP`` line, a ``# TYPE`` line
        and one sample line. Histogram metrics are written as a single sample
        holding the stored metric value; no bucket lines are produced.

        Returns:
            str: Prometheus format metrics, joined with newlines and without
            a trailing newline; an empty string when there are no metrics.
        """
        with self._lock:
            lines = []
            for name, metric in self._metrics.items():
                if metric.help_text:
                    help_text = self._escape_help(metric.help_text)
                    lines.append(f"# HELP {name} {help_text}")
                lines.append(f"# TYPE {name} {metric.metric_type.value}")
                label_str = ""
                if metric.labels:
                    pairs = ",".join(
                        f'{key}="{self._escape_label_value(val)}"'
                        for key, val in metric.labels.items()
                    )
                    label_str = "{" + pairs + "}"
                lines.append(f"{name}{label_str} {metric.value}")
            return "\n".join(lines)

    def export_json(self) -> Dict[str, Any]:
        """Export metrics as JSON.

        The snapshot is built under a single lock acquisition and contains
        copies only, so mutating the result does not affect the collector.

        Returns:
            dict: ``metrics`` (value, type, labels, ISO timestamp per metric),
            ``downloads`` (download statistics) and ``request_timings``
            (summary statistics per histogram name).
        """
        with self._lock:
            metrics_dict = {
                name: {
                    "value": metric.value,
                    "type": metric.metric_type.value,
                    "labels": dict(metric.labels),
                    "timestamp": metric.timestamp.isoformat(),
                }
                for name, metric in self._metrics.items()
            }
            # Summaries come from the lock-free helper; calling
            # get_request_statistics here would try to re-acquire the lock.
            timing_stats = {
                name: self._summarize_timings(timings) if timings else None
                for name, timings in self._request_timings.items()
            }
            return {
                "metrics": metrics_dict,
                "downloads": dict(self._download_stats),
                "request_timings": timing_stats,
            }

    def reset_metrics(self) -> None:
        """Reset all collected metrics.

        Clears metrics, histogram observations and download statistics.
        Timers that are currently running are left untouched.
        """
        with self._lock:
            self._metrics.clear()
            self._request_timings.clear()
            self._download_stats = self._empty_download_stats()

    def get_all_metrics(self) -> "Dict[str, MetricValue]":
        """Get all collected metrics.

        Returns:
            dict: A shallow copy of the metrics keyed by name; the
            ``MetricValue`` objects are shared with the collector.
        """
        with self._lock:
            return self._metrics.copy()
# Global metrics collector instance, created lazily by get_metrics_collector().
_metrics_collector: Optional[MetricsCollector] = None
# Serializes first-time creation so concurrent callers share one collector.
_metrics_collector_lock = Lock()


def get_metrics_collector() -> MetricsCollector:
    """Get or create the global metrics collector instance.

    The collector is created on the first call and reused afterwards.

    Returns:
        MetricsCollector: The metrics collector instance.
    """
    global _metrics_collector
    if _metrics_collector is None:
        with _metrics_collector_lock:
            # Check again under the lock: another thread may have created
            # the collector while this one was waiting.
            if _metrics_collector is None:
                _metrics_collector = MetricsCollector()
    return _metrics_collector
class TimerContext:
    """Context manager for timing operations.

    Entering starts a named timer on the global metrics collector; leaving
    ends it, which records the elapsed time under ``metric_name``. Exceptions
    raised inside the ``with`` block are not suppressed.

    The collector keeps one start time per timer name, so overlapping
    contexts that share a ``timer_name`` overwrite each other's start time;
    pass distinct ``timer_name`` values when timings can overlap.
    """

    def __init__(
        self,
        metric_name: str,
        timer_name: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
    ) -> None:
        """Initialize timer context.

        Args:
            metric_name: Name of the metric to record.
            timer_name: Optional name for the timer; falls back to
                ``metric_name`` when omitted or empty.
            labels: Optional labels for the metric.
        """
        self.metric_name = metric_name
        self.timer_name = timer_name or metric_name
        self.labels = labels
        self.collector = get_metrics_collector()
        # Value returned by the collector's end_timer; None until the
        # context has exited.
        self.duration: Optional[float] = None

    def __enter__(self) -> "TimerContext":
        """Start the timer and return this context."""
        self.collector.start_timer(self.timer_name)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """End the timer and record the metric.

        Returns None, so an exception from the ``with`` body propagates.
        """
        self.duration = self.collector.end_timer(
            self.timer_name, self.metric_name, self.labels
        )