Files
Aniworld/src/server/providers/health_monitor.py
Lukas 5526ab884a refactor: restructure core→server, split large entity files into database module
- Move src/core/ → src/server/
- Split SerieList.py (531 lines) and series.py (414 lines) into src/server/database/
- Add database/models.py for SQLAlchemy models
- Update all test imports to reflect new structure
- Remove deprecated test files (test_serie_class.py, test_serie_folder_with_year.py)
2026-06-04 21:11:53 +02:00

417 lines
14 KiB
Python

"""Provider health monitoring system for tracking availability and performance.
This module provides health monitoring capabilities for anime providers,
tracking metrics like availability, response times, success rates, and
bandwidth usage.
"""
import asyncio
import logging
from collections import defaultdict, deque
from dataclasses import dataclass
from datetime import datetime, timedelta
from typing import Any, Deque, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class ProviderHealthMetrics:
"""Health metrics for a single provider."""
provider_name: str
is_available: bool = True
last_check_time: Optional[datetime] = None
total_requests: int = 0
successful_requests: int = 0
failed_requests: int = 0
average_response_time_ms: float = 0.0
last_error: Optional[str] = None
last_error_time: Optional[datetime] = None
consecutive_failures: int = 0
total_bytes_downloaded: int = 0
uptime_percentage: float = 100.0
@property
def success_rate(self) -> float:
"""Calculate success rate as percentage."""
if self.total_requests == 0:
return 0.0
return (self.successful_requests / self.total_requests) * 100
@property
def failure_rate(self) -> float:
"""Calculate failure rate as percentage."""
return 100.0 - self.success_rate
def to_dict(self) -> Dict[str, Any]:
"""Convert metrics to dictionary."""
return {
"provider_name": self.provider_name,
"is_available": self.is_available,
"last_check_time": (
self.last_check_time.isoformat()
if self.last_check_time
else None
),
"total_requests": self.total_requests,
"successful_requests": self.successful_requests,
"failed_requests": self.failed_requests,
"success_rate": round(self.success_rate, 2),
"average_response_time_ms": round(
self.average_response_time_ms, 2
),
"last_error": self.last_error,
"last_error_time": (
self.last_error_time.isoformat()
if self.last_error_time
else None
),
"consecutive_failures": self.consecutive_failures,
"total_bytes_downloaded": self.total_bytes_downloaded,
"uptime_percentage": round(self.uptime_percentage, 2),
}
@dataclass
class RequestMetric:
"""Individual request metric."""
timestamp: datetime
success: bool
response_time_ms: float
bytes_transferred: int = 0
error_message: Optional[str] = None
class ProviderHealthMonitor:
"""Monitors health and performance of anime providers."""
def __init__(
self,
max_history_size: int = 1000,
health_check_interval: int = 300, # 5 minutes
failure_threshold: int = 3,
):
"""Initialize provider health monitor.
Args:
max_history_size: Maximum number of request metrics to keep
per provider.
health_check_interval: Interval between health checks in
seconds.
failure_threshold: Number of consecutive failures before
marking unavailable.
"""
self._max_history_size = max_history_size
self._health_check_interval = health_check_interval
self._failure_threshold = failure_threshold
# Provider metrics storage
self._metrics: Dict[str, ProviderHealthMetrics] = {}
self._request_history: Dict[str, Deque[RequestMetric]] = defaultdict(
lambda: deque(maxlen=max_history_size)
)
# Health check task
self._health_check_task: Optional[asyncio.Task] = None
self._is_running = False
logger.info("Provider health monitor initialized")
def start_monitoring(self) -> None:
"""Start background health monitoring."""
if self._is_running:
logger.warning("Health monitoring already running")
return
self._is_running = True
self._health_check_task = asyncio.create_task(
self._health_check_loop()
)
logger.info("Provider health monitoring started")
async def stop_monitoring(self) -> None:
"""Stop background health monitoring."""
self._is_running = False
if self._health_check_task:
self._health_check_task.cancel()
try:
await self._health_check_task
except asyncio.CancelledError:
pass
self._health_check_task = None
logger.info("Provider health monitoring stopped")
async def _health_check_loop(self) -> None:
"""Background health check loop."""
while self._is_running:
try:
await self._perform_health_checks()
await asyncio.sleep(self._health_check_interval)
except asyncio.CancelledError:
break
except Exception as e:
logger.exception("Error in health check loop: %s", e)
await asyncio.sleep(self._health_check_interval)
async def _perform_health_checks(self) -> None:
"""Perform health checks on all registered providers."""
for provider_name in list(self._metrics.keys()):
try:
metrics = self._metrics[provider_name]
metrics.last_check_time = datetime.now()
# Update uptime percentage based on recent history
recent_metrics = self._get_recent_metrics(
provider_name, minutes=60
)
if recent_metrics:
successful = sum(1 for m in recent_metrics if m.success)
metrics.uptime_percentage = (
successful / len(recent_metrics)
) * 100
logger.debug(
f"Health check for {provider_name}: "
f"available={metrics.is_available}, "
f"success_rate={metrics.success_rate:.2f}%"
)
except Exception as e:
logger.error(
f"Error checking health for {provider_name}: {e}",
exc_info=True,
)
def record_request(
self,
provider_name: str,
success: bool,
response_time_ms: float,
bytes_transferred: int = 0,
error_message: Optional[str] = None,
) -> None:
"""Record a provider request for health tracking.
Args:
provider_name: Name of the provider.
success: Whether the request was successful.
response_time_ms: Response time in milliseconds.
bytes_transferred: Number of bytes transferred.
error_message: Error message if request failed.
"""
# Initialize metrics if not exists
if provider_name not in self._metrics:
self._metrics[provider_name] = ProviderHealthMetrics(
provider_name=provider_name
)
metrics = self._metrics[provider_name]
# Update request counts
metrics.total_requests += 1
if success:
metrics.successful_requests += 1
metrics.consecutive_failures = 0
else:
metrics.failed_requests += 1
metrics.consecutive_failures += 1
metrics.last_error = error_message
metrics.last_error_time = datetime.now()
# Update availability based on consecutive failures
if metrics.consecutive_failures >= self._failure_threshold:
if metrics.is_available:
logger.warning(
f"Provider {provider_name} marked as unavailable after "
f"{metrics.consecutive_failures} consecutive failures"
)
metrics.is_available = False
else:
metrics.is_available = True
# Update average response time
total_time = metrics.average_response_time_ms * (
metrics.total_requests - 1
)
metrics.average_response_time_ms = (
total_time + response_time_ms
) / metrics.total_requests
# Update bytes transferred
metrics.total_bytes_downloaded += bytes_transferred
# Store request metric in history
request_metric = RequestMetric(
timestamp=datetime.now(),
success=success,
response_time_ms=response_time_ms,
bytes_transferred=bytes_transferred,
error_message=error_message,
)
self._request_history[provider_name].append(request_metric)
logger.debug(
f"Recorded request for {provider_name}: "
f"success={success}, time={response_time_ms:.2f}ms"
)
def get_provider_metrics(
self, provider_name: str
) -> Optional[ProviderHealthMetrics]:
"""Get health metrics for a specific provider.
Args:
provider_name: Name of the provider.
Returns:
Provider health metrics or None if not found.
"""
return self._metrics.get(provider_name)
def get_all_metrics(self) -> Dict[str, ProviderHealthMetrics]:
"""Get health metrics for all providers.
Returns:
Dictionary mapping provider names to their metrics.
"""
return self._metrics.copy()
def get_available_providers(self) -> List[str]:
"""Get list of currently available providers.
Returns:
List of available provider names.
"""
return [
name
for name, metrics in self._metrics.items()
if metrics.is_available
]
def get_best_provider(self) -> Optional[str]:
"""Get the best performing available provider.
Best is determined by:
1. Availability
2. Success rate
3. Response time
Returns:
Name of best provider or None if none available.
"""
available = [
(name, metrics)
for name, metrics in self._metrics.items()
if metrics.is_available
]
if not available:
return None
# Sort by success rate (descending) then response time (ascending)
available.sort(
key=lambda x: (-x[1].success_rate, x[1].average_response_time_ms)
)
best_provider = available[0][0]
logger.debug("Best provider selected: %s", best_provider)
return best_provider
def _get_recent_metrics(
self, provider_name: str, minutes: int = 60
) -> List[RequestMetric]:
"""Get recent request metrics for a provider.
Args:
provider_name: Name of the provider.
minutes: Number of minutes to look back.
Returns:
List of recent request metrics.
"""
if provider_name not in self._request_history:
return []
cutoff_time = datetime.now() - timedelta(minutes=minutes)
return [
metric
for metric in self._request_history[provider_name]
if metric.timestamp >= cutoff_time
]
def reset_provider_metrics(self, provider_name: str) -> bool:
"""Reset metrics for a specific provider.
Args:
provider_name: Name of the provider.
Returns:
True if reset successful, False if provider not found.
"""
if provider_name not in self._metrics:
return False
self._metrics[provider_name] = ProviderHealthMetrics(
provider_name=provider_name
)
self._request_history[provider_name].clear()
logger.info("Reset metrics for provider: %s", provider_name)
return True
def get_health_summary(self) -> Dict[str, Any]:
"""Get summary of overall provider health.
Returns:
Dictionary with health summary statistics.
"""
total_providers = len(self._metrics)
available_providers = len(self.get_available_providers())
if total_providers == 0:
return {
"total_providers": 0,
"available_providers": 0,
"availability_percentage": 0.0,
"average_success_rate": 0.0,
"average_response_time_ms": 0.0,
}
avg_success_rate = sum(
m.success_rate for m in self._metrics.values()
) / total_providers
avg_response_time = sum(
m.average_response_time_ms for m in self._metrics.values()
) / total_providers
return {
"total_providers": total_providers,
"available_providers": available_providers,
"availability_percentage": (
available_providers / total_providers
)
* 100,
"average_success_rate": round(avg_success_rate, 2),
"average_response_time_ms": round(avg_response_time, 2),
"providers": {
name: metrics.to_dict()
for name, metrics in self._metrics.items()
},
}
# Global health monitor instance
_health_monitor: Optional[ProviderHealthMonitor] = None
def get_health_monitor() -> ProviderHealthMonitor:
"""Get or create global provider health monitor instance.
Returns:
Global ProviderHealthMonitor instance.
"""
global _health_monitor
if _health_monitor is None:
_health_monitor = ProviderHealthMonitor()
return _health_monitor