Aniworld/tests/unit/test_provider_health.py
Lukas fecdb38a90 feat: Add comprehensive provider health monitoring and failover system
- Implemented ProviderHealthMonitor for real-time tracking
  - Monitors availability, response times, success rates
  - Automatically marks a provider unavailable once consecutive failures reach the threshold
  - Background health check loop

- Added ProviderFailover for automatic provider switching
  - Configurable retry attempts with exponential backoff
  - Integration with health monitoring
  - Smart provider selection

- Created MonitoredProviderWrapper for performance tracking
  - Transparent monitoring for any provider
  - Automatic metric recording
  - No changes needed to existing providers

- Implemented ProviderConfigManager for dynamic configuration
  - Runtime updates without restart
  - Per-provider settings (timeout, retries, bandwidth)
  - JSON-based persistence

- Added Provider Management API (15+ endpoints)
  - Health monitoring endpoints
  - Configuration management
  - Failover control

- Comprehensive testing (34 tests, 100% pass rate)
  - Health monitoring tests
  - Failover scenario tests
  - Configuration management tests

- Documentation updates
  - Updated infrastructure.md
  - Updated instructions.md
  - Created PROVIDER_ENHANCEMENT_SUMMARY.md

Total: ~2,593 lines of code, 34 passing tests
2025-10-24 11:01:40 +02:00

330 lines
11 KiB
Python

"""Unit tests for provider health monitoring system."""
import asyncio
from datetime import datetime
import pytest
from src.core.providers.health_monitor import (
ProviderHealthMetrics,
ProviderHealthMonitor,
RequestMetric,
get_health_monitor,
)
class TestProviderHealthMetrics:
    """Checks for the ProviderHealthMetrics dataclass and its derived rates."""

    def test_metrics_initialization(self):
        """A metrics object built from just a name exposes the defaults."""
        fresh = ProviderHealthMetrics(provider_name="test_provider")

        assert fresh.provider_name == "test_provider"
        assert fresh.is_available is True
        # All request counters start out empty.
        assert (
            fresh.total_requests,
            fresh.successful_requests,
            fresh.failed_requests,
        ) == (0, 0, 0)
        assert fresh.average_response_time_ms == 0.0
        assert fresh.consecutive_failures == 0
        assert fresh.uptime_percentage == 100.0

    def test_success_rate_calculation(self):
        """75 successes out of 100 requests give a 75% / 25% split."""
        stats = ProviderHealthMetrics(provider_name="test")
        stats.total_requests = 100
        stats.successful_requests = 75

        observed_success = stats.success_rate
        observed_failure = stats.failure_rate

        assert observed_success == 75.0
        assert observed_failure == 25.0

    def test_success_rate_zero_requests(self):
        """With no requests recorded the rates are 0% success, 100% failure."""
        untouched = ProviderHealthMetrics(provider_name="test")

        assert untouched.success_rate == 0.0
        assert untouched.failure_rate == 100.0

    def test_to_dict(self):
        """to_dict() exposes raw counters and the computed success rate."""
        snapshot = ProviderHealthMetrics(
            provider_name="test",
            total_requests=10,
            successful_requests=8,
        ).to_dict()

        assert snapshot["provider_name"] == "test"
        assert snapshot["total_requests"] == 10
        assert snapshot["successful_requests"] == 8
        # 8 of 10 requests succeeded.
        assert snapshot["success_rate"] == 80.0
        assert "average_response_time_ms" in snapshot
class TestProviderHealthMonitor:
    """Test ProviderHealthMonitor class.

    Covers request recording, availability tracking against the failure
    threshold, provider selection, metric resets, the health summary and
    starting/stopping the background monitoring task.
    """

    def test_monitor_initialization(self):
        """Test that constructor arguments are stored on the monitor."""
        monitor = ProviderHealthMonitor(
            max_history_size=500,
            health_check_interval=60,
            failure_threshold=5,
        )

        assert monitor._max_history_size == 500
        assert monitor._health_check_interval == 60
        assert monitor._failure_threshold == 5
        # A newly constructed monitor is not reported as running.
        assert not monitor._is_running

    def test_record_successful_request(self):
        """Test recording successful request."""
        monitor = ProviderHealthMonitor()

        monitor.record_request(
            provider_name="test_provider",
            success=True,
            response_time_ms=150.0,
            bytes_transferred=1024,
        )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics is not None
        assert metrics.total_requests == 1
        assert metrics.successful_requests == 1
        assert metrics.failed_requests == 0
        assert metrics.is_available is True
        assert metrics.consecutive_failures == 0
        assert metrics.average_response_time_ms == 150.0
        assert metrics.total_bytes_downloaded == 1024

    def test_record_failed_request(self):
        """Test recording failed request."""
        monitor = ProviderHealthMonitor(failure_threshold=2)

        monitor.record_request(
            provider_name="test_provider",
            success=False,
            response_time_ms=200.0,
            error_message="Connection timeout",
        )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics is not None
        assert metrics.total_requests == 1
        assert metrics.failed_requests == 1
        assert metrics.consecutive_failures == 1
        assert metrics.last_error == "Connection timeout"
        assert metrics.is_available is True  # Below threshold

    def test_mark_unavailable_after_failures(self):
        """Test marking provider unavailable after threshold."""
        monitor = ProviderHealthMonitor(failure_threshold=3)

        # Exactly as many consecutive failures as the threshold.
        for i in range(3):
            monitor.record_request(
                provider_name="test_provider",
                success=False,
                response_time_ms=100.0,
                error_message=f"Error {i}",
            )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics is not None
        assert metrics.is_available is False
        assert metrics.consecutive_failures == 3

    def test_recovery_after_success(self):
        """Test provider recovery after successful request."""
        monitor = ProviderHealthMonitor(failure_threshold=2)

        # Record failures
        for _ in range(2):
            monitor.record_request(
                provider_name="test_provider",
                success=False,
                response_time_ms=100.0,
            )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics is not None
        assert metrics.is_available is False

        # Record success
        monitor.record_request(
            provider_name="test_provider",
            success=True,
            response_time_ms=100.0,
        )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics is not None
        assert metrics.is_available is True
        assert metrics.consecutive_failures == 0

    def test_average_response_time_calculation(self):
        """Test average response time calculation."""
        monitor = ProviderHealthMonitor()

        # 100, 200 and 300 ms are expected to average to 200 ms.
        for response_time_ms in (100.0, 200.0, 300.0):
            monitor.record_request(
                "test", success=True, response_time_ms=response_time_ms
            )

        metrics = monitor.get_provider_metrics("test")
        assert metrics is not None
        assert metrics.average_response_time_ms == 200.0

    def test_get_all_metrics(self):
        """Test getting metrics for all providers."""
        monitor = ProviderHealthMonitor()
        monitor.record_request(
            "provider1", success=True, response_time_ms=100.0
        )
        monitor.record_request(
            "provider2", success=True, response_time_ms=150.0
        )

        all_metrics = monitor.get_all_metrics()

        assert len(all_metrics) == 2
        assert "provider1" in all_metrics
        assert "provider2" in all_metrics

    def test_get_available_providers(self):
        """Test getting available providers list."""
        monitor = ProviderHealthMonitor(failure_threshold=2)

        # Available provider
        monitor.record_request(
            "provider1", success=True, response_time_ms=100.0
        )

        # Unavailable provider: three failures against a threshold of two.
        for _ in range(3):
            monitor.record_request(
                "provider2", success=False, response_time_ms=100.0
            )

        available = monitor.get_available_providers()
        assert "provider1" in available
        assert "provider2" not in available

    def test_get_best_provider(self):
        """Test getting best provider based on performance."""
        monitor = ProviderHealthMonitor()

        # Provider 1: 80% success, 100ms avg
        for i in range(10):
            monitor.record_request(
                "provider1",
                success=(i < 8),
                response_time_ms=100.0,
            )

        # Provider 2: 90% success, 150ms avg
        for i in range(10):
            monitor.record_request(
                "provider2",
                success=(i < 9),
                response_time_ms=150.0,
            )

        best = monitor.get_best_provider()

        # Provider 2 should be best (higher success rate)
        assert best == "provider2"

    def test_reset_provider_metrics(self):
        """Test resetting provider metrics."""
        monitor = ProviderHealthMonitor()
        monitor.record_request("test", success=True, response_time_ms=100.0)

        success = monitor.reset_provider_metrics("test")
        assert success is True

        # The provider is still known after the reset, with cleared counters.
        metrics = monitor.get_provider_metrics("test")
        assert metrics is not None
        assert metrics.total_requests == 0

    def test_reset_nonexistent_provider(self):
        """Test resetting metrics for nonexistent provider."""
        monitor = ProviderHealthMonitor()

        success = monitor.reset_provider_metrics("nonexistent")

        assert success is False

    def test_health_summary(self):
        """Test health summary generation."""
        monitor = ProviderHealthMonitor()
        monitor.record_request(
            "provider1", success=True, response_time_ms=100.0
        )
        monitor.record_request(
            "provider2", success=True, response_time_ms=150.0
        )

        summary = monitor.get_health_summary()

        assert summary["total_providers"] == 2
        assert summary["available_providers"] == 2
        assert summary["availability_percentage"] == 100.0
        assert "average_success_rate" in summary
        assert "average_response_time_ms" in summary
        assert "providers" in summary

    @pytest.mark.asyncio
    async def test_start_stop_monitoring(self):
        """Test starting and stopping health monitoring."""
        monitor = ProviderHealthMonitor(health_check_interval=1)

        monitor.start_monitoring()
        try:
            assert monitor._is_running is True
            assert monitor._health_check_task is not None
            await asyncio.sleep(0.1)  # Let it run briefly
        finally:
            # Always stop the monitor so a failed assertion above does not
            # leave the background task running.
            await monitor.stop_monitoring()

        assert monitor._is_running is False

    @pytest.mark.asyncio
    async def test_periodic_health_checks(self):
        """Test periodic health check execution."""
        monitor = ProviderHealthMonitor(health_check_interval=0.1)

        # Add some data
        monitor.record_request("test", success=True, response_time_ms=100.0)

        monitor.start_monitoring()
        try:
            await asyncio.sleep(0.3)  # Wait for health checks
        finally:
            # Stop the monitor even if the wait is interrupted.
            await monitor.stop_monitoring()

        metrics = monitor.get_provider_metrics("test")
        assert metrics is not None
        assert metrics.last_check_time is not None
class TestRequestMetric:
    """Checks for the RequestMetric dataclass."""

    def test_metric_initialization(self):
        """Every constructor argument is readable back from the instance."""
        stamp = datetime.now()
        fields = {
            "timestamp": stamp,
            "success": True,
            "response_time_ms": 150.0,
            "bytes_transferred": 2048,
            "error_message": None,
        }

        entry = RequestMetric(**fields)

        assert entry.timestamp == stamp
        assert entry.success is True
        assert entry.response_time_ms == 150.0
        assert entry.bytes_transferred == 2048
        assert entry.error_message is None
class TestHealthMonitorSingleton:
    """Checks for the module-level get_health_monitor accessor."""

    def test_get_health_monitor_singleton(self):
        """Two consecutive calls hand back the very same object."""
        first_call = get_health_monitor()
        second_call = get_health_monitor()

        # Identity, not equality: both names must refer to one instance.
        assert first_call is second_call