- Implemented ProviderHealthMonitor for real-time tracking
  - Monitors availability, response times, success rates
  - Automatic marking unavailable after failures
  - Background health check loop
- Added ProviderFailover for automatic provider switching
  - Configurable retry attempts with exponential backoff
  - Integration with health monitoring
  - Smart provider selection
- Created MonitoredProviderWrapper for performance tracking
  - Transparent monitoring for any provider
  - Automatic metric recording
  - No changes needed to existing providers
- Implemented ProviderConfigManager for dynamic configuration
  - Runtime updates without restart
  - Per-provider settings (timeout, retries, bandwidth)
  - JSON-based persistence
- Added Provider Management API (15+ endpoints)
  - Health monitoring endpoints
  - Configuration management
  - Failover control
- Comprehensive testing (34 tests, 100% pass rate)
  - Health monitoring tests
  - Failover scenario tests
  - Configuration management tests
- Documentation updates
  - Updated infrastructure.md
  - Updated instructions.md
  - Created PROVIDER_ENHANCEMENT_SUMMARY.md

Total: ~2,593 lines of code, 34 passing tests
330 lines
11 KiB
Python
330 lines
11 KiB
Python
"""Unit tests for provider health monitoring system."""
|
|
import asyncio
|
|
from datetime import datetime
|
|
|
|
import pytest
|
|
|
|
from src.core.providers.health_monitor import (
|
|
ProviderHealthMetrics,
|
|
ProviderHealthMonitor,
|
|
RequestMetric,
|
|
get_health_monitor,
|
|
)
|
|
|
|
|
|
class TestProviderHealthMetrics:
    """Tests for the ProviderHealthMetrics dataclass."""

    def test_metrics_initialization(self):
        """A new metrics object starts available with zeroed counters."""
        m = ProviderHealthMetrics(provider_name="test_provider")

        assert m.provider_name == "test_provider"
        assert m.is_available is True
        assert m.total_requests == 0
        assert m.successful_requests == 0
        assert m.failed_requests == 0
        assert m.average_response_time_ms == 0.0
        assert m.consecutive_failures == 0
        assert m.uptime_percentage == 100.0

    def test_success_rate_calculation(self):
        """success_rate / failure_rate are derived from the counters."""
        m = ProviderHealthMetrics(provider_name="test")
        m.total_requests = 100
        m.successful_requests = 75

        # 75 of 100 requests succeeded -> 75% success, 25% failure.
        assert m.success_rate == 75.0
        assert m.failure_rate == 25.0

    def test_success_rate_zero_requests(self):
        """With no requests recorded the rates fall back to 0% / 100%."""
        m = ProviderHealthMetrics(provider_name="test")

        assert m.success_rate == 0.0
        assert m.failure_rate == 100.0

    def test_to_dict(self):
        """to_dict() exposes raw counters plus computed rates."""
        m = ProviderHealthMetrics(
            provider_name="test",
            total_requests=10,
            successful_requests=8,
        )

        payload = m.to_dict()

        assert payload["provider_name"] == "test"
        assert payload["total_requests"] == 10
        assert payload["successful_requests"] == 8
        assert payload["success_rate"] == 80.0
        assert "average_response_time_ms" in payload
|
|
|
class TestProviderHealthMonitor:
    """Test ProviderHealthMonitor class.

    Covers request recording, availability bookkeeping around the
    failure threshold, provider selection helpers, metric reset, the
    aggregated health summary, and the async background check loop.
    """

    def test_monitor_initialization(self):
        """Test monitor initialization.

        Constructor arguments should be stored on the private
        attributes and the background loop must not be running yet.
        """
        monitor = ProviderHealthMonitor(
            max_history_size=500,
            health_check_interval=60,
            failure_threshold=5,
        )

        assert monitor._max_history_size == 500
        assert monitor._health_check_interval == 60
        assert monitor._failure_threshold == 5
        assert not monitor._is_running

    def test_record_successful_request(self):
        """Test recording successful request.

        A single success should create the provider entry, bump the
        counters, seed the response-time average, and accumulate the
        transferred bytes.
        """
        monitor = ProviderHealthMonitor()

        monitor.record_request(
            provider_name="test_provider",
            success=True,
            response_time_ms=150.0,
            bytes_transferred=1024,
        )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics is not None
        assert metrics.total_requests == 1
        assert metrics.successful_requests == 1
        assert metrics.failed_requests == 0
        assert metrics.is_available is True
        assert metrics.consecutive_failures == 0
        # Only one sample, so the average equals that sample.
        assert metrics.average_response_time_ms == 150.0
        assert metrics.total_bytes_downloaded == 1024

    def test_record_failed_request(self):
        """Test recording failed request.

        One failure (threshold is 2) records the error message but
        must NOT flip the provider to unavailable yet.
        """
        monitor = ProviderHealthMonitor(failure_threshold=2)

        monitor.record_request(
            provider_name="test_provider",
            success=False,
            response_time_ms=200.0,
            error_message="Connection timeout",
        )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics is not None
        assert metrics.total_requests == 1
        assert metrics.failed_requests == 1
        assert metrics.consecutive_failures == 1
        assert metrics.last_error == "Connection timeout"
        assert metrics.is_available is True  # Below threshold

    def test_mark_unavailable_after_failures(self):
        """Test marking provider unavailable after threshold.

        Exactly `failure_threshold` consecutive failures should flip
        `is_available` to False.
        """
        monitor = ProviderHealthMonitor(failure_threshold=3)

        for i in range(3):
            monitor.record_request(
                provider_name="test_provider",
                success=False,
                response_time_ms=100.0,
                error_message=f"Error {i}",
            )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics.is_available is False
        assert metrics.consecutive_failures == 3

    def test_recovery_after_success(self):
        """Test provider recovery after successful request.

        A single success after the provider was marked unavailable
        should restore availability and reset the consecutive-failure
        counter.
        """
        monitor = ProviderHealthMonitor(failure_threshold=2)

        # Record failures
        for _ in range(2):
            monitor.record_request(
                provider_name="test_provider",
                success=False,
                response_time_ms=100.0,
            )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics.is_available is False

        # Record success
        monitor.record_request(
            provider_name="test_provider",
            success=True,
            response_time_ms=100.0,
        )

        metrics = monitor.get_provider_metrics("test_provider")
        assert metrics.is_available is True
        assert metrics.consecutive_failures == 0

    def test_average_response_time_calculation(self):
        """Test average response time calculation.

        Three samples of 100/200/300 ms should average to 200 ms.
        """
        monitor = ProviderHealthMonitor()

        monitor.record_request(
            "test", success=True, response_time_ms=100.0
        )
        monitor.record_request(
            "test", success=True, response_time_ms=200.0
        )
        monitor.record_request(
            "test", success=True, response_time_ms=300.0
        )

        metrics = monitor.get_provider_metrics("test")
        assert metrics.average_response_time_ms == 200.0

    def test_get_all_metrics(self):
        """Test getting metrics for all providers.

        The mapping should be keyed by provider name and contain one
        entry per provider that recorded at least one request.
        """
        monitor = ProviderHealthMonitor()

        monitor.record_request("provider1", success=True, response_time_ms=100.0)  # noqa: E501
        monitor.record_request("provider2", success=True, response_time_ms=150.0)  # noqa: E501

        all_metrics = monitor.get_all_metrics()

        assert len(all_metrics) == 2
        assert "provider1" in all_metrics
        assert "provider2" in all_metrics

    def test_get_available_providers(self):
        """Test getting available providers list.

        Providers pushed past the failure threshold must be excluded
        from the available list.
        """
        monitor = ProviderHealthMonitor(failure_threshold=2)

        # Available provider
        monitor.record_request("provider1", success=True, response_time_ms=100.0)  # noqa: E501

        # Unavailable provider
        for _ in range(3):
            monitor.record_request(
                "provider2", success=False, response_time_ms=100.0
            )

        available = monitor.get_available_providers()

        assert "provider1" in available
        assert "provider2" not in available

    def test_get_best_provider(self):
        """Test getting best provider based on performance.

        With success rate weighted over latency, the 90%-success
        provider should win even though it is 50 ms slower.
        """
        monitor = ProviderHealthMonitor()

        # Provider 1: 80% success, 100ms avg
        for i in range(10):
            monitor.record_request(
                "provider1",
                success=(i < 8),
                response_time_ms=100.0,
            )

        # Provider 2: 90% success, 150ms avg
        for i in range(10):
            monitor.record_request(
                "provider2",
                success=(i < 9),
                response_time_ms=150.0,
            )

        best = monitor.get_best_provider()

        # Provider 2 should be best (higher success rate)
        assert best == "provider2"

    def test_reset_provider_metrics(self):
        """Test resetting provider metrics.

        Reset should succeed for a known provider and zero its counters
        while keeping the provider registered.
        """
        monitor = ProviderHealthMonitor()

        monitor.record_request("test", success=True, response_time_ms=100.0)

        success = monitor.reset_provider_metrics("test")

        assert success is True
        metrics = monitor.get_provider_metrics("test")
        assert metrics.total_requests == 0

    def test_reset_nonexistent_provider(self):
        """Test resetting metrics for nonexistent provider.

        Unknown providers are reported via a False return value rather
        than an exception.
        """
        monitor = ProviderHealthMonitor()

        success = monitor.reset_provider_metrics("nonexistent")

        assert success is False

    def test_health_summary(self):
        """Test health summary generation.

        The summary aggregates counts, availability percentage, and
        fleet-wide averages, plus a per-provider breakdown.
        """
        monitor = ProviderHealthMonitor()

        monitor.record_request("provider1", success=True, response_time_ms=100.0)  # noqa: E501
        monitor.record_request("provider2", success=True, response_time_ms=150.0)  # noqa: E501

        summary = monitor.get_health_summary()

        assert summary["total_providers"] == 2
        assert summary["available_providers"] == 2
        assert summary["availability_percentage"] == 100.0
        assert "average_success_rate" in summary
        assert "average_response_time_ms" in summary
        assert "providers" in summary

    @pytest.mark.asyncio
    async def test_start_stop_monitoring(self):
        """Test starting and stopping health monitoring.

        start_monitoring() should flag the monitor as running and
        schedule the background task; stop_monitoring() must await its
        shutdown and clear the flag.
        """
        monitor = ProviderHealthMonitor(health_check_interval=1)

        monitor.start_monitoring()
        assert monitor._is_running is True
        assert monitor._health_check_task is not None

        await asyncio.sleep(0.1)  # Let it run briefly

        await monitor.stop_monitoring()
        assert monitor._is_running is False

    @pytest.mark.asyncio
    async def test_periodic_health_checks(self):
        """Test periodic health check execution.

        With a 0.1 s interval, sleeping 0.3 s guarantees at least one
        background check ran and stamped `last_check_time`.
        """
        monitor = ProviderHealthMonitor(health_check_interval=0.1)

        # Add some data
        monitor.record_request("test", success=True, response_time_ms=100.0)

        monitor.start_monitoring()
        await asyncio.sleep(0.3)  # Wait for health checks
        await monitor.stop_monitoring()

        metrics = monitor.get_provider_metrics("test")
        assert metrics.last_check_time is not None
|
|
|
class TestRequestMetric:
    """Tests for the RequestMetric dataclass."""

    def test_metric_initialization(self):
        """All constructor fields are stored unchanged on the instance."""
        ts = datetime.now()
        rec = RequestMetric(
            timestamp=ts,
            success=True,
            response_time_ms=150.0,
            bytes_transferred=2048,
            error_message=None,
        )

        assert rec.timestamp == ts
        assert rec.success is True
        assert rec.response_time_ms == 150.0
        assert rec.bytes_transferred == 2048
        assert rec.error_message is None
|
|
|
|
class TestHealthMonitorSingleton:
    """Tests for the module-level health monitor accessor."""

    def test_get_health_monitor_singleton(self):
        """Repeated calls to get_health_monitor() yield one shared object."""
        first = get_health_monitor()
        second = get_health_monitor()

        # Identity, not just equality: the accessor must cache.
        assert first is second
|