Aniworld/src/server/api/providers.py
Lukas fecdb38a90 feat: Add comprehensive provider health monitoring and failover system
- Implemented ProviderHealthMonitor for real-time tracking
  - Monitors availability, response times, success rates
  - Automatic marking unavailable after failures
  - Background health check loop

- Added ProviderFailover for automatic provider switching
  - Configurable retry attempts with exponential backoff
  - Integration with health monitoring
  - Smart provider selection

- Created MonitoredProviderWrapper for performance tracking
  - Transparent monitoring for any provider
  - Automatic metric recording
  - No changes needed to existing providers

- Implemented ProviderConfigManager for dynamic configuration
  - Runtime updates without restart
  - Per-provider settings (timeout, retries, bandwidth)
  - JSON-based persistence

- Added Provider Management API (15+ endpoints)
  - Health monitoring endpoints
  - Configuration management
  - Failover control

- Comprehensive testing (34 tests, 100% pass rate)
  - Health monitoring tests
  - Failover scenario tests
  - Configuration management tests

- Documentation updates
  - Updated infrastructure.md
  - Updated instructions.md
  - Created PROVIDER_ENHANCEMENT_SUMMARY.md

Total: ~2,593 lines of code, 34 passing tests
2025-10-24 11:01:40 +02:00

532 lines
15 KiB
Python

"""Provider management API endpoints.
This module provides REST API endpoints for monitoring and managing
anime providers, including health checks, configuration, and failover.
"""
import logging
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel, Field
from src.core.providers.config_manager import ProviderSettings, get_config_manager
from src.core.providers.failover import get_failover
from src.core.providers.health_monitor import get_health_monitor
from src.server.utils.dependencies import require_auth
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/providers", tags=["providers"])
# Request/Response Models
class ProviderHealthResponse(BaseModel):
"""Response model for provider health status."""
provider_name: str
is_available: bool
last_check_time: Optional[str] = None
total_requests: int
successful_requests: int
failed_requests: int
success_rate: float
average_response_time_ms: float
last_error: Optional[str] = None
last_error_time: Optional[str] = None
consecutive_failures: int
total_bytes_downloaded: int
uptime_percentage: float
class HealthSummaryResponse(BaseModel):
"""Response model for overall health summary."""
total_providers: int
available_providers: int
availability_percentage: float
average_success_rate: float
average_response_time_ms: float
providers: Dict[str, Dict[str, Any]]
class ProviderSettingsRequest(BaseModel):
"""Request model for updating provider settings."""
enabled: Optional[bool] = None
priority: Optional[int] = None
timeout_seconds: Optional[int] = Field(None, gt=0)
max_retries: Optional[int] = Field(None, ge=0)
retry_delay_seconds: Optional[float] = Field(None, gt=0)
max_concurrent_downloads: Optional[int] = Field(None, gt=0)
bandwidth_limit_mbps: Optional[float] = Field(None, gt=0)
class ProviderSettingsResponse(BaseModel):
"""Response model for provider settings."""
name: str
enabled: bool
priority: int
timeout_seconds: int
max_retries: int
retry_delay_seconds: float
max_concurrent_downloads: int
bandwidth_limit_mbps: Optional[float] = None
class FailoverStatsResponse(BaseModel):
"""Response model for failover statistics."""
total_providers: int
providers: List[str]
current_provider: str
max_retries: int
retry_delay: float
health_monitoring_enabled: bool
available_providers: Optional[List[str]] = None
unavailable_providers: Optional[List[str]] = None
# Health Monitoring Endpoints
@router.get("/health", response_model=HealthSummaryResponse)
async def get_providers_health(
auth: Optional[dict] = Depends(require_auth),
) -> HealthSummaryResponse:
"""Get overall provider health summary.
Args:
auth: Authentication token (optional).
Returns:
Health summary for all providers.
"""
try:
health_monitor = get_health_monitor()
summary = health_monitor.get_health_summary()
return HealthSummaryResponse(**summary)
except Exception as e:
logger.error(f"Failed to get provider health: {e}", exc_info=True)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to retrieve provider health: {str(e)}",
)
@router.get("/health/{provider_name}", response_model=ProviderHealthResponse) # noqa: E501
async def get_provider_health(
provider_name: str,
auth: Optional[dict] = Depends(require_auth),
) -> ProviderHealthResponse:
"""Get health status for a specific provider.
Args:
provider_name: Name of the provider.
auth: Authentication token (optional).
Returns:
Health metrics for the provider.
Raises:
HTTPException: If provider not found or error occurs.
"""
try:
health_monitor = get_health_monitor()
metrics = health_monitor.get_provider_metrics(provider_name)
if not metrics:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Provider '{provider_name}' not found",
)
return ProviderHealthResponse(**metrics.to_dict())
except HTTPException:
raise
except Exception as e:
logger.error(
f"Failed to get health for {provider_name}: {e}",
exc_info=True,
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to retrieve provider health: {str(e)}",
)
@router.get("/available", response_model=List[str])
async def get_available_providers(
auth: Optional[dict] = Depends(require_auth),
) -> List[str]:
"""Get list of currently available providers.
Args:
auth: Authentication token (optional).
Returns:
List of available provider names.
"""
try:
health_monitor = get_health_monitor()
return health_monitor.get_available_providers()
except Exception as e:
logger.error(f"Failed to get available providers: {e}", exc_info=True) # noqa: E501
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to retrieve available providers: {str(e)}",
)
@router.get("/best", response_model=Dict[str, str])
async def get_best_provider(
auth: Optional[dict] = Depends(require_auth),
) -> Dict[str, str]:
"""Get the best performing provider.
Args:
auth: Authentication token (optional).
Returns:
Dictionary with best provider name.
"""
try:
health_monitor = get_health_monitor()
best = health_monitor.get_best_provider()
if not best:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="No available providers",
)
return {"provider": best}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to get best provider: {e}", exc_info=True)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to determine best provider: {str(e)}",
)
@router.post("/health/{provider_name}/reset")
async def reset_provider_health(
provider_name: str,
auth: Optional[dict] = Depends(require_auth),
) -> Dict[str, str]:
"""Reset health metrics for a specific provider.
Args:
provider_name: Name of the provider.
auth: Authentication token (optional).
Returns:
Success message.
Raises:
HTTPException: If provider not found or error occurs.
"""
try:
health_monitor = get_health_monitor()
success = health_monitor.reset_provider_metrics(provider_name)
if not success:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Provider '{provider_name}' not found",
)
return {"message": f"Reset metrics for provider: {provider_name}"}
except HTTPException:
raise
except Exception as e:
logger.error(
f"Failed to reset health for {provider_name}: {e}",
exc_info=True,
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to reset provider health: {str(e)}",
)
# Configuration Endpoints
@router.get("/config", response_model=List[ProviderSettingsResponse])
async def get_all_provider_configs(
auth: Optional[dict] = Depends(require_auth),
) -> List[ProviderSettingsResponse]:
"""Get configuration for all providers.
Args:
auth: Authentication token (optional).
Returns:
List of provider configurations.
"""
try:
config_manager = get_config_manager()
all_settings = config_manager.get_all_provider_settings()
return [
ProviderSettingsResponse(**settings.to_dict())
for settings in all_settings.values()
]
except Exception as e:
logger.error(f"Failed to get provider configs: {e}", exc_info=True)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to retrieve provider configurations: {str(e)}", # noqa: E501
)
@router.get(
"/config/{provider_name}", response_model=ProviderSettingsResponse
)
async def get_provider_config(
provider_name: str,
auth: Optional[dict] = Depends(require_auth),
) -> ProviderSettingsResponse:
"""Get configuration for a specific provider.
Args:
provider_name: Name of the provider.
auth: Authentication token (optional).
Returns:
Provider configuration.
Raises:
HTTPException: If provider not found or error occurs.
"""
try:
config_manager = get_config_manager()
settings = config_manager.get_provider_settings(provider_name)
if not settings:
# Return default settings
settings = ProviderSettings(name=provider_name)
return ProviderSettingsResponse(**settings.to_dict())
except Exception as e:
logger.error(
f"Failed to get config for {provider_name}: {e}",
exc_info=True,
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to retrieve provider configuration: {str(e)}", # noqa: E501
)
@router.put(
"/config/{provider_name}", response_model=ProviderSettingsResponse
)
async def update_provider_config(
provider_name: str,
settings: ProviderSettingsRequest,
auth: Optional[dict] = Depends(require_auth),
) -> ProviderSettingsResponse:
"""Update configuration for a specific provider.
Args:
provider_name: Name of the provider.
settings: Settings to update.
auth: Authentication token (optional).
Returns:
Updated provider configuration.
"""
try:
config_manager = get_config_manager()
# Update settings
update_dict = settings.dict(exclude_unset=True)
config_manager.update_provider_settings(
provider_name, **update_dict
)
# Get updated settings
updated = config_manager.get_provider_settings(provider_name)
if not updated:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="Failed to retrieve updated configuration",
)
return ProviderSettingsResponse(**updated.to_dict())
except HTTPException:
raise
except Exception as e:
logger.error(
f"Failed to update config for {provider_name}: {e}",
exc_info=True,
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to update provider configuration: {str(e)}",
)
@router.post("/config/{provider_name}/enable")
async def enable_provider(
provider_name: str,
auth: Optional[dict] = Depends(require_auth),
) -> Dict[str, str]:
"""Enable a provider.
Args:
provider_name: Name of the provider.
auth: Authentication token (optional).
Returns:
Success message.
"""
try:
config_manager = get_config_manager()
config_manager.update_provider_settings(
provider_name, enabled=True
)
return {"message": f"Enabled provider: {provider_name}"}
except Exception as e:
logger.error(
f"Failed to enable {provider_name}: {e}", exc_info=True
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to enable provider: {str(e)}",
)
@router.post("/config/{provider_name}/disable")
async def disable_provider(
provider_name: str,
auth: Optional[dict] = Depends(require_auth),
) -> Dict[str, str]:
"""Disable a provider.
Args:
provider_name: Name of the provider.
auth: Authentication token (optional).
Returns:
Success message.
"""
try:
config_manager = get_config_manager()
config_manager.update_provider_settings(
provider_name, enabled=False
)
return {"message": f"Disabled provider: {provider_name}"}
except Exception as e:
logger.error(
f"Failed to disable {provider_name}: {e}", exc_info=True
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to disable provider: {str(e)}",
)
# Failover Endpoints
@router.get("/failover", response_model=FailoverStatsResponse)
async def get_failover_stats(
auth: Optional[dict] = Depends(require_auth),
) -> FailoverStatsResponse:
"""Get failover statistics and configuration.
Args:
auth: Authentication token (optional).
Returns:
Failover statistics.
"""
try:
failover = get_failover()
stats = failover.get_failover_stats()
return FailoverStatsResponse(**stats)
except Exception as e:
logger.error(f"Failed to get failover stats: {e}", exc_info=True)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to retrieve failover statistics: {str(e)}",
)
@router.post("/failover/{provider_name}/add")
async def add_provider_to_failover(
provider_name: str,
auth: Optional[dict] = Depends(require_auth),
) -> Dict[str, str]:
"""Add a provider to the failover chain.
Args:
provider_name: Name of the provider.
auth: Authentication token (optional).
Returns:
Success message.
"""
try:
failover = get_failover()
failover.add_provider(provider_name)
return {"message": f"Added provider to failover: {provider_name}"}
except Exception as e:
logger.error(
f"Failed to add {provider_name} to failover: {e}",
exc_info=True,
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to add provider to failover: {str(e)}",
)
@router.delete("/failover/{provider_name}")
async def remove_provider_from_failover(
provider_name: str,
auth: Optional[dict] = Depends(require_auth),
) -> Dict[str, str]:
"""Remove a provider from the failover chain.
Args:
provider_name: Name of the provider.
auth: Authentication token (optional).
Returns:
Success message.
Raises:
HTTPException: If provider not found in failover chain.
"""
try:
failover = get_failover()
success = failover.remove_provider(provider_name)
if not success:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail=f"Provider '{provider_name}' not in failover chain", # noqa: E501
)
return {
"message": f"Removed provider from failover: {provider_name}"
}
except HTTPException:
raise
except Exception as e:
logger.error(
f"Failed to remove {provider_name} from failover: {e}",
exc_info=True,
)
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Failed to remove provider from failover: {str(e)}",
)