- Add documentation warnings for in-memory rate limiting and failed login attempts - Consolidate duplicate health endpoints into api/health.py - Fix CLI to use correct async rescan method names - Update download.py and anime.py to use custom exception classes - Add WebSocket room validation and rate limiting
280 lines
7.8 KiB
Python
280 lines
7.8 KiB
Python
"""Health check endpoints for system monitoring and status verification."""
|
|
|
|
import logging
from datetime import datetime
from typing import Any, Dict, Optional

import psutil
from fastapi import APIRouter, Depends, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import PlainTextResponse
from pydantic import BaseModel
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

from src.server.utils.dependencies import get_database_session
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Every route declared in this module is registered under the "/health" prefix.
router = APIRouter(prefix="/health", tags=["health"])
|
|
|
|
|
|
class HealthStatus(BaseModel):
    """Basic health status response.

    Response model of ``basic_health_check``.
    """

    # Overall state; ``basic_health_check`` always reports "healthy".
    status: str
    # Moment the response was built, as an ISO-8601 string
    # (``datetime.now().isoformat()``).
    timestamp: str
    # Version string reported to clients; defaults to "1.0.0".
    version: str = "1.0.0"
    # Service identifier; defaults to "aniworld-api".
    service: str = "aniworld-api"
    # True when the ``_series_app`` object in the dependencies module is set.
    series_app_initialized: bool = False
    # True when ``settings.anime_directory`` holds a truthy value.
    anime_directory_configured: bool = False
|
|
|
|
|
|
class DatabaseHealth(BaseModel):
    """Database health status.

    Produced by ``check_database_health``.
    """

    # "healthy" when the probe query succeeded, "unhealthy" otherwise.
    status: str
    # Duration of the ``SELECT 1`` probe in milliseconds; 0 when the probe failed.
    connection_time_ms: float
    # Human-readable outcome; carries the error text on failure.
    message: Optional[str] = None
|
|
|
|
|
|
class SystemMetrics(BaseModel):
    """System resource metrics.

    Populated by ``get_system_metrics`` from ``psutil`` readings.
    """

    # CPU utilisation as reported by ``psutil.cpu_percent``.
    cpu_percent: float
    # Memory utilisation as reported by ``psutil.virtual_memory().percent``.
    memory_percent: float
    # Available memory: bytes divided by 1024 * 1024.
    memory_available_mb: float
    # Utilisation of the filesystem mounted at "/".
    disk_percent: float
    # Free space on "/": bytes divided by 1024 * 1024.
    disk_free_mb: float
    # Seconds elapsed since ``psutil.boot_time()`` (machine boot, not
    # process start).
    uptime_seconds: float
|
|
|
|
|
|
class DependencyHealth(BaseModel):
    """Health status of external dependencies.

    Groups the individual check results embedded in ``DetailedHealthStatus``.
    """

    # Result of ``check_database_health``.
    database: DatabaseHealth
    # Free-form dictionary returned by ``check_filesystem_health``.
    filesystem: Dict[str, Any]
    # Result of ``get_system_metrics``.
    system: SystemMetrics
|
|
|
|
|
|
class DetailedHealthStatus(BaseModel):
    """Comprehensive health check response.

    Response model of ``detailed_health_check``.
    """

    # "healthy", or "degraded" when the database or filesystem check is not
    # healthy.
    status: str
    # Moment the response was built, as an ISO-8601 string.
    timestamp: str
    # Version string reported to clients; defaults to "1.0.0".
    version: str = "1.0.0"
    # Per-dependency check results.
    dependencies: DependencyHealth
    # Value of the module-level ``startup_time`` captured at import.
    startup_time: datetime
|
|
|
|
|
|
# Global startup time: captured once when this module is imported and
# reported as ``startup_time`` by the detailed health check.
startup_time = datetime.now()
|
|
|
|
|
|
async def check_database_health(db: AsyncSession) -> DatabaseHealth:
    """Check database connection and performance.

    Runs a trivial ``SELECT 1`` against the given session and reports how
    long it took. Failures are logged and reported in the result instead of
    being raised, so callers always receive a ``DatabaseHealth`` object.

    Args:
        db: Database session dependency.

    Returns:
        DatabaseHealth: Database status and connection time in milliseconds.
            On failure the status is "unhealthy", the time is 0 and the
            message carries the error text.
    """
    import time

    try:
        # perf_counter() is monotonic, so the measured latency cannot be
        # skewed (or go negative) when the wall clock is adjusted mid-query.
        started = time.perf_counter()
        await db.execute(text("SELECT 1"))
        connection_time = (time.perf_counter() - started) * 1000  # seconds -> ms

        return DatabaseHealth(
            status="healthy",
            connection_time_ms=connection_time,
            message="Database connection successful",
        )
    except Exception as e:
        logger.error("Database health check failed: %s", e)
        return DatabaseHealth(
            status="unhealthy",
            connection_time_ms=0,
            message=f"Database connection failed: {str(e)}",
        )
|
|
|
|
|
|
async def check_filesystem_health() -> Dict[str, Any]:
    """Check filesystem availability and permissions.

    Verifies that the ``data`` and ``logs`` directories (resolved relative to
    the current working directory) exist and are writable by this process.

    Returns:
        dict: ``status`` ("healthy" when both directories are writable,
            "degraded" otherwise, "unhealthy" if the check itself failed),
            a ``message``, and on a completed check the boolean flags
            ``data_dir_writable`` and ``logs_dir_writable``.
    """
    try:
        import os

        data_dir = "data"
        logs_dir = "logs"

        # isdir() rather than exists(): a regular file that happens to be
        # named "data" or "logs" must not be reported as a writable directory.
        data_accessible = os.path.isdir(data_dir) and os.access(data_dir, os.W_OK)
        logs_accessible = os.path.isdir(logs_dir) and os.access(logs_dir, os.W_OK)

        return {
            "status": "healthy" if (data_accessible and logs_accessible) else "degraded",
            "data_dir_writable": data_accessible,
            "logs_dir_writable": logs_accessible,
            "message": "Filesystem check completed",
        }
    except Exception as e:
        logger.error("Filesystem health check failed: %s", e)
        return {
            "status": "unhealthy",
            "message": f"Filesystem check failed: {str(e)}",
        }
|
|
|
|
|
|
def get_system_metrics() -> SystemMetrics:
    """Get system resource metrics.

    NOTE: ``psutil.cpu_percent(interval=1)`` blocks the calling thread for
    about one second while it samples CPU usage, so this function should not
    be called directly on an asyncio event loop.

    Returns:
        SystemMetrics: CPU, memory, disk ("/"), and uptime information.

    Raises:
        HTTPException: With status 500 when any metric cannot be collected;
            the original error is attached as the exception cause.
    """
    import time

    try:
        # CPU usage, sampled over a one-second window.
        cpu_percent = psutil.cpu_percent(interval=1)

        # Memory usage
        memory_info = psutil.virtual_memory()

        # Disk usage of the root filesystem
        disk_info = psutil.disk_usage("/")

        # Uptime is measured from machine boot, not from process start.
        boot_time = psutil.boot_time()
        uptime_seconds = time.time() - boot_time

        bytes_per_mb = 1024 * 1024
        return SystemMetrics(
            cpu_percent=cpu_percent,
            memory_percent=memory_info.percent,
            memory_available_mb=memory_info.available / bytes_per_mb,
            disk_percent=disk_info.percent,
            disk_free_mb=disk_info.free / bytes_per_mb,
            uptime_seconds=uptime_seconds,
        )
    except Exception as e:
        logger.error("System metrics collection failed: %s", e)
        # Chain the cause so the original traceback is kept for debugging.
        raise HTTPException(
            status_code=500, detail=f"Failed to collect system metrics: {str(e)}"
        ) from e
|
|
|
|
|
|
@router.get("", response_model=HealthStatus)
async def basic_health_check() -> HealthStatus:
    """Report that the service is up, with basic identification.

    The check never touches the database or the filesystem and does not
    require ``anime_directory`` to be configured; it only reports whether
    those pieces are set up.

    Returns:
        HealthStatus: "healthy" status, the current timestamp, the service
            name, and the two initialization/configuration flags.
    """
    from src.config.settings import settings
    from src.server.utils.dependencies import _series_app

    logger.debug("Basic health check requested")

    generated_at = datetime.now().isoformat()
    app_ready = _series_app is not None
    directory_set = bool(settings.anime_directory)

    return HealthStatus(
        status="healthy",
        timestamp=generated_at,
        service="aniworld-api",
        series_app_initialized=app_ready,
        anime_directory_configured=directory_set,
    )
|
|
|
|
|
|
@router.get("/detailed", response_model=DetailedHealthStatus)
async def detailed_health_check(
    db: AsyncSession = Depends(get_database_session),
) -> DetailedHealthStatus:
    """Comprehensive health check endpoint.

    Checks database, filesystem, and system metrics. The overall status is
    "healthy" only when both the database and the filesystem checks report
    "healthy"; otherwise it is "degraded".

    Args:
        db: Database session dependency.

    Returns:
        DetailedHealthStatus: Comprehensive health information.

    Raises:
        HTTPException: With status 500 and a generic detail message when any
            part of the check raises.
    """
    logger.debug("Detailed health check requested")

    try:
        # Check dependencies
        database_health = await check_database_health(db)
        filesystem_health = await check_filesystem_health()
        # get_system_metrics() samples CPU with a blocking one-second
        # interval; run it in a worker thread so the event loop keeps
        # serving other requests in the meantime.
        system_metrics = await run_in_threadpool(get_system_metrics)

        # Determine overall status
        overall_status = "healthy"
        if database_health.status != "healthy":
            overall_status = "degraded"
        if filesystem_health.get("status") != "healthy":
            overall_status = "degraded"

        dependencies = DependencyHealth(
            database=database_health,
            filesystem=filesystem_health,
            system=system_metrics,
        )

        return DetailedHealthStatus(
            status=overall_status,
            timestamp=datetime.now().isoformat(),
            dependencies=dependencies,
            startup_time=startup_time,
        )
    except Exception as e:
        logger.error("Detailed health check failed: %s", e)
        # The detail stays generic so internal error text is not sent to clients.
        raise HTTPException(status_code=500, detail="Health check failed")
|
|
|
|
|
|
@router.get("/metrics", response_model=SystemMetrics)
async def get_metrics() -> SystemMetrics:
    """Get system resource metrics.

    Returns:
        SystemMetrics: Current CPU, memory, disk, and uptime metrics.

    Raises:
        HTTPException: Propagated from ``get_system_metrics`` (status 500)
            when metric collection fails.
    """
    logger.debug("System metrics requested")
    # get_system_metrics() blocks for about a second while sampling CPU usage;
    # run it in a worker thread so the event loop is not stalled.
    return await run_in_threadpool(get_system_metrics)
|
|
|
|
|
|
# PlainTextResponse: without it the returned string goes through the default
# JSON response and arrives quoted with escaped newlines, which a Prometheus
# scraper cannot parse as text exposition format.
@router.get("/metrics/prometheus", response_class=PlainTextResponse)
async def get_prometheus_metrics() -> str:
    """Get metrics in Prometheus format.

    Returns:
        str: Prometheus formatted metrics, as produced by the metrics
            collector's ``export_prometheus_format()``; sent to the client
            as plain text.
    """
    from src.server.utils.metrics import get_metrics_collector

    logger.debug("Prometheus metrics requested")
    collector = get_metrics_collector()
    return collector.export_prometheus_format()
|
|
|
|
|
|
@router.get("/metrics/json")
async def get_metrics_json() -> Dict[str, Any]:
    """Expose the collected metrics as a JSON document.

    Returns:
        dict: Whatever the metrics collector's ``export_json()`` produces.
    """
    from src.server.utils.metrics import get_metrics_collector

    logger.debug("JSON metrics requested")
    return get_metrics_collector().export_json()
|