Add startup health checks and /health/ready endpoint
- Add _run_startup_health_checks() function in fastapi_app.py
  - Check ffmpeg availability (warning)
  - Check DNS resolution for aniworld.to and api.themoviedb.org (warning)
  - Check anime_directory configuration and writability (error)
  - Store startup checks in app.state for health endpoint access
- Add /health/ready endpoint for container orchestrators
  - Returns not_ready with 503 when critical failures present
  - Includes critical_failures list for debugging
- Update /health endpoint to include startup check results
  - Status reflects worst check (error > warning > ok)
- Document health check endpoints in DEVELOPMENT.md
- Add unit tests for startup health checks
- Add unit tests for /health/ready endpoint

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -5,7 +5,7 @@ from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import psutil
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
@@ -28,6 +28,7 @@ class HealthStatus(BaseModel):
|
||||
anime_directory_configured: bool = False
|
||||
scheduler_next_run: Optional[str] = None
|
||||
scheduler_last_run: Optional[str] = None
|
||||
checks: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class DatabaseHealth(BaseModel):
|
||||
@@ -173,13 +174,14 @@ def get_system_metrics() -> SystemMetrics:
|
||||
|
||||
|
||||
@router.get("", response_model=HealthStatus)
|
||||
async def basic_health_check() -> HealthStatus:
|
||||
async def basic_health_check(request: Request) -> HealthStatus:
|
||||
"""Basic health check endpoint.
|
||||
|
||||
This endpoint does not depend on anime_directory configuration
|
||||
and should always return 200 OK for basic health monitoring.
|
||||
Includes service information for identification.
|
||||
Includes scheduler next/last run times for monitoring tools.
|
||||
Includes startup health check results.
|
||||
|
||||
Returns:
|
||||
HealthStatus: Simple health status with timestamp and service info.
|
||||
@@ -195,18 +197,67 @@ async def basic_health_check() -> HealthStatus:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Get startup checks from app state
|
||||
checks = getattr(request.app.state, "startup_checks", None)
|
||||
|
||||
# Determine overall status based on checks
|
||||
overall_status = "healthy"
|
||||
if checks:
|
||||
for check_name, check_data in checks.items():
|
||||
if check_data.get("status") == "error":
|
||||
overall_status = "unhealthy"
|
||||
break
|
||||
elif check_data.get("status") == "warning":
|
||||
overall_status = "degraded"
|
||||
|
||||
logger.debug("Basic health check requested")
|
||||
return HealthStatus(
|
||||
status="healthy",
|
||||
status=overall_status,
|
||||
timestamp=datetime.now().isoformat(),
|
||||
service="aniworld-api",
|
||||
series_app_initialized=_series_app is not None,
|
||||
anime_directory_configured=bool(settings.anime_directory),
|
||||
scheduler_next_run=scheduler_status.get("next_run"),
|
||||
scheduler_last_run=scheduler_status.get("last_run"),
|
||||
checks=checks,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/ready")
async def ready_check(request: Request) -> Any:
    """Readiness check endpoint for container orchestrators.

    Reads the startup check results stored on ``request.app.state`` under
    ``startup_checks`` and reports whether any of them has status
    ``"error"``.

    Args:
        request: Incoming request; only ``request.app.state`` is read.

    Returns:
        On success, a dict with ``status="ready"``, ``ready=True``, a
        timestamp and the raw checks (served as HTTP 200).
        When at least one check has status ``"error"``, a ``JSONResponse``
        with HTTP status 503 whose body carries ``status="not_ready"``,
        ``ready=False``, a timestamp, the ``critical_failures`` list and
        the raw checks.
    """
    # Function-scope imports keep this block self-contained; both names ship
    # with FastAPI, which this module already depends on.
    from fastapi.encoders import jsonable_encoder
    from fastapi.responses import JSONResponse

    # ``or {}`` also covers the attribute being present but set to None.
    checks = getattr(request.app.state, "startup_checks", None) or {}

    critical_failures = [
        f"{check_name}: {check_data.get('message')}"
        for check_name, check_data in checks.items()
        if check_data.get("status") == "error"
    ]

    timestamp = datetime.now().isoformat()

    if critical_failures:
        # A plain dict return would be served as 200 and the probe would
        # treat the container as ready; the status code must be explicit.
        return JSONResponse(
            status_code=503,
            # JSONResponse does not run FastAPI's encoder, so encode the
            # payload explicitly in case check data holds non-JSON types.
            content=jsonable_encoder(
                {
                    "status": "not_ready",
                    "ready": False,
                    "timestamp": timestamp,
                    "critical_failures": critical_failures,
                    "checks": checks,
                }
            ),
        )

    return {
        "status": "ready",
        "ready": True,
        "timestamp": timestamp,
        "checks": checks,
    }
|
||||
|
||||
|
||||
@router.get("/detailed", response_model=DetailedHealthStatus)
|
||||
async def detailed_health_check(
|
||||
db: AsyncSession = Depends(get_database_session),
|
||||
|
||||
Reference in New Issue
Block a user