Add startup health checks and /health/ready endpoint

- Add _run_startup_health_checks() function in fastapi_app.py
  - Check ffmpeg availability (warning)
  - Check DNS resolution for aniworld.to and api.themoviedb.org (warning)
  - Check anime_directory configuration and writability (error)
- Store startup checks in app.state for health endpoint access
- Add /health/ready endpoint for container orchestrators
  - Returns status "not_ready" with HTTP 503 when critical failures are present
  - Includes critical_failures list for debugging
- Update /health endpoint to include startup check results
  - Status reflects the worst check result (error > warning > ok), reported as unhealthy, degraded, or healthy
- Document health check endpoints in DEVELOPMENT.md
- Add unit tests for startup health checks
- Add unit tests for /health/ready endpoint

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-23 22:12:03 +02:00
parent 9a20541598
commit 3551838887
5 changed files with 458 additions and 7 deletions

View File

@@ -5,7 +5,7 @@ from datetime import datetime
from typing import Any, Dict, Optional
import psutil
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException, Request
from pydantic import BaseModel
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
@@ -28,6 +28,7 @@ class HealthStatus(BaseModel):
anime_directory_configured: bool = False
scheduler_next_run: Optional[str] = None
scheduler_last_run: Optional[str] = None
checks: Optional[Dict[str, Any]] = None
class DatabaseHealth(BaseModel):
@@ -173,13 +174,14 @@ def get_system_metrics() -> SystemMetrics:
@router.get("", response_model=HealthStatus)
async def basic_health_check() -> HealthStatus:
async def basic_health_check(request: Request) -> HealthStatus:
"""Basic health check endpoint.
This endpoint does not depend on anime_directory configuration
and should always return 200 OK for basic health monitoring.
Includes service information for identification.
Includes scheduler next/last run times for monitoring tools.
Includes startup health check results.
Returns:
HealthStatus: Simple health status with timestamp and service info.
@@ -195,18 +197,67 @@ async def basic_health_check() -> HealthStatus:
except Exception:
pass
# Get startup checks from app state
checks = getattr(request.app.state, "startup_checks", None)
# Determine overall status based on checks
overall_status = "healthy"
if checks:
for check_name, check_data in checks.items():
if check_data.get("status") == "error":
overall_status = "unhealthy"
break
elif check_data.get("status") == "warning":
overall_status = "degraded"
logger.debug("Basic health check requested")
return HealthStatus(
status="healthy",
status=overall_status,
timestamp=datetime.now().isoformat(),
service="aniworld-api",
series_app_initialized=_series_app is not None,
anime_directory_configured=bool(settings.anime_directory),
scheduler_next_run=scheduler_status.get("next_run"),
scheduler_last_run=scheduler_status.get("last_run"),
checks=checks,
)
@router.get("/ready")
async def ready_check(request: Request) -> Dict[str, Any]:
    """Readiness check endpoint for container orchestrators.

    Reads the startup check results stored on ``request.app.state`` under
    ``startup_checks`` and treats every check whose ``status`` is ``"error"``
    as a critical failure.

    Responds with HTTP 503 when at least one critical failure is present, so
    that orchestrators (Kubernetes, Docker Swarm, etc.) stop routing traffic
    to the container. Otherwise the default 200 response is used.

    Args:
        request: Incoming request; used only to reach ``request.app.state``.

    Returns:
        dict: Readiness status with check details when ready. When not
        ready, a ``JSONResponse`` carrying the same kind of payload plus a
        ``critical_failures`` list is returned with status code 503.
    """
    # Imported locally so this handler does not depend on changes to the
    # module-level import section.
    from fastapi.encoders import jsonable_encoder
    from fastapi.responses import JSONResponse

    # A missing attribute or an explicit None both mean "no checks recorded".
    checks = getattr(request.app.state, "startup_checks", None) or {}

    critical_failures = [
        f"{check_name}: {check_data.get('message')}"
        for check_name, check_data in checks.items()
        if check_data.get("status") == "error"
    ]

    if critical_failures:
        not_ready_payload = {
            "status": "not_ready",
            "ready": False,
            "timestamp": datetime.now().isoformat(),
            "critical_failures": critical_failures,
            "checks": checks,
        }
        # Returning a plain dict would be served as 200 OK; a Response
        # object is passed through by FastAPI unchanged, which lets the
        # readiness probe actually fail with 503.
        return JSONResponse(  # type: ignore[return-value]
            status_code=503,
            content=jsonable_encoder(not_ready_payload),
        )

    return {
        "status": "ready",
        "ready": True,
        "timestamp": datetime.now().isoformat(),
        "checks": checks,
    }
@router.get("/detailed", response_model=DetailedHealthStatus)
async def detailed_health_check(
db: AsyncSession = Depends(get_database_session),