Add startup health checks and /health/ready endpoint
- Add _run_startup_health_checks() function in fastapi_app.py
  - Check ffmpeg availability (warning)
  - Check DNS resolution for aniworld.to and api.themoviedb.org (warning)
  - Check anime_directory configuration and writability (error)
  - Store startup checks in app.state for health endpoint access
- Add /health/ready endpoint for container orchestrators
  - Returns not_ready with 503 when critical failures present
  - Includes critical_failures list for debugging
- Update /health endpoint to include startup check results
  - Status reflects worst check (error > warning > ok)
- Document health check endpoints in DEVELOPMENT.md
- Add unit tests for startup health checks
- Add unit tests for /health/ready endpoint

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -5,7 +5,7 @@ from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import psutil
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
@@ -28,6 +28,7 @@ class HealthStatus(BaseModel):
|
||||
anime_directory_configured: bool = False
|
||||
scheduler_next_run: Optional[str] = None
|
||||
scheduler_last_run: Optional[str] = None
|
||||
checks: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class DatabaseHealth(BaseModel):
|
||||
@@ -173,13 +174,14 @@ def get_system_metrics() -> SystemMetrics:
|
||||
|
||||
|
||||
@router.get("", response_model=HealthStatus)
|
||||
async def basic_health_check() -> HealthStatus:
|
||||
async def basic_health_check(request: Request) -> HealthStatus:
|
||||
"""Basic health check endpoint.
|
||||
|
||||
This endpoint does not depend on anime_directory configuration
|
||||
and should always return 200 OK for basic health monitoring.
|
||||
Includes service information for identification.
|
||||
Includes scheduler next/last run times for monitoring tools.
|
||||
Includes startup health check results.
|
||||
|
||||
Returns:
|
||||
HealthStatus: Simple health status with timestamp and service info.
|
||||
@@ -195,18 +197,67 @@ async def basic_health_check() -> HealthStatus:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Get startup checks from app state
|
||||
checks = getattr(request.app.state, "startup_checks", None)
|
||||
|
||||
# Determine overall status based on checks
|
||||
overall_status = "healthy"
|
||||
if checks:
|
||||
for check_name, check_data in checks.items():
|
||||
if check_data.get("status") == "error":
|
||||
overall_status = "unhealthy"
|
||||
break
|
||||
elif check_data.get("status") == "warning":
|
||||
overall_status = "degraded"
|
||||
|
||||
logger.debug("Basic health check requested")
|
||||
return HealthStatus(
|
||||
status="healthy",
|
||||
status=overall_status,
|
||||
timestamp=datetime.now().isoformat(),
|
||||
service="aniworld-api",
|
||||
series_app_initialized=_series_app is not None,
|
||||
anime_directory_configured=bool(settings.anime_directory),
|
||||
scheduler_next_run=scheduler_status.get("next_run"),
|
||||
scheduler_last_run=scheduler_status.get("last_run"),
|
||||
checks=checks,
|
||||
)
|
||||
|
||||
|
||||
@router.get("/ready")
async def ready_check(request: Request) -> Any:
    """Readiness check endpoint for container orchestrators.

    Reads the startup check results stored on ``request.app.state`` and
    treats every check whose status is ``"error"`` as a critical failure.
    Intended for Kubernetes, Docker Swarm, etc. to decide whether the
    container should receive traffic.

    Args:
        request: Incoming request; used only to reach ``app.state``.

    Returns:
        A dict (served as HTTP 200) with ``status="ready"`` when no check
        reported an error. Otherwise a ``JSONResponse`` with HTTP 503 whose
        body has ``status="not_ready"`` and a ``critical_failures`` list.
    """
    # Function-scope import so this block does not depend on a module-level
    # import that may not exist in the file.
    from fastapi.responses import JSONResponse

    # The attribute is absent until startup has stored results; a None value
    # is treated the same way so the loop below never fails on it.
    checks = getattr(request.app.state, "startup_checks", None) or {}

    critical_failures = [
        f"{check_name}: {check_data.get('message')}"
        for check_name, check_data in checks.items()
        if check_data.get("status") == "error"
    ]

    if critical_failures:
        # A plain dict return would be sent as 200 OK, which orchestrators
        # read as "ready"; the status code must be set explicitly.
        return JSONResponse(
            status_code=503,
            content={
                "status": "not_ready",
                "ready": False,
                "timestamp": datetime.now().isoformat(),
                "critical_failures": critical_failures,
                "checks": checks,
            },
        )

    return {
        "status": "ready",
        "ready": True,
        "timestamp": datetime.now().isoformat(),
        "checks": checks,
    }
|
||||
|
||||
|
||||
@router.get("/detailed", response_model=DetailedHealthStatus)
|
||||
async def detailed_health_check(
|
||||
db: AsyncSession = Depends(get_database_session),
|
||||
|
||||
@@ -104,6 +104,107 @@ async def _check_incomplete_series_on_startup(background_loader) -> None:
|
||||
logger.exception("Failed to check incomplete series on startup")
|
||||
|
||||
|
||||
async def _check_dns_resolution(hostname: str, logger) -> dict:
    """Resolve ``hostname`` without blocking the event loop.

    Args:
        hostname: Host name to resolve.
        logger: Logger instance for recording the outcome.

    Returns:
        dict: ``{"status": "ok" | "warning", "message": str}``. Resolution
        failures are reported as warnings and never raised.
    """
    import asyncio
    import socket

    try:
        # gethostbyname() is a blocking call; run it in the default executor
        # so other tasks on the loop keep running while the resolver waits.
        await asyncio.get_running_loop().run_in_executor(
            None, socket.gethostbyname, hostname
        )
    except socket.gaierror as e:
        logger.warning("DNS health check failed for %s: %s", hostname, e)
        return {"status": "warning", "message": f"DNS resolution failed: {e}"}
    except Exception as e:
        logger.warning("Unexpected DNS error for %s: %s", hostname, e)
        return {"status": "warning", "message": f"Unexpected error: {e}"}

    logger.debug("DNS health check passed for %s", hostname)
    return {"status": "ok", "message": "Resolved successfully"}


async def _run_startup_health_checks(logger) -> dict:
    """Run startup health checks for critical dependencies.

    Checks:
    - ffmpeg availability
    - DNS resolution for aniworld.to and api.themoviedb.org
    - anime_directory configuration and writability

    Args:
        logger: Logger instance for recording check results.

    Returns:
        dict: One entry per check (``ffmpeg``, ``dns_aniworld``,
        ``dns_tmdb``, ``anime_directory``), each a dict with ``status``
        and ``message``; ``anime_directory`` also carries ``path``.
    """
    import asyncio
    import os
    import shutil

    checks: dict = {
        "ffmpeg": {"status": "unknown", "message": None},
        "dns_aniworld": {"status": "unknown", "message": None},
        "dns_tmdb": {"status": "unknown", "message": None},
        "anime_directory": {"status": "unknown", "message": None, "path": None},
    }

    # Check ffmpeg availability
    try:
        ffmpeg_path = shutil.which("ffmpeg")
        if ffmpeg_path:
            checks["ffmpeg"]["status"] = "ok"
            checks["ffmpeg"]["message"] = f"Found at {ffmpeg_path}"
            logger.debug("ffmpeg health check passed: %s", ffmpeg_path)
        else:
            checks["ffmpeg"]["status"] = "warning"
            checks["ffmpeg"]["message"] = "ffmpeg not found in PATH"
            logger.warning("ffmpeg health check failed: not in PATH")
    except Exception as e:
        # NOTE(review): a missing ffmpeg is only a warning, but a failure of
        # the lookup itself is recorded as "error" - confirm that is intended.
        checks["ffmpeg"]["status"] = "error"
        checks["ffmpeg"]["message"] = str(e)
        logger.warning("Could not check ffmpeg: %s", e)

    # Check DNS resolution for both hosts concurrently; the helper converts
    # every failure into a warning entry, so gather() does not raise here.
    checks["dns_aniworld"], checks["dns_tmdb"] = await asyncio.gather(
        _check_dns_resolution("aniworld.to", logger),
        _check_dns_resolution("api.themoviedb.org", logger),
    )

    # Check anime_directory configuration and writability
    from src.config.settings import settings

    anime_dir = settings.anime_directory
    directory_check = checks["anime_directory"]

    if not anime_dir:
        directory_check["status"] = "error"
        directory_check["message"] = "anime_directory not configured"
        directory_check["path"] = None
        logger.error("anime_directory health check failed: not configured")
        return checks

    directory_check["path"] = anime_dir

    if not os.path.isdir(anime_dir):
        directory_check["status"] = "error"
        directory_check["message"] = f"Directory does not exist: {anime_dir}"
        logger.error(
            "anime_directory health check failed: %s does not exist", anime_dir
        )
    # Creating entries inside a directory needs search (execute) permission
    # as well as write permission, so both bits are tested.
    elif not os.access(anime_dir, os.W_OK | os.X_OK):
        directory_check["status"] = "error"
        directory_check["message"] = f"Directory not writable: {anime_dir}"
        logger.error(
            "anime_directory health check failed: %s not writable", anime_dir
        )
    else:
        directory_check["status"] = "ok"
        directory_check["message"] = (
            f"Directory exists and is writable: {anime_dir}"
        )
        logger.debug("anime_directory health check passed: %s", anime_dir)

    return checks
|
||||
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(_application: FastAPI):
|
||||
"""Manage application lifespan (startup and shutdown).
|
||||
@@ -342,6 +443,14 @@ async def lifespan(_application: FastAPI):
|
||||
logger.debug("ffmpeg found at: %s", _shutil.which("ffmpeg"))
|
||||
except Exception as _exc:
|
||||
logger.warning("Could not check for ffmpeg: %s", _exc)
|
||||
|
||||
# Run startup health checks and store results for /health endpoint
|
||||
try:
|
||||
startup_checks = await _run_startup_health_checks(logger)
|
||||
app.state.startup_checks = startup_checks
|
||||
except Exception as _exc:
|
||||
logger.warning("Could not run startup health checks: %s", _exc)
|
||||
app.state.startup_checks = {}
|
||||
except Exception as e:
|
||||
logger.error("Error during startup: %s", e, exc_info=True)
|
||||
startup_error = e
|
||||
|
||||
Reference in New Issue
Block a user