feat: implement graceful shutdown with SIGINT/SIGTERM support

- Add WebSocket shutdown() with client notification and graceful close
- Enhance download service stop() with pending state persistence
- Expand FastAPI lifespan shutdown with proper cleanup sequence
- Add SQLite WAL checkpoint before database close
- Update stop_server.sh to use SIGTERM with timeout fallback
- Configure uvicorn timeout_graceful_shutdown=30s
- Update ARCHITECTURE.md with shutdown documentation
This commit is contained in:
2025-12-25 18:59:07 +01:00
parent 1ba67357dc
commit d70d70e193
9 changed files with 443 additions and 175 deletions

View File

@@ -155,30 +155,81 @@ async def lifespan(_application: FastAPI):
# Yield control to the application
yield
# Shutdown
logger.info("FastAPI application shutting down")
# Shutdown - execute in proper order with timeout protection
logger.info("FastAPI application shutting down (graceful shutdown initiated)")
# Shutdown download service and its thread pool
# Define shutdown timeout (total time allowed for all shutdown operations)
SHUTDOWN_TIMEOUT = 30.0
import time
shutdown_start = time.monotonic()
def remaining_time() -> float:
"""Calculate remaining shutdown time."""
elapsed = time.monotonic() - shutdown_start
return max(0.0, SHUTDOWN_TIMEOUT - elapsed)
# 1. Broadcast shutdown notification via WebSocket
try:
ws_service = get_websocket_service()
logger.info("Broadcasting shutdown notification to WebSocket clients...")
await asyncio.wait_for(
ws_service.shutdown(timeout=min(5.0, remaining_time())),
timeout=min(5.0, remaining_time())
)
logger.info("WebSocket shutdown complete")
except asyncio.TimeoutError:
logger.warning("WebSocket shutdown timed out")
except Exception as e: # pylint: disable=broad-exception-caught
logger.error("Error during WebSocket shutdown: %s", e, exc_info=True)
# 2. Shutdown download service and persist active downloads
try:
from src.server.services.download_service import ( # noqa: E501
_download_service_instance,
)
if _download_service_instance is not None:
logger.info("Stopping download service...")
await _download_service_instance.stop()
await asyncio.wait_for(
_download_service_instance.stop(timeout=min(10.0, remaining_time())),
timeout=min(15.0, remaining_time())
)
logger.info("Download service stopped successfully")
except asyncio.TimeoutError:
logger.warning("Download service shutdown timed out")
except Exception as e: # pylint: disable=broad-exception-caught
logger.error("Error stopping download service: %s", e, exc_info=True)
# Close database connections
# 3. Cleanup progress service
try:
progress_service = get_progress_service()
logger.info("Cleaning up progress service...")
# Clear any active progress tracking and subscribers
progress_service._subscribers.clear()
progress_service._active_progress.clear()
logger.info("Progress service cleanup complete")
except Exception as e: # pylint: disable=broad-exception-caught
logger.error("Error cleaning up progress service: %s", e, exc_info=True)
# 4. Close database connections with WAL checkpoint
try:
from src.server.database.connection import close_db
await close_db()
logger.info("Closing database connections...")
await asyncio.wait_for(
close_db(),
timeout=min(10.0, remaining_time())
)
logger.info("Database connections closed")
except asyncio.TimeoutError:
logger.warning("Database shutdown timed out")
except Exception as e: # pylint: disable=broad-exception-caught
logger.error("Error closing database: %s", e, exc_info=True)
logger.info("FastAPI application shutdown complete")
elapsed_total = time.monotonic() - shutdown_start
logger.info(
"FastAPI application shutdown complete (took %.2fs)",
elapsed_total
)
# Initialize FastAPI app with lifespan