backup
This commit is contained in:
270
src/server/api/analytics.py
Normal file
270
src/server/api/analytics.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""Analytics API endpoints for accessing system analytics and reports.
|
||||
|
||||
Provides REST API endpoints for querying analytics data including download
|
||||
statistics, series popularity, storage analysis, and performance reports.
|
||||
"""
|
||||
|
||||
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession

from src.server.database.connection import get_db
from src.server.services.analytics_service import get_analytics_service
|
||||
|
||||
router = APIRouter(prefix="/api/analytics", tags=["analytics"])
|
||||
|
||||
|
||||
class DownloadStatsResponse(BaseModel):
    """Download statistics response model."""

    # Total number of download attempts in the analyzed period.
    total_downloads: int
    # Downloads that completed successfully.
    successful_downloads: int
    # Downloads that ended in failure.
    failed_downloads: int
    # Sum of downloaded payload sizes, in bytes.
    total_bytes_downloaded: int
    # Mean transfer speed; Mbps per the field name (unit set by the service).
    average_speed_mbps: float
    # Ratio of successful to total downloads (scale defined by the service).
    success_rate: float
    # Mean duration of a single download, in seconds.
    average_duration_seconds: float
|
||||
|
||||
|
||||
class SeriesPopularityResponse(BaseModel):
    """Series popularity response model."""

    # Display name of the series.
    series_name: str
    # Number of downloads recorded for this series.
    download_count: int
    # Combined size of this series' downloads, in bytes.
    total_size_bytes: int
    # Timestamp string of the most recent download (format set by the
    # service), or None if never downloaded.
    last_download: Optional[str]
    # Ratio of successful downloads (scale defined by the service).
    success_rate: float
|
||||
|
||||
|
||||
class StorageAnalysisResponse(BaseModel):
    """Storage analysis response model."""

    # Total capacity of the monitored volume, in bytes.
    total_storage_bytes: int
    # Bytes currently in use on the volume.
    used_storage_bytes: int
    # Bytes still available on the volume.
    free_storage_bytes: int
    # Percentage of the volume in use.
    storage_percent_used: float
    # Size of the downloads directory, in bytes.
    downloads_directory_size_bytes: int
    # Size of the cache directory, in bytes.
    cache_directory_size_bytes: int
    # Size of the logs directory, in bytes.
    logs_directory_size_bytes: int
|
||||
|
||||
|
||||
class PerformanceReportResponse(BaseModel):
    """Performance report response model."""

    # Start of the reporting window (string produced by the service).
    period_start: str
    # End of the reporting window.
    period_end: str
    # Download throughput over the window.
    downloads_per_hour: float
    # Mean number of items waiting in the queue.
    average_queue_size: float
    # Peak resident memory observed, in MB.
    peak_memory_usage_mb: float
    # Mean CPU utilization over the window, as a percentage.
    average_cpu_percent: float
    # Process/system uptime, in seconds.
    uptime_seconds: float
    # Ratio of failed operations (scale defined by the service).
    error_rate: float
|
||||
|
||||
|
||||
class SummaryReportResponse(BaseModel):
    """Comprehensive analytics summary response."""

    # When the summary was generated (string produced by the service).
    timestamp: str
    # Aggregated download metrics for the reporting period.
    download_stats: DownloadStatsResponse
    # Top series ranked by download activity.
    series_popularity: list[SeriesPopularityResponse]
    # Disk and directory usage breakdown.
    storage_analysis: StorageAnalysisResponse
    # Throughput and resource-usage metrics.
    performance_report: PerformanceReportResponse
|
||||
|
||||
|
||||
@router.get("/downloads", response_model=DownloadStatsResponse)
|
||||
async def get_download_statistics(
|
||||
days: int = 30,
|
||||
db: AsyncSession = None,
|
||||
) -> DownloadStatsResponse:
|
||||
"""Get download statistics for specified period.
|
||||
|
||||
Args:
|
||||
days: Number of days to analyze (default: 30)
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
Download statistics including success rates and speeds
|
||||
"""
|
||||
if db is None:
|
||||
db = await get_db().__anext__()
|
||||
|
||||
try:
|
||||
service = get_analytics_service()
|
||||
stats = await service.get_download_stats(db, days=days)
|
||||
|
||||
return DownloadStatsResponse(
|
||||
total_downloads=stats.total_downloads,
|
||||
successful_downloads=stats.successful_downloads,
|
||||
failed_downloads=stats.failed_downloads,
|
||||
total_bytes_downloaded=stats.total_bytes_downloaded,
|
||||
average_speed_mbps=stats.average_speed_mbps,
|
||||
success_rate=stats.success_rate,
|
||||
average_duration_seconds=stats.average_duration_seconds,
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to get download statistics: {str(e)}",
|
||||
)
|
||||
|
||||
|
||||
@router.get(
    "/series-popularity",
    response_model=list[SeriesPopularityResponse]
)
async def get_series_popularity(
    limit: int = 10,
    db: AsyncSession = Depends(get_db),
) -> list[SeriesPopularityResponse]:
    """Get most popular series by download count.

    Args:
        limit: Maximum number of series (default: 10)
        db: Database session (injected by FastAPI)

    Returns:
        List of series sorted by popularity

    Raises:
        HTTPException: 500 if the analytics service fails.
    """
    # NOTE: the session was previously pulled via get_db().__anext__(),
    # which leaked the async generator; Depends() manages it correctly.
    try:
        service = get_analytics_service()
        popularity = await service.get_series_popularity(db, limit=limit)

        return [
            SeriesPopularityResponse(
                series_name=p.series_name,
                download_count=p.download_count,
                total_size_bytes=p.total_size_bytes,
                last_download=p.last_download,
                success_rate=p.success_rate,
            )
            for p in popularity
        ]
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get series popularity: {str(e)}",
        ) from e
|
||||
|
||||
|
||||
@router.get(
    "/storage",
    response_model=StorageAnalysisResponse
)
async def get_storage_analysis() -> StorageAnalysisResponse:
    """Get current storage usage analysis.

    Returns:
        Storage breakdown including disk and directory usage
    """
    try:
        analysis = get_analytics_service().get_storage_analysis()

        # Collect the service attributes into the response payload.
        payload = {
            "total_storage_bytes": analysis.total_storage_bytes,
            "used_storage_bytes": analysis.used_storage_bytes,
            "free_storage_bytes": analysis.free_storage_bytes,
            "storage_percent_used": analysis.storage_percent_used,
            "downloads_directory_size_bytes": (
                analysis.downloads_directory_size_bytes
            ),
            "cache_directory_size_bytes": (
                analysis.cache_directory_size_bytes
            ),
            "logs_directory_size_bytes": (
                analysis.logs_directory_size_bytes
            ),
        }
        return StorageAnalysisResponse(**payload)
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get storage analysis: {str(e)}",
        )
|
||||
|
||||
|
||||
@router.get(
    "/performance",
    response_model=PerformanceReportResponse
)
async def get_performance_report(
    hours: int = 24,
    db: AsyncSession = Depends(get_db),
) -> PerformanceReportResponse:
    """Get performance metrics for specified period.

    Args:
        hours: Number of hours to analyze (default: 24)
        db: Database session (injected by FastAPI)

    Returns:
        Performance metrics including speeds and system usage

    Raises:
        HTTPException: 500 if the analytics service fails.
    """
    # NOTE: the session was previously pulled via get_db().__anext__(),
    # which leaked the async generator; Depends() manages it correctly.
    try:
        service = get_analytics_service()
        report = await service.get_performance_report(db, hours=hours)

        return PerformanceReportResponse(
            period_start=report.period_start,
            period_end=report.period_end,
            downloads_per_hour=report.downloads_per_hour,
            average_queue_size=report.average_queue_size,
            peak_memory_usage_mb=report.peak_memory_usage_mb,
            average_cpu_percent=report.average_cpu_percent,
            uptime_seconds=report.uptime_seconds,
            error_rate=report.error_rate,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get performance report: {str(e)}",
        ) from e
|
||||
|
||||
|
||||
@router.get("/summary", response_model=SummaryReportResponse)
|
||||
async def get_summary_report(
|
||||
db: AsyncSession = None,
|
||||
) -> SummaryReportResponse:
|
||||
"""Get comprehensive analytics summary.
|
||||
|
||||
Args:
|
||||
db: Database session
|
||||
|
||||
Returns:
|
||||
Complete analytics report with all metrics
|
||||
"""
|
||||
if db is None:
|
||||
db = await get_db().__anext__()
|
||||
|
||||
try:
|
||||
service = get_analytics_service()
|
||||
summary = await service.generate_summary_report(db)
|
||||
|
||||
return SummaryReportResponse(
|
||||
timestamp=summary["timestamp"],
|
||||
download_stats=DownloadStatsResponse(
|
||||
**summary["download_stats"]
|
||||
),
|
||||
series_popularity=[
|
||||
SeriesPopularityResponse(**p)
|
||||
for p in summary["series_popularity"]
|
||||
],
|
||||
storage_analysis=StorageAnalysisResponse(
|
||||
**summary["storage_analysis"]
|
||||
),
|
||||
performance_report=PerformanceReportResponse(
|
||||
**summary["performance_report"]
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail=f"Failed to generate summary report: {str(e)}",
|
||||
)
|
||||
304
src/server/api/backup.py
Normal file
304
src/server/api/backup.py
Normal file
@@ -0,0 +1,304 @@
|
||||
"""Backup management API endpoints."""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from src.server.services.backup_service import BackupService, get_backup_service
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/backup", tags=["backup"])
|
||||
|
||||
|
||||
class BackupCreateRequest(BaseModel):
    """Request to create a backup."""

    backup_type: str  # 'config', 'database', 'full'
    # Optional free-form note passed through to the backup service.
    description: Optional[str] = None
|
||||
|
||||
|
||||
class BackupResponse(BaseModel):
    """Response for backup creation."""

    # Whether the backup was created.
    success: bool
    # Human-readable outcome description.
    message: str
    # Name of the created backup; None when creation failed.
    backup_name: Optional[str] = None
    # Size of the created backup in bytes; None when creation failed.
    size_bytes: Optional[int] = None
|
||||
|
||||
|
||||
class BackupListResponse(BaseModel):
    """Response for listing backups."""

    # Backup records as returned by the backup service.
    backups: List[Dict[str, Any]]
    # Number of entries in ``backups``.
    total_count: int
|
||||
|
||||
|
||||
class RestoreRequest(BaseModel):
    """Request to restore from backup."""

    # Name of an existing backup; the endpoint may infer the backup type
    # from substrings of this name when no explicit type is given.
    backup_name: str
|
||||
|
||||
|
||||
class RestoreResponse(BaseModel):
    """Response for restore operation."""

    # Whether the restore completed.
    success: bool
    # Human-readable outcome description.
    message: str
|
||||
|
||||
|
||||
def get_backup_service_dep() -> BackupService:
    """Dependency to get backup service.

    Thin wrapper so FastAPI ``Depends`` can resolve (and tests can
    override) the backup service without importing it at call sites.
    """
    return get_backup_service()
|
||||
|
||||
|
||||
@router.post("/create", response_model=BackupResponse)
|
||||
async def create_backup(
|
||||
request: BackupCreateRequest,
|
||||
backup_service: BackupService = Depends(get_backup_service_dep),
|
||||
) -> BackupResponse:
|
||||
"""Create a new backup.
|
||||
|
||||
Args:
|
||||
request: Backup creation request.
|
||||
backup_service: Backup service dependency.
|
||||
|
||||
Returns:
|
||||
BackupResponse: Result of backup creation.
|
||||
"""
|
||||
try:
|
||||
backup_info = None
|
||||
|
||||
if request.backup_type == "config":
|
||||
backup_info = backup_service.backup_configuration(
|
||||
request.description or ""
|
||||
)
|
||||
elif request.backup_type == "database":
|
||||
backup_info = backup_service.backup_database(
|
||||
request.description or ""
|
||||
)
|
||||
elif request.backup_type == "full":
|
||||
backup_info = backup_service.backup_full(
|
||||
request.description or ""
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Invalid backup type: {request.backup_type}")
|
||||
|
||||
if backup_info is None:
|
||||
return BackupResponse(
|
||||
success=False,
|
||||
message=f"Failed to create {request.backup_type} backup",
|
||||
)
|
||||
|
||||
return BackupResponse(
|
||||
success=True,
|
||||
message=(
|
||||
f"{request.backup_type.capitalize()} backup created "
|
||||
"successfully"
|
||||
),
|
||||
backup_name=backup_info.name,
|
||||
size_bytes=backup_info.size_bytes,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to create backup: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/list", response_model=BackupListResponse)
|
||||
async def list_backups(
|
||||
backup_type: Optional[str] = None,
|
||||
backup_service: BackupService = Depends(get_backup_service_dep),
|
||||
) -> BackupListResponse:
|
||||
"""List available backups.
|
||||
|
||||
Args:
|
||||
backup_type: Optional filter by backup type.
|
||||
backup_service: Backup service dependency.
|
||||
|
||||
Returns:
|
||||
BackupListResponse: List of available backups.
|
||||
"""
|
||||
try:
|
||||
backups = backup_service.list_backups(backup_type)
|
||||
return BackupListResponse(backups=backups, total_count=len(backups))
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to list backups: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/restore", response_model=RestoreResponse)
|
||||
async def restore_backup(
|
||||
request: RestoreRequest,
|
||||
backup_type: Optional[str] = None,
|
||||
backup_service: BackupService = Depends(get_backup_service_dep),
|
||||
) -> RestoreResponse:
|
||||
"""Restore from a backup.
|
||||
|
||||
Args:
|
||||
request: Restore request.
|
||||
backup_type: Type of backup to restore.
|
||||
backup_service: Backup service dependency.
|
||||
|
||||
Returns:
|
||||
RestoreResponse: Result of restore operation.
|
||||
"""
|
||||
try:
|
||||
# Determine backup type from filename if not provided
|
||||
if backup_type is None:
|
||||
if "config" in request.backup_name:
|
||||
backup_type = "config"
|
||||
elif "database" in request.backup_name:
|
||||
backup_type = "database"
|
||||
else:
|
||||
backup_type = "full"
|
||||
|
||||
success = False
|
||||
|
||||
if backup_type == "config":
|
||||
success = backup_service.restore_configuration(
|
||||
request.backup_name
|
||||
)
|
||||
elif backup_type == "database":
|
||||
success = backup_service.restore_database(request.backup_name)
|
||||
else:
|
||||
raise ValueError(f"Cannot restore backup type: {backup_type}")
|
||||
|
||||
if not success:
|
||||
return RestoreResponse(
|
||||
success=False,
|
||||
message=f"Failed to restore {backup_type} backup",
|
||||
)
|
||||
|
||||
return RestoreResponse(
|
||||
success=True,
|
||||
message=f"{backup_type.capitalize()} backup restored successfully",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to restore backup: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.delete("/{backup_name}", response_model=Dict[str, Any])
async def delete_backup(
    backup_name: str,
    backup_service: BackupService = Depends(get_backup_service_dep),
) -> Dict[str, Any]:
    """Delete a backup.

    Args:
        backup_name: Name of the backup to delete.
        backup_service: Backup service dependency.

    Returns:
        dict: Result of delete operation.
    """
    try:
        removed = backup_service.delete_backup(backup_name)
        if not removed:
            # The service reports a missing backup via a falsy result.
            raise HTTPException(status_code=404, detail="Backup not found")
        return {"success": True, "message": "Backup deleted successfully"}
    except HTTPException:
        # Preserve the 404 above instead of wrapping it in a 500.
        raise
    except Exception as e:
        logger.error(f"Failed to delete backup: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/cleanup", response_model=Dict[str, Any])
|
||||
async def cleanup_backups(
|
||||
max_backups: int = 10,
|
||||
backup_type: Optional[str] = None,
|
||||
backup_service: BackupService = Depends(get_backup_service_dep),
|
||||
) -> Dict[str, Any]:
|
||||
"""Clean up old backups.
|
||||
|
||||
Args:
|
||||
max_backups: Maximum number of backups to keep.
|
||||
backup_type: Optional filter by backup type.
|
||||
backup_service: Backup service dependency.
|
||||
|
||||
Returns:
|
||||
dict: Number of backups deleted.
|
||||
"""
|
||||
try:
|
||||
deleted_count = backup_service.cleanup_old_backups(
|
||||
max_backups, backup_type
|
||||
)
|
||||
return {
|
||||
"success": True,
|
||||
"message": "Cleanup completed",
|
||||
"deleted_count": deleted_count,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to cleanup backups: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/export/anime", response_model=Dict[str, Any])
|
||||
async def export_anime_data(
|
||||
backup_service: BackupService = Depends(get_backup_service_dep),
|
||||
) -> Dict[str, Any]:
|
||||
"""Export anime library data.
|
||||
|
||||
Args:
|
||||
backup_service: Backup service dependency.
|
||||
|
||||
Returns:
|
||||
dict: Result of export operation.
|
||||
"""
|
||||
try:
|
||||
output_file = "data/backups/anime_export.json"
|
||||
success = backup_service.export_anime_data(output_file)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
status_code=500, detail="Failed to export anime data"
|
||||
)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"message": "Anime data exported successfully",
|
||||
"export_file": output_file,
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to export anime data: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/import/anime", response_model=Dict[str, Any])
|
||||
async def import_anime_data(
|
||||
import_file: str,
|
||||
backup_service: BackupService = Depends(get_backup_service_dep),
|
||||
) -> Dict[str, Any]:
|
||||
"""Import anime library data.
|
||||
|
||||
Args:
|
||||
import_file: Path to import file.
|
||||
backup_service: Backup service dependency.
|
||||
|
||||
Returns:
|
||||
dict: Result of import operation.
|
||||
"""
|
||||
try:
|
||||
success = backup_service.import_anime_data(import_file)
|
||||
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
status_code=400, detail="Failed to import anime data"
|
||||
)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"message": "Anime data imported successfully",
|
||||
}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to import anime data: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
266
src/server/api/health.py
Normal file
266
src/server/api/health.py
Normal file
@@ -0,0 +1,266 @@
|
||||
"""Health check endpoints for system monitoring and status verification."""
|
||||
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import psutil
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from src.server.utils.dependencies import get_database_session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/health", tags=["health"])
|
||||
|
||||
|
||||
class HealthStatus(BaseModel):
    """Basic health status response."""

    # Overall service status string (e.g. "healthy").
    status: str
    # ISO-8601 timestamp of when the check ran.
    timestamp: str
    # API version reported to clients.
    version: str = "1.0.0"
|
||||
|
||||
|
||||
class DatabaseHealth(BaseModel):
    """Database health status."""

    # "healthy" or "unhealthy" (set by check_database_health).
    status: str
    # Round-trip time of the probe query in milliseconds; 0 on failure.
    connection_time_ms: float
    # Optional human-readable detail about the check result.
    message: Optional[str] = None
|
||||
|
||||
|
||||
class SystemMetrics(BaseModel):
    """System resource metrics."""

    # CPU utilization percentage.
    cpu_percent: float
    # Percentage of physical memory in use.
    memory_percent: float
    # Available memory, in megabytes.
    memory_available_mb: float
    # Percentage of the root filesystem in use.
    disk_percent: float
    # Free space on the root filesystem, in megabytes.
    disk_free_mb: float
    # Host uptime (time since boot), in seconds.
    uptime_seconds: float
|
||||
|
||||
|
||||
class DependencyHealth(BaseModel):
    """Health status of external dependencies."""

    # Result of the database connectivity probe.
    database: DatabaseHealth
    # Result of the filesystem writability check (free-form dict).
    filesystem: Dict[str, Any]
    # Current CPU/memory/disk metrics.
    system: SystemMetrics
|
||||
|
||||
|
||||
class DetailedHealthStatus(BaseModel):
    """Comprehensive health check response."""

    # Overall status: "healthy" or "degraded".
    status: str
    # ISO-8601 timestamp of when the check ran.
    timestamp: str
    # API version reported to clients.
    version: str = "1.0.0"
    # Per-dependency health breakdown.
    dependencies: DependencyHealth
    # Module import time, used as the service start marker.
    startup_time: datetime
|
||||
|
||||
|
||||
# Global startup time
|
||||
startup_time = datetime.now()
|
||||
|
||||
|
||||
async def check_database_health(db: AsyncSession) -> DatabaseHealth:
    """Check database connection and performance.

    Args:
        db: Database session dependency.

    Returns:
        DatabaseHealth: Database status and connection time; never raises,
        failures are reported as an "unhealthy" status.
    """
    try:
        import time

        # perf_counter is monotonic and high-resolution; time.time() can
        # jump (e.g. NTP adjustments) and is unsuitable for measuring
        # short intervals.
        start_time = time.perf_counter()
        await db.execute(text("SELECT 1"))  # minimal round-trip probe
        connection_time = (time.perf_counter() - start_time) * 1000  # ms

        return DatabaseHealth(
            status="healthy",
            connection_time_ms=connection_time,
            message="Database connection successful",
        )
    except Exception as e:
        logger.error(f"Database health check failed: {e}")
        return DatabaseHealth(
            status="unhealthy",
            connection_time_ms=0,
            message=f"Database connection failed: {str(e)}",
        )
|
||||
|
||||
|
||||
async def check_filesystem_health() -> Dict[str, Any]:
|
||||
"""Check filesystem availability and permissions.
|
||||
|
||||
Returns:
|
||||
dict: Filesystem status and available space.
|
||||
"""
|
||||
try:
|
||||
import os
|
||||
|
||||
data_dir = "data"
|
||||
logs_dir = "logs"
|
||||
|
||||
data_accessible = os.path.exists(data_dir) and os.access(data_dir, os.W_OK)
|
||||
logs_accessible = os.path.exists(logs_dir) and os.access(logs_dir, os.W_OK)
|
||||
|
||||
return {
|
||||
"status": "healthy" if (data_accessible and logs_accessible) else "degraded",
|
||||
"data_dir_writable": data_accessible,
|
||||
"logs_dir_writable": logs_accessible,
|
||||
"message": "Filesystem check completed",
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Filesystem health check failed: {e}")
|
||||
return {
|
||||
"status": "unhealthy",
|
||||
"message": f"Filesystem check failed: {str(e)}",
|
||||
}
|
||||
|
||||
|
||||
def get_system_metrics() -> SystemMetrics:
    """Get system resource metrics.

    Returns:
        SystemMetrics: CPU, memory, disk, and uptime information.

    Raises:
        HTTPException: 500 when metric collection fails.
    """
    try:
        import time

        # CPU usage, sampled over a 1-second window.
        # NOTE(review): interval=1 blocks the caller for a full second per
        # request; if this endpoint becomes hot, consider interval=None
        # with a warm-up call at startup.
        cpu_percent = psutil.cpu_percent(interval=1)

        # Memory usage
        memory_info = psutil.virtual_memory()
        memory_percent = memory_info.percent
        memory_available_mb = memory_info.available / (1024 * 1024)

        # Disk usage of the root filesystem
        disk_info = psutil.disk_usage("/")
        disk_percent = disk_info.percent
        disk_free_mb = disk_info.free / (1024 * 1024)

        # Host uptime since boot
        boot_time = psutil.boot_time()
        uptime_seconds = time.time() - boot_time

        return SystemMetrics(
            cpu_percent=cpu_percent,
            memory_percent=memory_percent,
            memory_available_mb=memory_available_mb,
            disk_percent=disk_percent,
            disk_free_mb=disk_free_mb,
            uptime_seconds=uptime_seconds,
        )
    except Exception as e:
        logger.error(f"System metrics collection failed: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to collect system metrics: {str(e)}"
        )
|
||||
|
||||
|
||||
@router.get("", response_model=HealthStatus)
|
||||
async def basic_health_check() -> HealthStatus:
|
||||
"""Basic health check endpoint.
|
||||
|
||||
Returns:
|
||||
HealthStatus: Simple health status with timestamp.
|
||||
"""
|
||||
logger.debug("Basic health check requested")
|
||||
return HealthStatus(
|
||||
status="healthy",
|
||||
timestamp=datetime.now().isoformat(),
|
||||
)
|
||||
|
||||
|
||||
@router.get("/detailed", response_model=DetailedHealthStatus)
|
||||
async def detailed_health_check(
|
||||
db: AsyncSession = Depends(get_database_session),
|
||||
) -> DetailedHealthStatus:
|
||||
"""Comprehensive health check endpoint.
|
||||
|
||||
Checks database, filesystem, and system metrics.
|
||||
|
||||
Args:
|
||||
db: Database session dependency.
|
||||
|
||||
Returns:
|
||||
DetailedHealthStatus: Comprehensive health information.
|
||||
"""
|
||||
logger.debug("Detailed health check requested")
|
||||
|
||||
try:
|
||||
# Check dependencies
|
||||
database_health = await check_database_health(db)
|
||||
filesystem_health = await check_filesystem_health()
|
||||
system_metrics = get_system_metrics()
|
||||
|
||||
# Determine overall status
|
||||
overall_status = "healthy"
|
||||
if database_health.status != "healthy":
|
||||
overall_status = "degraded"
|
||||
if filesystem_health.get("status") != "healthy":
|
||||
overall_status = "degraded"
|
||||
|
||||
dependencies = DependencyHealth(
|
||||
database=database_health,
|
||||
filesystem=filesystem_health,
|
||||
system=system_metrics,
|
||||
)
|
||||
|
||||
return DetailedHealthStatus(
|
||||
status=overall_status,
|
||||
timestamp=datetime.now().isoformat(),
|
||||
dependencies=dependencies,
|
||||
startup_time=startup_time,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Detailed health check failed: {e}")
|
||||
raise HTTPException(status_code=500, detail="Health check failed")
|
||||
|
||||
|
||||
@router.get("/metrics", response_model=SystemMetrics)
|
||||
async def get_metrics() -> SystemMetrics:
|
||||
"""Get system resource metrics.
|
||||
|
||||
Returns:
|
||||
SystemMetrics: Current CPU, memory, disk, and uptime metrics.
|
||||
"""
|
||||
logger.debug("System metrics requested")
|
||||
return get_system_metrics()
|
||||
|
||||
|
||||
@router.get("/metrics/prometheus")
|
||||
async def get_prometheus_metrics() -> str:
|
||||
"""Get metrics in Prometheus format.
|
||||
|
||||
Returns:
|
||||
str: Prometheus formatted metrics.
|
||||
"""
|
||||
from src.server.utils.metrics import get_metrics_collector
|
||||
|
||||
logger.debug("Prometheus metrics requested")
|
||||
collector = get_metrics_collector()
|
||||
return collector.export_prometheus_format()
|
||||
|
||||
|
||||
@router.get("/metrics/json")
|
||||
async def get_metrics_json() -> Dict[str, Any]:
|
||||
"""Get metrics as JSON.
|
||||
|
||||
Returns:
|
||||
dict: Metrics in JSON format.
|
||||
"""
|
||||
from src.server.utils.metrics import get_metrics_collector
|
||||
|
||||
logger.debug("JSON metrics requested")
|
||||
collector = get_metrics_collector()
|
||||
return collector.export_json()
|
||||
369
src/server/api/maintenance.py
Normal file
369
src/server/api/maintenance.py
Normal file
@@ -0,0 +1,369 @@
|
||||
"""Maintenance and system management API endpoints."""
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from src.server.services.monitoring_service import get_monitoring_service
|
||||
from src.server.utils.dependencies import get_database_session
|
||||
from src.server.utils.system import get_system_utilities
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/api/maintenance", tags=["maintenance"])
|
||||
|
||||
|
||||
def get_system_utils():
    """Dependency to get system utilities.

    Thin wrapper so FastAPI ``Depends`` can resolve (and tests can
    override) the system utilities object.
    """
    return get_system_utilities()
|
||||
|
||||
|
||||
@router.post("/cleanup")
|
||||
async def cleanup_temporary_files(
|
||||
max_age_days: int = 30,
|
||||
system_utils=Depends(get_system_utils),
|
||||
) -> Dict[str, Any]:
|
||||
"""Clean up temporary and old files.
|
||||
|
||||
Args:
|
||||
max_age_days: Delete files older than this many days.
|
||||
system_utils: System utilities dependency.
|
||||
|
||||
Returns:
|
||||
dict: Cleanup results.
|
||||
"""
|
||||
try:
|
||||
deleted_logs = system_utils.cleanup_directory(
|
||||
"logs", "*.log", max_age_days
|
||||
)
|
||||
deleted_temp = system_utils.cleanup_directory(
|
||||
"Temp", "*", max_age_days
|
||||
)
|
||||
deleted_dirs = system_utils.cleanup_empty_directories("logs")
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
"deleted_logs": deleted_logs,
|
||||
"deleted_temp_files": deleted_temp,
|
||||
"deleted_empty_dirs": deleted_dirs,
|
||||
"total_deleted": deleted_logs + deleted_temp + deleted_dirs,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Cleanup failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/stats")
|
||||
async def get_maintenance_stats(
|
||||
db: AsyncSession = Depends(get_database_session),
|
||||
system_utils=Depends(get_system_utils),
|
||||
) -> Dict[str, Any]:
|
||||
"""Get system maintenance statistics.
|
||||
|
||||
Args:
|
||||
db: Database session dependency.
|
||||
system_utils: System utilities dependency.
|
||||
|
||||
Returns:
|
||||
dict: Maintenance statistics.
|
||||
"""
|
||||
try:
|
||||
monitoring = get_monitoring_service()
|
||||
|
||||
# Get disk usage
|
||||
disk_info = system_utils.get_disk_usage("/")
|
||||
|
||||
# Get logs directory size
|
||||
logs_size = system_utils.get_directory_size("logs")
|
||||
data_size = system_utils.get_directory_size("data")
|
||||
temp_size = system_utils.get_directory_size("Temp")
|
||||
|
||||
# Get system info
|
||||
system_info = system_utils.get_system_info()
|
||||
|
||||
# Get queue metrics
|
||||
queue_metrics = await monitoring.get_queue_metrics(db)
|
||||
|
||||
return {
|
||||
"disk": {
|
||||
"total_gb": disk_info.total_bytes / (1024**3),
|
||||
"used_gb": disk_info.used_bytes / (1024**3),
|
||||
"free_gb": disk_info.free_bytes / (1024**3),
|
||||
"percent_used": disk_info.percent_used,
|
||||
},
|
||||
"directories": {
|
||||
"logs_mb": logs_size / (1024 * 1024),
|
||||
"data_mb": data_size / (1024 * 1024),
|
||||
"temp_mb": temp_size / (1024 * 1024),
|
||||
},
|
||||
"system": system_info,
|
||||
"queue": {
|
||||
"total_items": queue_metrics.total_items,
|
||||
"downloaded_gb": queue_metrics.downloaded_bytes / (1024**3),
|
||||
"total_gb": queue_metrics.total_size_bytes / (1024**3),
|
||||
},
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get maintenance stats: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/vacuum")
|
||||
async def vacuum_database(
|
||||
db: AsyncSession = Depends(get_database_session),
|
||||
) -> Dict[str, Any]:
|
||||
"""Optimize database (vacuum).
|
||||
|
||||
Args:
|
||||
db: Database session dependency.
|
||||
|
||||
Returns:
|
||||
dict: Vacuum result.
|
||||
"""
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
|
||||
# VACUUM command to optimize database
|
||||
await db.execute(text("VACUUM"))
|
||||
await db.commit()
|
||||
|
||||
logger.info("Database vacuumed successfully")
|
||||
return {
|
||||
"success": True,
|
||||
"message": "Database optimized successfully",
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Database vacuum failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/rebuild-index")
|
||||
async def rebuild_database_indexes(
|
||||
db: AsyncSession = Depends(get_database_session),
|
||||
) -> Dict[str, Any]:
|
||||
"""Rebuild database indexes.
|
||||
|
||||
Note: This is a placeholder as SQLite doesn't have REINDEX
|
||||
for most operations. For production databases, implement
|
||||
specific index rebuilding logic.
|
||||
|
||||
Args:
|
||||
db: Database session dependency.
|
||||
|
||||
Returns:
|
||||
dict: Rebuild result.
|
||||
"""
|
||||
try:
|
||||
from sqlalchemy import text
|
||||
|
||||
# Analyze database for query optimization
|
||||
await db.execute(text("ANALYZE"))
|
||||
await db.commit()
|
||||
|
||||
logger.info("Database indexes analyzed successfully")
|
||||
return {
|
||||
"success": True,
|
||||
"message": "Database indexes analyzed successfully",
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Index rebuild failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.post("/prune-logs")
|
||||
async def prune_old_logs(
|
||||
days: int = 7,
|
||||
system_utils=Depends(get_system_utils),
|
||||
) -> Dict[str, Any]:
|
||||
"""Remove log files older than specified days.
|
||||
|
||||
Args:
|
||||
days: Keep logs from last N days.
|
||||
system_utils: System utilities dependency.
|
||||
|
||||
Returns:
|
||||
dict: Pruning results.
|
||||
"""
|
||||
try:
|
||||
deleted = system_utils.cleanup_directory(
|
||||
"logs", "*.log", max_age_days=days
|
||||
)
|
||||
|
||||
logger.info(f"Pruned {deleted} log files")
|
||||
return {
|
||||
"success": True,
|
||||
"deleted_count": deleted,
|
||||
"message": f"Deleted {deleted} log files older than {days} days",
|
||||
}
|
||||
except Exception as e:
|
||||
logger.error(f"Log pruning failed: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/disk-usage")
async def get_disk_usage(
    system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
    """Get detailed disk usage information.

    Args:
        system_utils: System utilities dependency.

    Returns:
        dict: Per-partition usage (GB totals and percent used) plus a
        partition count.
    """
    try:
        gib = 1024 ** 3  # bytes per gibibyte, hoisted out of the loop

        # One summary dict per reported partition, sizes converted to GB.
        partitions = [
            {
                "path": info.path,
                "total_gb": info.total_bytes / gib,
                "used_gb": info.used_bytes / gib,
                "free_gb": info.free_bytes / gib,
                "percent_used": info.percent_used,
            }
            for info in system_utils.get_all_disk_usage()
        ]

        return {
            "success": True,
            "partitions": partitions,
            "total_partitions": len(partitions),
        }
    except Exception as e:
        logger.error(f"Failed to get disk usage: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
@router.get("/processes")
async def get_running_processes(
    limit: int = 10,
    system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
    """Get running processes information.

    Args:
        limit: Maximum number of processes to return.
        system_utils: System utilities dependency.

    Returns:
        dict: Top ``limit`` processes by memory use, plus the total count.
    """
    try:
        all_procs = system_utils.get_all_processes()

        # Rank by resident memory, heaviest first, then keep the top N.
        ranked = sorted(all_procs, key=lambda p: p.memory_mb, reverse=True)

        top_processes = [
            {
                "pid": proc.pid,
                "name": proc.name,
                "cpu_percent": round(proc.cpu_percent, 2),
                "memory_mb": round(proc.memory_mb, 2),
                "status": proc.status,
            }
            for proc in ranked[:limit]
        ]

        return {
            "success": True,
            "processes": top_processes,
            "total_processes": len(all_procs),
        }
    except Exception as e:
        logger.error(f"Failed to get processes: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
|
||||
def _collect_health_issues(db_health, fs_health, system_metrics, error_metrics):
    """Return a list of human-readable issue strings for the health report.

    Args:
        db_health: Database health result with ``status`` attribute.
        fs_health: Filesystem health dict with a ``status`` key.
        system_metrics: System metrics with ``cpu_percent`` / ``memory_percent``.
        error_metrics: Error metrics with ``error_rate_per_hour``.
    """
    issues = []
    if db_health.status != "healthy":
        issues.append("Database connectivity issue")
    if fs_health.get("status") != "healthy":
        issues.append("Filesystem accessibility issue")
    # 80% thresholds match common resource-alerting defaults.
    if system_metrics.cpu_percent > 80:
        issues.append(f"High CPU usage: {system_metrics.cpu_percent}%")
    if system_metrics.memory_percent > 80:
        issues.append(
            f"High memory usage: {system_metrics.memory_percent}%"
        )
    if error_metrics.error_rate_per_hour > 1.0:
        issues.append(
            f"High error rate: "
            f"{error_metrics.error_rate_per_hour:.2f} errors/hour"
        )
    return issues


@router.post("/health-check")
async def full_health_check(
    db: AsyncSession = Depends(get_database_session),
    system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
    """Perform full system health check and generate report.

    Args:
        db: Database session dependency.
        system_utils: System utilities dependency (kept for DI interface
            compatibility; not read directly here).

    Returns:
        dict: Complete health check report with ``overall_health``
        ("healthy"/"degraded"/"unhealthy"), the detected ``issues``,
        and grouped ``metrics``.

    Raises:
        HTTPException: 500 if any underlying check fails.
    """
    try:
        monitoring = get_monitoring_service()

        # Imported locally to avoid a circular import with the health API.
        from src.server.api.health import check_database_health
        from src.server.api.health import check_filesystem_health as check_fs

        db_health = await check_database_health(db)
        fs_health = check_fs()

        system_metrics = monitoring.get_system_metrics()
        error_metrics = monitoring.get_error_metrics()
        queue_metrics = await monitoring.get_queue_metrics(db)

        issues = _collect_health_issues(
            db_health, fs_health, system_metrics, error_metrics
        )

        # Fewer than 3 issues => "degraded"; 3 or more => "unhealthy".
        overall_health = "healthy"
        if issues:
            overall_health = "degraded" if len(issues) < 3 else "unhealthy"

        return {
            "overall_health": overall_health,
            "issues": issues,
            "metrics": {
                "database": {
                    "status": db_health.status,
                    "connection_time_ms": db_health.connection_time_ms,
                },
                "filesystem": fs_health,
                "system": {
                    "cpu_percent": system_metrics.cpu_percent,
                    "memory_percent": system_metrics.memory_percent,
                    "disk_percent": system_metrics.disk_percent,
                },
                "queue": {
                    "total_items": queue_metrics.total_items,
                    "failed_items": queue_metrics.failed_items,
                    "success_rate": round(queue_metrics.success_rate, 2),
                },
                "errors": {
                    "errors_24h": error_metrics.errors_24h,
                    "rate_per_hour": round(
                        error_metrics.error_rate_per_hour, 2
                    ),
                },
            },
        }
    except Exception as e:
        # Chain the cause so the 500's traceback keeps the root failure.
        logger.error("Health check failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
||||
Reference in New Issue
Block a user