This commit is contained in:
2025-10-22 09:20:35 +02:00
parent 1c8c18c1ea
commit 9e686017a6
18 changed files with 5177 additions and 0 deletions

270
src/server/api/analytics.py Normal file
View File

@@ -0,0 +1,270 @@
"""Analytics API endpoints for accessing system analytics and reports.
Provides REST API endpoints for querying analytics data including download
statistics, series popularity, storage analysis, and performance reports.
"""
from typing import Optional

from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession

from src.server.database.connection import get_db
from src.server.services.analytics_service import get_analytics_service
router = APIRouter(prefix="/api/analytics", tags=["analytics"])
class DownloadStatsResponse(BaseModel):
    """Download statistics response model.

    Aggregated download metrics for a reporting period, returned by
    GET /api/analytics/downloads and embedded in the summary report.
    """

    total_downloads: int  # all attempts in the period
    successful_downloads: int
    failed_downloads: int
    total_bytes_downloaded: int
    average_speed_mbps: float
    # NOTE(review): units/scale (0-1 vs 0-100) not visible here — defined
    # by the analytics service; confirm before displaying.
    success_rate: float
    average_duration_seconds: float
class SeriesPopularityResponse(BaseModel):
    """Series popularity response model.

    One row of the popularity ranking returned by
    GET /api/analytics/series-popularity.
    """

    series_name: str
    download_count: int
    total_size_bytes: int
    # Timestamp of the most recent download, or None if never downloaded.
    # Presumably ISO 8601 — TODO confirm against the analytics service.
    last_download: Optional[str]
    success_rate: float
class StorageAnalysisResponse(BaseModel):
    """Storage analysis response model.

    Disk-level totals plus per-directory sizes, returned by
    GET /api/analytics/storage.
    """

    total_storage_bytes: int
    used_storage_bytes: int
    free_storage_bytes: int
    storage_percent_used: float
    downloads_directory_size_bytes: int
    cache_directory_size_bytes: int
    logs_directory_size_bytes: int
class PerformanceReportResponse(BaseModel):
    """Performance report response model.

    System/throughput metrics over a time window, returned by
    GET /api/analytics/performance.
    """

    period_start: str  # start of the analyzed window (string timestamp)
    period_end: str  # end of the analyzed window (string timestamp)
    downloads_per_hour: float
    average_queue_size: float
    peak_memory_usage_mb: float
    average_cpu_percent: float
    uptime_seconds: float
    error_rate: float
class SummaryReportResponse(BaseModel):
    """Comprehensive analytics summary response.

    Bundles all individual analytics reports into one payload for
    GET /api/analytics/summary.
    """

    timestamp: str  # when the summary was generated
    download_stats: DownloadStatsResponse
    series_popularity: list[SeriesPopularityResponse]
    storage_analysis: StorageAnalysisResponse
    performance_report: PerformanceReportResponse
@router.get("/downloads", response_model=DownloadStatsResponse)
async def get_download_statistics(
    days: int = 30,
    db: AsyncSession = Depends(get_db),
) -> DownloadStatsResponse:
    """Get download statistics for specified period.

    Args:
        days: Number of days to analyze (default: 30)
        db: Database session (injected by FastAPI)

    Returns:
        Download statistics including success rates and speeds

    Raises:
        HTTPException: 500 if the analytics service fails.
    """
    # Fix: the original defaulted db to None and manually advanced
    # get_db().__anext__(), which bypasses dependency injection and leaks
    # the generator (its cleanup/close never runs). Depends() handles both.
    try:
        service = get_analytics_service()
        stats = await service.get_download_stats(db, days=days)
        return DownloadStatsResponse(
            total_downloads=stats.total_downloads,
            successful_downloads=stats.successful_downloads,
            failed_downloads=stats.failed_downloads,
            total_bytes_downloaded=stats.total_bytes_downloaded,
            average_speed_mbps=stats.average_speed_mbps,
            success_rate=stats.success_rate,
            average_duration_seconds=stats.average_duration_seconds,
        )
    except Exception as e:
        # Chain the cause so the original traceback is preserved (B904).
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get download statistics: {str(e)}",
        ) from e
@router.get(
    "/series-popularity",
    response_model=list[SeriesPopularityResponse]
)
async def get_series_popularity(
    limit: int = 10,
    db: AsyncSession = Depends(get_db),
) -> list[SeriesPopularityResponse]:
    """Get most popular series by download count.

    Args:
        limit: Maximum number of series (default: 10)
        db: Database session (injected by FastAPI)

    Returns:
        List of series sorted by popularity

    Raises:
        HTTPException: 500 if the analytics service fails.
    """
    # Fix: use Depends(get_db) instead of manually calling
    # get_db().__anext__(), which leaked the session generator.
    try:
        service = get_analytics_service()
        popularity = await service.get_series_popularity(db, limit=limit)
        return [
            SeriesPopularityResponse(
                series_name=p.series_name,
                download_count=p.download_count,
                total_size_bytes=p.total_size_bytes,
                last_download=p.last_download,
                success_rate=p.success_rate,
            )
            for p in popularity
        ]
    except Exception as e:
        # Preserve the original traceback for debugging (B904).
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get series popularity: {str(e)}",
        ) from e
@router.get(
    "/storage",
    response_model=StorageAnalysisResponse
)
async def get_storage_analysis() -> StorageAnalysisResponse:
    """Get current storage usage analysis.

    Returns:
        Storage breakdown including disk and directory usage

    Raises:
        HTTPException: 500 if the analytics service fails.
    """
    try:
        service = get_analytics_service()
        # Synchronous call — no db session needed for storage analysis.
        analysis = service.get_storage_analysis()
        return StorageAnalysisResponse(
            total_storage_bytes=analysis.total_storage_bytes,
            used_storage_bytes=analysis.used_storage_bytes,
            free_storage_bytes=analysis.free_storage_bytes,
            storage_percent_used=analysis.storage_percent_used,
            downloads_directory_size_bytes=(
                analysis.downloads_directory_size_bytes
            ),
            cache_directory_size_bytes=(
                analysis.cache_directory_size_bytes
            ),
            logs_directory_size_bytes=(
                analysis.logs_directory_size_bytes
            ),
        )
    except Exception as e:
        # Chain the cause so the original traceback is preserved (B904).
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get storage analysis: {str(e)}",
        ) from e
@router.get(
    "/performance",
    response_model=PerformanceReportResponse
)
async def get_performance_report(
    hours: int = 24,
    db: AsyncSession = Depends(get_db),
) -> PerformanceReportResponse:
    """Get performance metrics for specified period.

    Args:
        hours: Number of hours to analyze (default: 24)
        db: Database session (injected by FastAPI)

    Returns:
        Performance metrics including speeds and system usage

    Raises:
        HTTPException: 500 if the analytics service fails.
    """
    # Fix: Depends(get_db) replaces the manual __anext__() call that
    # leaked the session generator.
    try:
        service = get_analytics_service()
        report = await service.get_performance_report(db, hours=hours)
        return PerformanceReportResponse(
            period_start=report.period_start,
            period_end=report.period_end,
            downloads_per_hour=report.downloads_per_hour,
            average_queue_size=report.average_queue_size,
            peak_memory_usage_mb=report.peak_memory_usage_mb,
            average_cpu_percent=report.average_cpu_percent,
            uptime_seconds=report.uptime_seconds,
            error_rate=report.error_rate,
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to get performance report: {str(e)}",
        ) from e
@router.get("/summary", response_model=SummaryReportResponse)
async def get_summary_report(
    db: AsyncSession = Depends(get_db),
) -> SummaryReportResponse:
    """Get comprehensive analytics summary.

    Args:
        db: Database session (injected by FastAPI)

    Returns:
        Complete analytics report with all metrics

    Raises:
        HTTPException: 500 if the analytics service fails.
    """
    # Fix: Depends(get_db) replaces the manual __anext__() call that
    # leaked the session generator.
    try:
        service = get_analytics_service()
        summary = await service.generate_summary_report(db)
        # The service returns plain dicts; re-validate through the
        # response models so schema drift fails loudly here.
        return SummaryReportResponse(
            timestamp=summary["timestamp"],
            download_stats=DownloadStatsResponse(
                **summary["download_stats"]
            ),
            series_popularity=[
                SeriesPopularityResponse(**p)
                for p in summary["series_popularity"]
            ],
            storage_analysis=StorageAnalysisResponse(
                **summary["storage_analysis"]
            ),
            performance_report=PerformanceReportResponse(
                **summary["performance_report"]
            ),
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Failed to generate summary report: {str(e)}",
        ) from e

304
src/server/api/backup.py Normal file
View File

@@ -0,0 +1,304 @@
"""Backup management API endpoints."""
import logging
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from src.server.services.backup_service import BackupService, get_backup_service
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/backup", tags=["backup"])
class BackupCreateRequest(BaseModel):
    """Request to create a backup.

    Body of POST /api/backup/create.
    """

    backup_type: str  # one of 'config', 'database', 'full'
    description: Optional[str] = None  # optional human-readable label
class BackupResponse(BaseModel):
    """Response for backup creation.

    backup_name/size_bytes are only populated when success is True.
    """

    success: bool
    message: str
    backup_name: Optional[str] = None
    size_bytes: Optional[int] = None
class BackupListResponse(BaseModel):
    """Response for listing backups.

    Dict shape of each entry is defined by BackupService.list_backups.
    """

    backups: List[Dict[str, Any]]
    total_count: int  # convenience: len(backups)
class RestoreRequest(BaseModel):
    """Request to restore from backup.

    backup_name must match a name returned by the list endpoint.
    """

    backup_name: str
class RestoreResponse(BaseModel):
    """Response for restore operation."""

    success: bool  # True when the restore completed
    message: str  # human-readable outcome description
def get_backup_service_dep() -> BackupService:
    """FastAPI dependency returning the shared BackupService.

    Thin wrapper around get_backup_service() so routes can declare it
    with Depends() and tests can override it.
    """
    return get_backup_service()
@router.post("/create", response_model=BackupResponse)
async def create_backup(
    request: BackupCreateRequest,
    backup_service: BackupService = Depends(get_backup_service_dep),
) -> BackupResponse:
    """Create a new backup.

    Args:
        request: Backup creation request.
        backup_service: Backup service dependency.

    Returns:
        BackupResponse: Result of backup creation.

    Raises:
        HTTPException: 400 for an unknown backup type, 500 on failure.
    """
    # Dispatch table instead of an if/elif chain; also lets us validate
    # the type *before* the try block, so a bad request is reported as
    # 400 rather than swallowed by the generic 500 handler (the original
    # raised ValueError inside the try and returned 500).
    handlers = {
        "config": backup_service.backup_configuration,
        "database": backup_service.backup_database,
        "full": backup_service.backup_full,
    }
    handler = handlers.get(request.backup_type)
    if handler is None:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid backup type: {request.backup_type}",
        )
    try:
        backup_info = handler(request.description or "")
        if backup_info is None:
            return BackupResponse(
                success=False,
                message=f"Failed to create {request.backup_type} backup",
            )
        return BackupResponse(
            success=True,
            message=(
                f"{request.backup_type.capitalize()} backup created "
                "successfully"
            ),
            backup_name=backup_info.name,
            size_bytes=backup_info.size_bytes,
        )
    except Exception as e:
        # exception() logs the traceback; %s defers formatting.
        logger.exception("Failed to create backup: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/list", response_model=BackupListResponse)
async def list_backups(
    backup_type: Optional[str] = None,
    backup_service: BackupService = Depends(get_backup_service_dep),
) -> BackupListResponse:
    """List available backups.

    Args:
        backup_type: Optional filter by backup type.
        backup_service: Backup service dependency.

    Returns:
        BackupListResponse: List of available backups.

    Raises:
        HTTPException: 500 if the backup service fails.
    """
    try:
        backups = backup_service.list_backups(backup_type)
        return BackupListResponse(backups=backups, total_count=len(backups))
    except Exception as e:
        # exception() logs the traceback; chain the cause (B904).
        logger.exception("Failed to list backups: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/restore", response_model=RestoreResponse)
async def restore_backup(
    request: RestoreRequest,
    backup_type: Optional[str] = None,
    backup_service: BackupService = Depends(get_backup_service_dep),
) -> RestoreResponse:
    """Restore from a backup.

    Args:
        request: Restore request.
        backup_type: Type of backup to restore; inferred from the
            backup filename when omitted.
        backup_service: Backup service dependency.

    Returns:
        RestoreResponse: Result of restore operation.

    Raises:
        HTTPException: 400 for an unrestorable type, 500 on failure.
    """
    # Infer backup type from filename if not provided.
    if backup_type is None:
        if "config" in request.backup_name:
            backup_type = "config"
        elif "database" in request.backup_name:
            backup_type = "database"
        else:
            backup_type = "full"

    # Validate before the try block: an unsupported type is a client
    # error (400), not a server failure. The original raised ValueError
    # inside the try and the generic handler turned it into a 500.
    if backup_type not in ("config", "database"):
        raise HTTPException(
            status_code=400,
            detail=f"Cannot restore backup type: {backup_type}",
        )

    try:
        if backup_type == "config":
            success = backup_service.restore_configuration(
                request.backup_name
            )
        else:
            success = backup_service.restore_database(request.backup_name)
        if not success:
            return RestoreResponse(
                success=False,
                message=f"Failed to restore {backup_type} backup",
            )
        return RestoreResponse(
            success=True,
            message=f"{backup_type.capitalize()} backup restored successfully",
        )
    except Exception as e:
        logger.exception("Failed to restore backup: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.delete("/{backup_name}", response_model=Dict[str, Any])
async def delete_backup(
    backup_name: str,
    backup_service: BackupService = Depends(get_backup_service_dep),
) -> Dict[str, Any]:
    """Delete a backup.

    Args:
        backup_name: Name of the backup to delete.
        backup_service: Backup service dependency.

    Returns:
        dict: Result of delete operation.

    Raises:
        HTTPException: 404 if the backup does not exist, 500 on failure.
    """
    try:
        success = backup_service.delete_backup(backup_name)
        if not success:
            raise HTTPException(status_code=404, detail="Backup not found")
        return {"success": True, "message": "Backup deleted successfully"}
    except HTTPException:
        # Don't let the 404 be re-wrapped as a 500 below.
        raise
    except Exception as e:
        logger.exception("Failed to delete backup: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/cleanup", response_model=Dict[str, Any])
async def cleanup_backups(
    max_backups: int = 10,
    backup_type: Optional[str] = None,
    backup_service: BackupService = Depends(get_backup_service_dep),
) -> Dict[str, Any]:
    """Clean up old backups.

    Args:
        max_backups: Maximum number of backups to keep.
        backup_type: Optional filter by backup type.
        backup_service: Backup service dependency.

    Returns:
        dict: Number of backups deleted.

    Raises:
        HTTPException: 500 if the cleanup fails.
    """
    try:
        deleted_count = backup_service.cleanup_old_backups(
            max_backups, backup_type
        )
        return {
            "success": True,
            "message": "Cleanup completed",
            "deleted_count": deleted_count,
        }
    except Exception as e:
        logger.exception("Failed to cleanup backups: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/export/anime", response_model=Dict[str, Any])
async def export_anime_data(
    backup_service: BackupService = Depends(get_backup_service_dep),
) -> Dict[str, Any]:
    """Export anime library data.

    Args:
        backup_service: Backup service dependency.

    Returns:
        dict: Result of export operation, including the export file path.

    Raises:
        HTTPException: 500 if the export fails.
    """
    try:
        # NOTE(review): destination is hard-coded; consider making it
        # configurable if multiple exports must coexist.
        output_file = "data/backups/anime_export.json"
        success = backup_service.export_anime_data(output_file)
        if not success:
            raise HTTPException(
                status_code=500, detail="Failed to export anime data"
            )
        return {
            "success": True,
            "message": "Anime data exported successfully",
            "export_file": output_file,
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.exception("Failed to export anime data: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/import/anime", response_model=Dict[str, Any])
async def import_anime_data(
    import_file: str,
    backup_service: BackupService = Depends(get_backup_service_dep),
) -> Dict[str, Any]:
    """Import anime library data.

    Args:
        import_file: Path to import file.
        backup_service: Backup service dependency.

    Returns:
        dict: Result of import operation.

    Raises:
        HTTPException: 400 if the import fails, 500 on unexpected error.
    """
    try:
        success = backup_service.import_anime_data(import_file)
        if not success:
            raise HTTPException(
                status_code=400, detail="Failed to import anime data"
            )
        return {
            "success": True,
            "message": "Anime data imported successfully",
        }
    except HTTPException:
        # Preserve the 400 instead of re-wrapping it as a 500.
        raise
    except Exception as e:
        logger.exception("Failed to import anime data: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e

266
src/server/api/health.py Normal file
View File

@@ -0,0 +1,266 @@
"""Health check endpoints for system monitoring and status verification."""
import logging
from datetime import datetime
from typing import Any, Dict, Optional
import psutil
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from src.server.utils.dependencies import get_database_session
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/health", tags=["health"])
class HealthStatus(BaseModel):
    """Basic health status response.

    Minimal payload returned by GET /health.
    """

    status: str  # "healthy" (the basic check never reports otherwise)
    timestamp: str  # ISO 8601 timestamp of the check
    version: str = "1.0.0"  # static API version string
class DatabaseHealth(BaseModel):
    """Database health status."""

    status: str  # "healthy" or "unhealthy"
    connection_time_ms: float  # SELECT 1 round-trip; 0 on failure
    message: Optional[str] = None  # success note or failure reason
class SystemMetrics(BaseModel):
    """System resource metrics (collected via psutil)."""

    cpu_percent: float
    memory_percent: float
    memory_available_mb: float
    disk_percent: float  # usage of the "/" partition
    disk_free_mb: float
    uptime_seconds: float  # seconds since system boot
class DependencyHealth(BaseModel):
    """Health status of external dependencies."""

    database: DatabaseHealth
    # Free-form dict from check_filesystem_health (status, writability flags).
    filesystem: Dict[str, Any]
    system: SystemMetrics
class DetailedHealthStatus(BaseModel):
    """Comprehensive health check response.

    Returned by GET /health/detailed.
    """

    status: str  # overall: "healthy" or "degraded"
    timestamp: str  # ISO 8601 timestamp of the check
    version: str = "1.0.0"
    dependencies: DependencyHealth
    startup_time: datetime  # process start time (module import time)
# Global startup time
startup_time = datetime.now()
async def check_database_health(db: AsyncSession) -> DatabaseHealth:
    """Check database connection and performance.

    Args:
        db: Database session dependency.

    Returns:
        DatabaseHealth: Database status and connection time in ms.
    """
    try:
        import time

        # perf_counter is monotonic; time.time() can jump (NTP sync,
        # clock adjustments) and yield negative/garbage latencies.
        start_time = time.perf_counter()
        await db.execute(text("SELECT 1"))
        connection_time = (time.perf_counter() - start_time) * 1000  # ms
        return DatabaseHealth(
            status="healthy",
            connection_time_ms=connection_time,
            message="Database connection successful",
        )
    except Exception as e:
        # exception() records the traceback for diagnosis.
        logger.exception("Database health check failed: %s", e)
        return DatabaseHealth(
            status="unhealthy",
            connection_time_ms=0,
            message=f"Database connection failed: {str(e)}",
        )
async def check_filesystem_health() -> Dict[str, Any]:
    """Check filesystem availability and permissions.

    Verifies that the data and logs directories exist and are writable.

    Returns:
        dict: Filesystem status and per-directory writability flags.
    """
    try:
        import os

        # A directory counts as accessible only if it exists AND the
        # process can write to it.
        writable = {
            name: os.path.exists(name) and os.access(name, os.W_OK)
            for name in ("data", "logs")
        }
        status = "healthy" if all(writable.values()) else "degraded"
        return {
            "status": status,
            "data_dir_writable": writable["data"],
            "logs_dir_writable": writable["logs"],
            "message": "Filesystem check completed",
        }
    except Exception as e:
        logger.error(f"Filesystem health check failed: {e}")
        return {
            "status": "unhealthy",
            "message": f"Filesystem check failed: {str(e)}",
        }
def get_system_metrics() -> SystemMetrics:
    """Get system resource metrics.

    Returns:
        SystemMetrics: CPU, memory, disk, and uptime information.

    Raises:
        HTTPException: 500 if metrics collection fails.
    """
    try:
        import time

        # NOTE(review): interval=1 blocks the calling thread for one
        # second per request. Consider interval=None (non-blocking,
        # delta since last call) if this endpoint is polled frequently.
        cpu_percent = psutil.cpu_percent(interval=1)
        memory_info = psutil.virtual_memory()
        disk_info = psutil.disk_usage("/")
        uptime_seconds = time.time() - psutil.boot_time()
        return SystemMetrics(
            cpu_percent=cpu_percent,
            memory_percent=memory_info.percent,
            memory_available_mb=memory_info.available / (1024 * 1024),
            disk_percent=disk_info.percent,
            disk_free_mb=disk_info.free / (1024 * 1024),
            uptime_seconds=uptime_seconds,
        )
    except Exception as e:
        # Fix: removed unused `import os`; chain the cause (B904).
        logger.exception("System metrics collection failed: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Failed to collect system metrics: {str(e)}"
        ) from e
@router.get("", response_model=HealthStatus)
async def basic_health_check() -> HealthStatus:
    """Liveness probe: always reports healthy with the current time.

    Returns:
        HealthStatus: Simple health status with timestamp.
    """
    logger.debug("Basic health check requested")
    now = datetime.now().isoformat()
    return HealthStatus(status="healthy", timestamp=now)
@router.get("/detailed", response_model=DetailedHealthStatus)
async def detailed_health_check(
    db: AsyncSession = Depends(get_database_session),
) -> DetailedHealthStatus:
    """Comprehensive health check endpoint.

    Checks database, filesystem, and system metrics.

    Args:
        db: Database session dependency.

    Returns:
        DetailedHealthStatus: Comprehensive health information.

    Raises:
        HTTPException: 500 if the health check itself fails.
    """
    logger.debug("Detailed health check requested")
    try:
        database_health = await check_database_health(db)
        filesystem_health = await check_filesystem_health()
        # May raise HTTPException(500) — re-raised as-is below.
        system_metrics = get_system_metrics()

        # Any degraded dependency degrades the overall status.
        overall_status = "healthy"
        if database_health.status != "healthy":
            overall_status = "degraded"
        if filesystem_health.get("status") != "healthy":
            overall_status = "degraded"

        dependencies = DependencyHealth(
            database=database_health,
            filesystem=filesystem_health,
            system=system_metrics,
        )
        return DetailedHealthStatus(
            status=overall_status,
            timestamp=datetime.now().isoformat(),
            dependencies=dependencies,
            startup_time=startup_time,
        )
    except HTTPException:
        # Fix: don't re-wrap HTTPExceptions (e.g. from get_system_metrics)
        # into the generic message below — preserve the specific detail.
        raise
    except Exception as e:
        logger.exception("Detailed health check failed: %s", e)
        raise HTTPException(status_code=500, detail="Health check failed") from e
@router.get("/metrics", response_model=SystemMetrics)
async def get_metrics() -> SystemMetrics:
    """Return a snapshot of system resource metrics.

    Returns:
        SystemMetrics: Current CPU, memory, disk, and uptime metrics.
    """
    logger.debug("System metrics requested")
    snapshot = get_system_metrics()
    return snapshot
@router.get("/metrics/prometheus")
async def get_prometheus_metrics() -> str:
    """Export collected metrics in Prometheus text exposition format.

    Returns:
        str: Prometheus formatted metrics.
    """
    # Imported lazily to avoid a circular import at module load time.
    from src.server.utils.metrics import get_metrics_collector

    logger.debug("Prometheus metrics requested")
    return get_metrics_collector().export_prometheus_format()
@router.get("/metrics/json")
async def get_metrics_json() -> Dict[str, Any]:
    """Export collected metrics as a JSON document.

    Returns:
        dict: Metrics in JSON format.
    """
    # Imported lazily to avoid a circular import at module load time.
    from src.server.utils.metrics import get_metrics_collector

    logger.debug("JSON metrics requested")
    return get_metrics_collector().export_json()

View File

@@ -0,0 +1,369 @@
"""Maintenance and system management API endpoints."""
import logging
from typing import Any, Dict
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from src.server.services.monitoring_service import get_monitoring_service
from src.server.utils.dependencies import get_database_session
from src.server.utils.system import get_system_utilities
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/maintenance", tags=["maintenance"])
def get_system_utils():
    """FastAPI dependency returning the shared system utilities object.

    Thin wrapper around get_system_utilities() so routes can declare it
    with Depends() and tests can override it.
    """
    return get_system_utilities()
@router.post("/cleanup")
async def cleanup_temporary_files(
    max_age_days: int = 30,
    system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
    """Clean up temporary and old files.

    Args:
        max_age_days: Delete files older than this many days.
        system_utils: System utilities dependency.

    Returns:
        dict: Cleanup results with per-category and total counts.

    Raises:
        HTTPException: 500 if cleanup fails.
    """
    try:
        deleted_logs = system_utils.cleanup_directory(
            "logs", "*.log", max_age_days
        )
        # NOTE(review): "Temp" with a capital T looks platform-specific —
        # confirm this matches the actual temp directory name.
        deleted_temp = system_utils.cleanup_directory(
            "Temp", "*", max_age_days
        )
        deleted_dirs = system_utils.cleanup_empty_directories("logs")
        return {
            "success": True,
            "deleted_logs": deleted_logs,
            "deleted_temp_files": deleted_temp,
            "deleted_empty_dirs": deleted_dirs,
            "total_deleted": deleted_logs + deleted_temp + deleted_dirs,
        }
    except Exception as e:
        # exception() logs the traceback; chain the cause (B904).
        logger.exception("Cleanup failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/stats")
async def get_maintenance_stats(
    db: AsyncSession = Depends(get_database_session),
    system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
    """Get system maintenance statistics.

    Args:
        db: Database session dependency.
        system_utils: System utilities dependency.

    Returns:
        dict: Maintenance statistics (disk, directories, system, queue).

    Raises:
        HTTPException: 500 if statistics collection fails.
    """
    try:
        monitoring = get_monitoring_service()
        disk_info = system_utils.get_disk_usage("/")
        logs_size = system_utils.get_directory_size("logs")
        data_size = system_utils.get_directory_size("data")
        temp_size = system_utils.get_directory_size("Temp")
        system_info = system_utils.get_system_info()
        queue_metrics = await monitoring.get_queue_metrics(db)
        # Byte counts converted to GB/MB for human-readable output.
        return {
            "disk": {
                "total_gb": disk_info.total_bytes / (1024**3),
                "used_gb": disk_info.used_bytes / (1024**3),
                "free_gb": disk_info.free_bytes / (1024**3),
                "percent_used": disk_info.percent_used,
            },
            "directories": {
                "logs_mb": logs_size / (1024 * 1024),
                "data_mb": data_size / (1024 * 1024),
                "temp_mb": temp_size / (1024 * 1024),
            },
            "system": system_info,
            "queue": {
                "total_items": queue_metrics.total_items,
                "downloaded_gb": queue_metrics.downloaded_bytes / (1024**3),
                "total_gb": queue_metrics.total_size_bytes / (1024**3),
            },
        }
    except Exception as e:
        logger.exception("Failed to get maintenance stats: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/vacuum")
async def vacuum_database(
    db: AsyncSession = Depends(get_database_session),
) -> Dict[str, Any]:
    """Optimize database (vacuum).

    Args:
        db: Database session dependency.

    Returns:
        dict: Vacuum result.

    Raises:
        HTTPException: 500 if the vacuum fails.
    """
    try:
        from sqlalchemy import text

        # NOTE(review): VACUUM typically cannot run inside an open
        # transaction (SQLite/Postgres). If this fails with a
        # transaction error, execute it on a raw autocommit connection.
        await db.execute(text("VACUUM"))
        await db.commit()
        logger.info("Database vacuumed successfully")
        return {
            "success": True,
            "message": "Database optimized successfully",
        }
    except Exception as e:
        logger.exception("Database vacuum failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/rebuild-index")
async def rebuild_database_indexes(
    db: AsyncSession = Depends(get_database_session),
) -> Dict[str, Any]:
    """Rebuild database indexes.

    Note: This is a placeholder as SQLite doesn't have REINDEX
    for most operations. For production databases, implement
    specific index rebuilding logic.

    Args:
        db: Database session dependency.

    Returns:
        dict: Rebuild result.

    Raises:
        HTTPException: 500 if the analysis fails.
    """
    try:
        from sqlalchemy import text

        # ANALYZE refreshes planner statistics; it does not rebuild
        # indexes, despite this endpoint's name.
        await db.execute(text("ANALYZE"))
        await db.commit()
        logger.info("Database indexes analyzed successfully")
        return {
            "success": True,
            "message": "Database indexes analyzed successfully",
        }
    except Exception as e:
        logger.exception("Index rebuild failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/prune-logs")
async def prune_old_logs(
    days: int = 7,
    system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
    """Remove log files older than specified days.

    Args:
        days: Keep logs from last N days.
        system_utils: System utilities dependency.

    Returns:
        dict: Pruning results.

    Raises:
        HTTPException: 500 if the pruning fails.
    """
    try:
        deleted = system_utils.cleanup_directory(
            "logs", "*.log", max_age_days=days
        )
        logger.info("Pruned %d log files", deleted)
        return {
            "success": True,
            "deleted_count": deleted,
            "message": f"Deleted {deleted} log files older than {days} days",
        }
    except Exception as e:
        logger.exception("Log pruning failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/disk-usage")
async def get_disk_usage(
    system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
    """Get detailed disk usage information.

    Args:
        system_utils: System utilities dependency.

    Returns:
        dict: Disk usage for all partitions.

    Raises:
        HTTPException: 500 if usage collection fails.
    """
    try:
        # Comprehension instead of loop-and-append (PERF401).
        partitions = [
            {
                "path": disk_info.path,
                "total_gb": disk_info.total_bytes / (1024**3),
                "used_gb": disk_info.used_bytes / (1024**3),
                "free_gb": disk_info.free_bytes / (1024**3),
                "percent_used": disk_info.percent_used,
            }
            for disk_info in system_utils.get_all_disk_usage()
        ]
        return {
            "success": True,
            "partitions": partitions,
            "total_partitions": len(partitions),
        }
    except Exception as e:
        logger.exception("Failed to get disk usage: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/processes")
async def get_running_processes(
    limit: int = 10,
    system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
    """Get running processes information.

    Args:
        limit: Maximum number of processes to return.
        system_utils: System utilities dependency.

    Returns:
        dict: Top processes by memory usage, plus total process count.

    Raises:
        HTTPException: 500 if process enumeration fails.
    """
    try:
        processes = system_utils.get_all_processes()
        # Sort by memory usage (descending) and keep only the top N.
        sorted_processes = sorted(
            processes, key=lambda x: x.memory_mb, reverse=True
        )
        top_processes = [
            {
                "pid": proc.pid,
                "name": proc.name,
                "cpu_percent": round(proc.cpu_percent, 2),
                "memory_mb": round(proc.memory_mb, 2),
                "status": proc.status,
            }
            for proc in sorted_processes[:limit]
        ]
        return {
            "success": True,
            "processes": top_processes,
            "total_processes": len(processes),
        }
    except Exception as e:
        logger.exception("Failed to get processes: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/health-check")
async def full_health_check(
    db: AsyncSession = Depends(get_database_session),
    system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
    """Perform full system health check and generate report.

    Args:
        db: Database session dependency.
        system_utils: System utilities dependency.

    Returns:
        dict: Complete health check report.

    Raises:
        HTTPException: 500 if the health check cannot be completed.
    """
    try:
        monitoring = get_monitoring_service()
        # Check database and filesystem
        from src.server.api.health import check_database_health
        from src.server.api.health import check_filesystem_health as check_fs

        db_health = await check_database_health(db)
        # BUG FIX: check_filesystem_health is a coroutine function; the
        # original called it without await, so fs_health was a coroutine
        # object and fs_health.get(...) raised AttributeError, making
        # this endpoint always return 500.
        fs_health = await check_fs()
        system_metrics = monitoring.get_system_metrics()
        error_metrics = monitoring.get_error_metrics()
        queue_metrics = await monitoring.get_queue_metrics(db)

        # Collect human-readable issue descriptions; thresholds are
        # fixed heuristics (80% CPU/memory, 1 error/hour).
        issues = []
        if db_health.status != "healthy":
            issues.append("Database connectivity issue")
        if fs_health.get("status") != "healthy":
            issues.append("Filesystem accessibility issue")
        if system_metrics.cpu_percent > 80:
            issues.append(f"High CPU usage: {system_metrics.cpu_percent}%")
        if system_metrics.memory_percent > 80:
            issues.append(
                f"High memory usage: {system_metrics.memory_percent}%"
            )
        if error_metrics.error_rate_per_hour > 1.0:
            issues.append(
                f"High error rate: "
                f"{error_metrics.error_rate_per_hour:.2f} errors/hour"
            )

        # 1-2 issues => degraded; 3+ => unhealthy.
        overall_health = "healthy"
        if issues:
            overall_health = "degraded" if len(issues) < 3 else "unhealthy"

        return {
            "overall_health": overall_health,
            "issues": issues,
            "metrics": {
                "database": {
                    "status": db_health.status,
                    "connection_time_ms": db_health.connection_time_ms,
                },
                "filesystem": fs_health,
                "system": {
                    "cpu_percent": system_metrics.cpu_percent,
                    "memory_percent": system_metrics.memory_percent,
                    "disk_percent": system_metrics.disk_percent,
                },
                "queue": {
                    "total_items": queue_metrics.total_items,
                    "failed_items": queue_metrics.failed_items,
                    "success_rate": round(queue_metrics.success_rate, 2),
                },
                "errors": {
                    "errors_24h": error_metrics.errors_24h,
                    "rate_per_hour": round(
                        error_metrics.error_rate_per_hour, 2
                    ),
                },
            },
        }
    except Exception as e:
        logger.exception("Health check failed: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e