2025-10-22 09:20:35 +02:00
parent 1c8c18c1ea
commit 9e686017a6
18 changed files with 5177 additions and 0 deletions

src/server/api/analytics.py (new file, 270 lines added)

@@ -0,0 +1,270 @@
"""Analytics API endpoints for accessing system analytics and reports.
Provides REST API endpoints for querying analytics data including download
statistics, series popularity, storage analysis, and performance reports.
"""
from typing import Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy.ext.asyncio import AsyncSession
from src.server.database.connection import get_db
from src.server.services.analytics_service import get_analytics_service
router = APIRouter(prefix="/api/analytics", tags=["analytics"])
class DownloadStatsResponse(BaseModel):
"""Download statistics response model."""
total_downloads: int
successful_downloads: int
failed_downloads: int
total_bytes_downloaded: int
average_speed_mbps: float
success_rate: float
average_duration_seconds: float
class SeriesPopularityResponse(BaseModel):
"""Series popularity response model."""
series_name: str
download_count: int
total_size_bytes: int
last_download: Optional[str]
success_rate: float
class StorageAnalysisResponse(BaseModel):
"""Storage analysis response model."""
total_storage_bytes: int
used_storage_bytes: int
free_storage_bytes: int
storage_percent_used: float
downloads_directory_size_bytes: int
cache_directory_size_bytes: int
logs_directory_size_bytes: int
class PerformanceReportResponse(BaseModel):
"""Performance report response model."""
period_start: str
period_end: str
downloads_per_hour: float
average_queue_size: float
peak_memory_usage_mb: float
average_cpu_percent: float
uptime_seconds: float
error_rate: float
class SummaryReportResponse(BaseModel):
"""Comprehensive analytics summary response."""
timestamp: str
download_stats: DownloadStatsResponse
series_popularity: list[SeriesPopularityResponse]
storage_analysis: StorageAnalysisResponse
performance_report: PerformanceReportResponse
@router.get("/downloads", response_model=DownloadStatsResponse)
async def get_download_statistics(
days: int = 30,
    db: AsyncSession = Depends(get_db),
) -> DownloadStatsResponse:
"""Get download statistics for specified period.
Args:
days: Number of days to analyze (default: 30)
db: Database session
Returns:
Download statistics including success rates and speeds
"""
try:
service = get_analytics_service()
stats = await service.get_download_stats(db, days=days)
return DownloadStatsResponse(
total_downloads=stats.total_downloads,
successful_downloads=stats.successful_downloads,
failed_downloads=stats.failed_downloads,
total_bytes_downloaded=stats.total_bytes_downloaded,
average_speed_mbps=stats.average_speed_mbps,
success_rate=stats.success_rate,
average_duration_seconds=stats.average_duration_seconds,
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to get download statistics: {str(e)}",
)
@router.get(
"/series-popularity",
response_model=list[SeriesPopularityResponse]
)
async def get_series_popularity(
limit: int = 10,
    db: AsyncSession = Depends(get_db),
) -> list[SeriesPopularityResponse]:
"""Get most popular series by download count.
Args:
limit: Maximum number of series (default: 10)
db: Database session
Returns:
List of series sorted by popularity
"""
try:
service = get_analytics_service()
popularity = await service.get_series_popularity(db, limit=limit)
return [
SeriesPopularityResponse(
series_name=p.series_name,
download_count=p.download_count,
total_size_bytes=p.total_size_bytes,
last_download=p.last_download,
success_rate=p.success_rate,
)
for p in popularity
]
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to get series popularity: {str(e)}",
)
@router.get(
"/storage",
response_model=StorageAnalysisResponse
)
async def get_storage_analysis() -> StorageAnalysisResponse:
"""Get current storage usage analysis.
Returns:
Storage breakdown including disk and directory usage
"""
try:
service = get_analytics_service()
analysis = service.get_storage_analysis()
return StorageAnalysisResponse(
total_storage_bytes=analysis.total_storage_bytes,
used_storage_bytes=analysis.used_storage_bytes,
free_storage_bytes=analysis.free_storage_bytes,
storage_percent_used=analysis.storage_percent_used,
downloads_directory_size_bytes=(
analysis.downloads_directory_size_bytes
),
cache_directory_size_bytes=(
analysis.cache_directory_size_bytes
),
logs_directory_size_bytes=(
analysis.logs_directory_size_bytes
),
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to get storage analysis: {str(e)}",
)
@router.get(
"/performance",
response_model=PerformanceReportResponse
)
async def get_performance_report(
hours: int = 24,
    db: AsyncSession = Depends(get_db),
) -> PerformanceReportResponse:
"""Get performance metrics for specified period.
Args:
hours: Number of hours to analyze (default: 24)
db: Database session
Returns:
Performance metrics including speeds and system usage
"""
try:
service = get_analytics_service()
report = await service.get_performance_report(db, hours=hours)
return PerformanceReportResponse(
period_start=report.period_start,
period_end=report.period_end,
downloads_per_hour=report.downloads_per_hour,
average_queue_size=report.average_queue_size,
peak_memory_usage_mb=report.peak_memory_usage_mb,
average_cpu_percent=report.average_cpu_percent,
uptime_seconds=report.uptime_seconds,
error_rate=report.error_rate,
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to get performance report: {str(e)}",
)
@router.get("/summary", response_model=SummaryReportResponse)
async def get_summary_report(
    db: AsyncSession = Depends(get_db),
) -> SummaryReportResponse:
"""Get comprehensive analytics summary.
Args:
db: Database session
Returns:
Complete analytics report with all metrics
"""
try:
service = get_analytics_service()
summary = await service.generate_summary_report(db)
return SummaryReportResponse(
timestamp=summary["timestamp"],
download_stats=DownloadStatsResponse(
**summary["download_stats"]
),
series_popularity=[
SeriesPopularityResponse(**p)
for p in summary["series_popularity"]
],
storage_analysis=StorageAnalysisResponse(
**summary["storage_analysis"]
),
performance_report=PerformanceReportResponse(
**summary["performance_report"]
),
)
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to generate summary report: {str(e)}",
)
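As a quick way to exercise the endpoints above, here is a minimal client sketch; it is not part of this commit, and the base URL and port are assumptions for a locally running server.

import asyncio

import httpx


async def main() -> None:
    # Assumes the FastAPI app exposing these routes runs on localhost:8000.
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        stats = await client.get("/api/analytics/downloads", params={"days": 7})
        stats.raise_for_status()
        print("Download stats:", stats.json())

        top = await client.get(
            "/api/analytics/series-popularity", params={"limit": 5}
        )
        print("Top series:", top.json())


asyncio.run(main())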

src/server/api/backup.py (new file, 304 lines added)

@@ -0,0 +1,304 @@
"""Backup management API endpoints."""
import logging
from typing import Any, Dict, List, Optional
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from src.server.services.backup_service import BackupService, get_backup_service
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/backup", tags=["backup"])
class BackupCreateRequest(BaseModel):
"""Request to create a backup."""
backup_type: str # 'config', 'database', 'full'
description: Optional[str] = None
class BackupResponse(BaseModel):
"""Response for backup creation."""
success: bool
message: str
backup_name: Optional[str] = None
size_bytes: Optional[int] = None
class BackupListResponse(BaseModel):
"""Response for listing backups."""
backups: List[Dict[str, Any]]
total_count: int
class RestoreRequest(BaseModel):
"""Request to restore from backup."""
backup_name: str
class RestoreResponse(BaseModel):
"""Response for restore operation."""
success: bool
message: str
def get_backup_service_dep() -> BackupService:
"""Dependency to get backup service."""
return get_backup_service()
@router.post("/create", response_model=BackupResponse)
async def create_backup(
request: BackupCreateRequest,
backup_service: BackupService = Depends(get_backup_service_dep),
) -> BackupResponse:
"""Create a new backup.
Args:
request: Backup creation request.
backup_service: Backup service dependency.
Returns:
BackupResponse: Result of backup creation.
"""
try:
backup_info = None
if request.backup_type == "config":
backup_info = backup_service.backup_configuration(
request.description or ""
)
elif request.backup_type == "database":
backup_info = backup_service.backup_database(
request.description or ""
)
elif request.backup_type == "full":
backup_info = backup_service.backup_full(
request.description or ""
)
else:
raise ValueError(f"Invalid backup type: {request.backup_type}")
if backup_info is None:
return BackupResponse(
success=False,
message=f"Failed to create {request.backup_type} backup",
)
return BackupResponse(
success=True,
message=(
f"{request.backup_type.capitalize()} backup created "
"successfully"
),
backup_name=backup_info.name,
size_bytes=backup_info.size_bytes,
)
except Exception as e:
logger.error(f"Failed to create backup: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/list", response_model=BackupListResponse)
async def list_backups(
backup_type: Optional[str] = None,
backup_service: BackupService = Depends(get_backup_service_dep),
) -> BackupListResponse:
"""List available backups.
Args:
backup_type: Optional filter by backup type.
backup_service: Backup service dependency.
Returns:
BackupListResponse: List of available backups.
"""
try:
backups = backup_service.list_backups(backup_type)
return BackupListResponse(backups=backups, total_count=len(backups))
except Exception as e:
logger.error(f"Failed to list backups: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/restore", response_model=RestoreResponse)
async def restore_backup(
request: RestoreRequest,
backup_type: Optional[str] = None,
backup_service: BackupService = Depends(get_backup_service_dep),
) -> RestoreResponse:
"""Restore from a backup.
Args:
request: Restore request.
backup_type: Type of backup to restore.
backup_service: Backup service dependency.
Returns:
RestoreResponse: Result of restore operation.
"""
try:
# Determine backup type from filename if not provided
if backup_type is None:
if "config" in request.backup_name:
backup_type = "config"
elif "database" in request.backup_name:
backup_type = "database"
else:
backup_type = "full"
success = False
if backup_type == "config":
success = backup_service.restore_configuration(
request.backup_name
)
elif backup_type == "database":
success = backup_service.restore_database(request.backup_name)
else:
raise ValueError(f"Cannot restore backup type: {backup_type}")
if not success:
return RestoreResponse(
success=False,
message=f"Failed to restore {backup_type} backup",
)
return RestoreResponse(
success=True,
message=f"{backup_type.capitalize()} backup restored successfully",
)
except Exception as e:
logger.error(f"Failed to restore backup: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.delete("/{backup_name}", response_model=Dict[str, Any])
async def delete_backup(
backup_name: str,
backup_service: BackupService = Depends(get_backup_service_dep),
) -> Dict[str, Any]:
"""Delete a backup.
Args:
backup_name: Name of the backup to delete.
backup_service: Backup service dependency.
Returns:
dict: Result of delete operation.
"""
try:
success = backup_service.delete_backup(backup_name)
if not success:
raise HTTPException(status_code=404, detail="Backup not found")
return {"success": True, "message": "Backup deleted successfully"}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to delete backup: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/cleanup", response_model=Dict[str, Any])
async def cleanup_backups(
max_backups: int = 10,
backup_type: Optional[str] = None,
backup_service: BackupService = Depends(get_backup_service_dep),
) -> Dict[str, Any]:
"""Clean up old backups.
Args:
max_backups: Maximum number of backups to keep.
backup_type: Optional filter by backup type.
backup_service: Backup service dependency.
Returns:
dict: Number of backups deleted.
"""
try:
deleted_count = backup_service.cleanup_old_backups(
max_backups, backup_type
)
return {
"success": True,
"message": "Cleanup completed",
"deleted_count": deleted_count,
}
except Exception as e:
logger.error(f"Failed to cleanup backups: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/export/anime", response_model=Dict[str, Any])
async def export_anime_data(
backup_service: BackupService = Depends(get_backup_service_dep),
) -> Dict[str, Any]:
"""Export anime library data.
Args:
backup_service: Backup service dependency.
Returns:
dict: Result of export operation.
"""
try:
output_file = "data/backups/anime_export.json"
success = backup_service.export_anime_data(output_file)
if not success:
raise HTTPException(
status_code=500, detail="Failed to export anime data"
)
return {
"success": True,
"message": "Anime data exported successfully",
"export_file": output_file,
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to export anime data: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/import/anime", response_model=Dict[str, Any])
async def import_anime_data(
import_file: str,
backup_service: BackupService = Depends(get_backup_service_dep),
) -> Dict[str, Any]:
"""Import anime library data.
Args:
import_file: Path to import file.
backup_service: Backup service dependency.
Returns:
dict: Result of import operation.
"""
try:
success = backup_service.import_anime_data(import_file)
if not success:
raise HTTPException(
status_code=400, detail="Failed to import anime data"
)
return {
"success": True,
"message": "Anime data imported successfully",
}
except HTTPException:
raise
except Exception as e:
logger.error(f"Failed to import anime data: {e}")
raise HTTPException(status_code=500, detail=str(e))
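A hedged test sketch for the backup routes, using FastAPI's TestClient and a dependency override so no real data directories are touched; the /tmp paths are illustrative, not part of this commit.

from fastapi import FastAPI
from fastapi.testclient import TestClient

from src.server.api.backup import get_backup_service_dep, router
from src.server.services.backup_service import BackupService

app = FastAPI()
app.include_router(router)

# Point the service at throwaway directories for the test.
app.dependency_overrides[get_backup_service_dep] = lambda: BackupService(
    backup_dir="/tmp/test_backups",
    config_dir="/tmp/test_config",
    database_path="/tmp/test_config/aniworld.db",
)

client = TestClient(app)
created = client.post("/api/backup/create", json={"backup_type": "config"})
print(created.status_code, created.json())
print(client.get("/api/backup/list").json())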

src/server/api/health.py (new file, 266 lines added)

@@ -0,0 +1,266 @@
"""Health check endpoints for system monitoring and status verification."""
import logging
from datetime import datetime
from typing import Any, Dict, Optional
import psutil
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from src.server.utils.dependencies import get_database_session
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/health", tags=["health"])
class HealthStatus(BaseModel):
"""Basic health status response."""
status: str
timestamp: str
version: str = "1.0.0"
class DatabaseHealth(BaseModel):
"""Database health status."""
status: str
connection_time_ms: float
message: Optional[str] = None
class SystemMetrics(BaseModel):
"""System resource metrics."""
cpu_percent: float
memory_percent: float
memory_available_mb: float
disk_percent: float
disk_free_mb: float
uptime_seconds: float
class DependencyHealth(BaseModel):
"""Health status of external dependencies."""
database: DatabaseHealth
filesystem: Dict[str, Any]
system: SystemMetrics
class DetailedHealthStatus(BaseModel):
"""Comprehensive health check response."""
status: str
timestamp: str
version: str = "1.0.0"
dependencies: DependencyHealth
startup_time: datetime
# Global startup time
startup_time = datetime.now()
async def check_database_health(db: AsyncSession) -> DatabaseHealth:
"""Check database connection and performance.
Args:
db: Database session dependency.
Returns:
DatabaseHealth: Database status and connection time.
"""
try:
import time
start_time = time.time()
await db.execute(text("SELECT 1"))
connection_time = (time.time() - start_time) * 1000 # Convert to milliseconds
return DatabaseHealth(
status="healthy",
connection_time_ms=connection_time,
message="Database connection successful",
)
except Exception as e:
logger.error(f"Database health check failed: {e}")
return DatabaseHealth(
status="unhealthy",
connection_time_ms=0,
message=f"Database connection failed: {str(e)}",
)
async def check_filesystem_health() -> Dict[str, Any]:
"""Check filesystem availability and permissions.
Returns:
dict: Filesystem status and available space.
"""
try:
import os
data_dir = "data"
logs_dir = "logs"
data_accessible = os.path.exists(data_dir) and os.access(data_dir, os.W_OK)
logs_accessible = os.path.exists(logs_dir) and os.access(logs_dir, os.W_OK)
return {
"status": "healthy" if (data_accessible and logs_accessible) else "degraded",
"data_dir_writable": data_accessible,
"logs_dir_writable": logs_accessible,
"message": "Filesystem check completed",
}
except Exception as e:
logger.error(f"Filesystem health check failed: {e}")
return {
"status": "unhealthy",
"message": f"Filesystem check failed: {str(e)}",
}
def get_system_metrics() -> SystemMetrics:
"""Get system resource metrics.
Returns:
SystemMetrics: CPU, memory, disk, and uptime information.
"""
try:
import os
import time
# CPU usage
cpu_percent = psutil.cpu_percent(interval=1)
# Memory usage
memory_info = psutil.virtual_memory()
memory_percent = memory_info.percent
memory_available_mb = memory_info.available / (1024 * 1024)
# Disk usage
disk_info = psutil.disk_usage("/")
disk_percent = disk_info.percent
disk_free_mb = disk_info.free / (1024 * 1024)
# Uptime
boot_time = psutil.boot_time()
uptime_seconds = time.time() - boot_time
return SystemMetrics(
cpu_percent=cpu_percent,
memory_percent=memory_percent,
memory_available_mb=memory_available_mb,
disk_percent=disk_percent,
disk_free_mb=disk_free_mb,
uptime_seconds=uptime_seconds,
)
except Exception as e:
logger.error(f"System metrics collection failed: {e}")
raise HTTPException(
status_code=500, detail=f"Failed to collect system metrics: {str(e)}"
)
@router.get("", response_model=HealthStatus)
async def basic_health_check() -> HealthStatus:
"""Basic health check endpoint.
Returns:
HealthStatus: Simple health status with timestamp.
"""
logger.debug("Basic health check requested")
return HealthStatus(
status="healthy",
timestamp=datetime.now().isoformat(),
)
@router.get("/detailed", response_model=DetailedHealthStatus)
async def detailed_health_check(
db: AsyncSession = Depends(get_database_session),
) -> DetailedHealthStatus:
"""Comprehensive health check endpoint.
Checks database, filesystem, and system metrics.
Args:
db: Database session dependency.
Returns:
DetailedHealthStatus: Comprehensive health information.
"""
logger.debug("Detailed health check requested")
try:
# Check dependencies
database_health = await check_database_health(db)
filesystem_health = await check_filesystem_health()
system_metrics = get_system_metrics()
# Determine overall status
overall_status = "healthy"
if database_health.status != "healthy":
overall_status = "degraded"
if filesystem_health.get("status") != "healthy":
overall_status = "degraded"
dependencies = DependencyHealth(
database=database_health,
filesystem=filesystem_health,
system=system_metrics,
)
return DetailedHealthStatus(
status=overall_status,
timestamp=datetime.now().isoformat(),
dependencies=dependencies,
startup_time=startup_time,
)
except Exception as e:
logger.error(f"Detailed health check failed: {e}")
raise HTTPException(status_code=500, detail="Health check failed")
@router.get("/metrics", response_model=SystemMetrics)
async def get_metrics() -> SystemMetrics:
"""Get system resource metrics.
Returns:
SystemMetrics: Current CPU, memory, disk, and uptime metrics.
"""
logger.debug("System metrics requested")
return get_system_metrics()
@router.get("/metrics/prometheus")
async def get_prometheus_metrics() -> str:
"""Get metrics in Prometheus format.
Returns:
str: Prometheus formatted metrics.
"""
from src.server.utils.metrics import get_metrics_collector
logger.debug("Prometheus metrics requested")
collector = get_metrics_collector()
return collector.export_prometheus_format()
@router.get("/metrics/json")
async def get_metrics_json() -> Dict[str, Any]:
"""Get metrics as JSON.
Returns:
dict: Metrics in JSON format.
"""
from src.server.utils.metrics import get_metrics_collector
logger.debug("JSON metrics requested")
collector = get_metrics_collector()
return collector.export_json()
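How these routers get mounted is not shown in this diff; a minimal wiring sketch follows. The app title and the uvicorn command are illustrative assumptions.

from fastapi import FastAPI

from src.server.api import analytics, backup, health


def create_app() -> FastAPI:
    """Assemble an app from the routers added in this commit."""
    app = FastAPI(title="AniWorld Server")
    app.include_router(health.router)     # /health, /health/detailed, /health/metrics
    app.include_router(analytics.router)  # /api/analytics/*
    app.include_router(backup.router)     # /api/backup/*
    return app


app = create_app()
# Illustrative only: uvicorn src.server.main:app --reload (module path is an assumption)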


@@ -0,0 +1,369 @@
"""Maintenance and system management API endpoints."""
import logging
from typing import Any, Dict
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.ext.asyncio import AsyncSession
from src.server.services.monitoring_service import get_monitoring_service
from src.server.utils.dependencies import get_database_session
from src.server.utils.system import get_system_utilities
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/maintenance", tags=["maintenance"])
def get_system_utils():
"""Dependency to get system utilities."""
return get_system_utilities()
@router.post("/cleanup")
async def cleanup_temporary_files(
max_age_days: int = 30,
system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
"""Clean up temporary and old files.
Args:
max_age_days: Delete files older than this many days.
system_utils: System utilities dependency.
Returns:
dict: Cleanup results.
"""
try:
deleted_logs = system_utils.cleanup_directory(
"logs", "*.log", max_age_days
)
deleted_temp = system_utils.cleanup_directory(
"Temp", "*", max_age_days
)
deleted_dirs = system_utils.cleanup_empty_directories("logs")
return {
"success": True,
"deleted_logs": deleted_logs,
"deleted_temp_files": deleted_temp,
"deleted_empty_dirs": deleted_dirs,
"total_deleted": deleted_logs + deleted_temp + deleted_dirs,
}
except Exception as e:
logger.error(f"Cleanup failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/stats")
async def get_maintenance_stats(
db: AsyncSession = Depends(get_database_session),
system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
"""Get system maintenance statistics.
Args:
db: Database session dependency.
system_utils: System utilities dependency.
Returns:
dict: Maintenance statistics.
"""
try:
monitoring = get_monitoring_service()
# Get disk usage
disk_info = system_utils.get_disk_usage("/")
# Get logs directory size
logs_size = system_utils.get_directory_size("logs")
data_size = system_utils.get_directory_size("data")
temp_size = system_utils.get_directory_size("Temp")
# Get system info
system_info = system_utils.get_system_info()
# Get queue metrics
queue_metrics = await monitoring.get_queue_metrics(db)
return {
"disk": {
"total_gb": disk_info.total_bytes / (1024**3),
"used_gb": disk_info.used_bytes / (1024**3),
"free_gb": disk_info.free_bytes / (1024**3),
"percent_used": disk_info.percent_used,
},
"directories": {
"logs_mb": logs_size / (1024 * 1024),
"data_mb": data_size / (1024 * 1024),
"temp_mb": temp_size / (1024 * 1024),
},
"system": system_info,
"queue": {
"total_items": queue_metrics.total_items,
"downloaded_gb": queue_metrics.downloaded_bytes / (1024**3),
"total_gb": queue_metrics.total_size_bytes / (1024**3),
},
}
except Exception as e:
logger.error(f"Failed to get maintenance stats: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/vacuum")
async def vacuum_database(
db: AsyncSession = Depends(get_database_session),
) -> Dict[str, Any]:
"""Optimize database (vacuum).
Args:
db: Database session dependency.
Returns:
dict: Vacuum result.
"""
try:
from sqlalchemy import text
# VACUUM command to optimize database
await db.execute(text("VACUUM"))
await db.commit()
logger.info("Database vacuumed successfully")
return {
"success": True,
"message": "Database optimized successfully",
}
except Exception as e:
logger.error(f"Database vacuum failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/rebuild-index")
async def rebuild_database_indexes(
db: AsyncSession = Depends(get_database_session),
) -> Dict[str, Any]:
"""Rebuild database indexes.
    Note: This endpoint currently only runs ANALYZE to refresh the query
    planner's statistics. SQLite does support REINDEX; for other production
    databases, implement engine-specific index rebuilding logic.
Args:
db: Database session dependency.
Returns:
dict: Rebuild result.
"""
try:
from sqlalchemy import text
# Analyze database for query optimization
await db.execute(text("ANALYZE"))
await db.commit()
logger.info("Database indexes analyzed successfully")
return {
"success": True,
"message": "Database indexes analyzed successfully",
}
except Exception as e:
logger.error(f"Index rebuild failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/prune-logs")
async def prune_old_logs(
days: int = 7,
system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
"""Remove log files older than specified days.
Args:
days: Keep logs from last N days.
system_utils: System utilities dependency.
Returns:
dict: Pruning results.
"""
try:
deleted = system_utils.cleanup_directory(
"logs", "*.log", max_age_days=days
)
logger.info(f"Pruned {deleted} log files")
return {
"success": True,
"deleted_count": deleted,
"message": f"Deleted {deleted} log files older than {days} days",
}
except Exception as e:
logger.error(f"Log pruning failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/disk-usage")
async def get_disk_usage(
system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
"""Get detailed disk usage information.
Args:
system_utils: System utilities dependency.
Returns:
dict: Disk usage for all partitions.
"""
try:
disk_infos = system_utils.get_all_disk_usage()
partitions = []
for disk_info in disk_infos:
partitions.append(
{
"path": disk_info.path,
"total_gb": disk_info.total_bytes / (1024**3),
"used_gb": disk_info.used_bytes / (1024**3),
"free_gb": disk_info.free_bytes / (1024**3),
"percent_used": disk_info.percent_used,
}
)
return {
"success": True,
"partitions": partitions,
"total_partitions": len(partitions),
}
except Exception as e:
logger.error(f"Failed to get disk usage: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/processes")
async def get_running_processes(
limit: int = 10,
system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
"""Get running processes information.
Args:
limit: Maximum number of processes to return.
system_utils: System utilities dependency.
Returns:
dict: Running processes information.
"""
try:
processes = system_utils.get_all_processes()
# Sort by memory usage and get top N
sorted_processes = sorted(
processes, key=lambda x: x.memory_mb, reverse=True
)
top_processes = []
for proc in sorted_processes[:limit]:
top_processes.append(
{
"pid": proc.pid,
"name": proc.name,
"cpu_percent": round(proc.cpu_percent, 2),
"memory_mb": round(proc.memory_mb, 2),
"status": proc.status,
}
)
return {
"success": True,
"processes": top_processes,
"total_processes": len(processes),
}
except Exception as e:
logger.error(f"Failed to get processes: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.post("/health-check")
async def full_health_check(
db: AsyncSession = Depends(get_database_session),
system_utils=Depends(get_system_utils),
) -> Dict[str, Any]:
"""Perform full system health check and generate report.
Args:
db: Database session dependency.
system_utils: System utilities dependency.
Returns:
dict: Complete health check report.
"""
try:
monitoring = get_monitoring_service()
# Check database and filesystem
from src.server.api.health import check_database_health
from src.server.api.health import check_filesystem_health as check_fs
db_health = await check_database_health(db)
        fs_health = await check_fs()
# Get system metrics
system_metrics = monitoring.get_system_metrics()
# Get error metrics
error_metrics = monitoring.get_error_metrics()
# Get queue metrics
queue_metrics = await monitoring.get_queue_metrics(db)
# Determine overall health
issues = []
if db_health.status != "healthy":
issues.append("Database connectivity issue")
if fs_health.get("status") != "healthy":
issues.append("Filesystem accessibility issue")
if system_metrics.cpu_percent > 80:
issues.append(f"High CPU usage: {system_metrics.cpu_percent}%")
if system_metrics.memory_percent > 80:
issues.append(
f"High memory usage: {system_metrics.memory_percent}%"
)
if error_metrics.error_rate_per_hour > 1.0:
issues.append(
f"High error rate: "
f"{error_metrics.error_rate_per_hour:.2f} errors/hour"
)
overall_health = "healthy"
if issues:
overall_health = "degraded" if len(issues) < 3 else "unhealthy"
return {
"overall_health": overall_health,
"issues": issues,
"metrics": {
"database": {
"status": db_health.status,
"connection_time_ms": db_health.connection_time_ms,
},
"filesystem": fs_health,
"system": {
"cpu_percent": system_metrics.cpu_percent,
"memory_percent": system_metrics.memory_percent,
"disk_percent": system_metrics.disk_percent,
},
"queue": {
"total_items": queue_metrics.total_items,
"failed_items": queue_metrics.failed_items,
"success_rate": round(queue_metrics.success_rate, 2),
},
"errors": {
"errors_24h": error_metrics.errors_24h,
"rate_per_hour": round(
error_metrics.error_rate_per_hour, 2
),
},
},
}
except Exception as e:
logger.error(f"Health check failed: {e}")
raise HTTPException(status_code=500, detail=str(e))
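The cleanup endpoint above is on-demand only; a sketch of running the same cleanup on a timer is shown below. It reuses the system utilities imported in this file, while the scheduling itself (creating the task from a startup hook) is an assumption.

import asyncio
import logging

from src.server.utils.system import get_system_utilities

logger = logging.getLogger(__name__)


async def periodic_cleanup(
    interval_hours: float = 24.0, max_age_days: int = 30
) -> None:
    """Repeat the log cleanup performed by /api/maintenance/cleanup."""
    system_utils = get_system_utilities()
    while True:
        try:
            deleted = system_utils.cleanup_directory("logs", "*.log", max_age_days)
            logger.info("Periodic cleanup removed %d old log files", deleted)
        except Exception:
            logger.exception("Periodic cleanup failed")
        await asyncio.sleep(interval_hours * 3600)

# Hypothetical wiring: asyncio.create_task(periodic_cleanup()) from a startup event.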


@@ -0,0 +1,422 @@
"""Analytics service for downloads, popularity, and performance metrics.
This module provides comprehensive analytics tracking including download
statistics, series popularity analysis, storage usage trends, and
performance reporting.
"""
import json
import logging
from dataclasses import asdict, dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional
import psutil
from sqlalchemy import case, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from src.server.database.models import Download, DownloadStatus
logger = logging.getLogger(__name__)
ANALYTICS_FILE = Path("data") / "analytics.json"
@dataclass
class DownloadStats:
"""Download statistics snapshot."""
total_downloads: int = 0
successful_downloads: int = 0
failed_downloads: int = 0
total_bytes_downloaded: int = 0
average_speed_mbps: float = 0.0
success_rate: float = 0.0
average_duration_seconds: float = 0.0
@dataclass
class SeriesPopularity:
"""Series popularity metrics."""
series_name: str
download_count: int
total_size_bytes: int
last_download: Optional[str] = None
success_rate: float = 0.0
@dataclass
class StorageAnalysis:
"""Storage usage analysis."""
total_storage_bytes: int = 0
used_storage_bytes: int = 0
free_storage_bytes: int = 0
storage_percent_used: float = 0.0
downloads_directory_size_bytes: int = 0
cache_directory_size_bytes: int = 0
logs_directory_size_bytes: int = 0
@dataclass
class PerformanceReport:
"""Performance metrics and trends."""
period_start: str
period_end: str
downloads_per_hour: float = 0.0
average_queue_size: float = 0.0
peak_memory_usage_mb: float = 0.0
average_cpu_percent: float = 0.0
uptime_seconds: float = 0.0
error_rate: float = 0.0
samples: List[Dict[str, Any]] = field(default_factory=list)
class AnalyticsService:
"""Service for tracking and reporting analytics data."""
def __init__(self):
"""Initialize the analytics service."""
self.analytics_file = ANALYTICS_FILE
self._ensure_analytics_file()
def _ensure_analytics_file(self) -> None:
"""Ensure analytics file exists with default data."""
if not self.analytics_file.exists():
default_data = {
"created_at": datetime.now().isoformat(),
"last_updated": datetime.now().isoformat(),
"download_stats": asdict(DownloadStats()),
"series_popularity": [],
"storage_history": [],
"performance_samples": [],
}
self.analytics_file.write_text(json.dumps(default_data, indent=2))
def _load_analytics(self) -> Dict[str, Any]:
"""Load analytics data from file."""
try:
return json.loads(self.analytics_file.read_text())
except (FileNotFoundError, json.JSONDecodeError):
self._ensure_analytics_file()
return json.loads(self.analytics_file.read_text())
def _save_analytics(self, data: Dict[str, Any]) -> None:
"""Save analytics data to file."""
data["last_updated"] = datetime.now().isoformat()
self.analytics_file.write_text(json.dumps(data, indent=2))
async def get_download_stats(
self, db: AsyncSession, days: int = 30
) -> DownloadStats:
"""Get download statistics for the specified period.
Args:
db: Database session
days: Number of days to analyze
Returns:
DownloadStats with aggregated download data
"""
cutoff_date = datetime.now() - timedelta(days=days)
# Query downloads within period
stmt = select(Download).where(
Download.created_at >= cutoff_date
)
result = await db.execute(stmt)
downloads = result.scalars().all()
if not downloads:
return DownloadStats()
successful = [d for d in downloads
if d.status == DownloadStatus.COMPLETED]
failed = [d for d in downloads
if d.status == DownloadStatus.FAILED]
total_bytes = sum(d.size_bytes or 0 for d in successful)
total_seconds = sum(
d.duration_seconds or 0 for d in successful
) or 1
avg_speed = (
(total_bytes / (1024 * 1024)) / total_seconds
if total_seconds > 0
else 0.0
)
success_rate = (
len(successful) / len(downloads) * 100 if downloads else 0.0
)
return DownloadStats(
total_downloads=len(downloads),
successful_downloads=len(successful),
failed_downloads=len(failed),
total_bytes_downloaded=total_bytes,
average_speed_mbps=avg_speed,
success_rate=success_rate,
average_duration_seconds=total_seconds / len(successful)
if successful
else 0.0,
)
async def get_series_popularity(
self, db: AsyncSession, limit: int = 10
) -> List[SeriesPopularity]:
"""Get most popular series by download count.
Args:
db: Database session
limit: Maximum number of series to return
Returns:
List of SeriesPopularity objects
"""
stmt = (
select(
Download.series_name,
func.count(Download.id).label("download_count"),
func.sum(Download.size_bytes).label("total_size"),
func.max(Download.created_at).label("last_download"),
                func.sum(
                    case(
                        (Download.status == DownloadStatus.COMPLETED, 1),
                        else_=0,
                    )
                ).label("successful"),
)
.group_by(Download.series_name)
.order_by(func.count(Download.id).desc())
.limit(limit)
)
result = await db.execute(stmt)
rows = result.all()
popularity = []
for row in rows:
success_rate = 0.0
if row.download_count > 0:
success_rate = (
(row.successful or 0) / row.download_count * 100
)
popularity.append(
SeriesPopularity(
series_name=row.series_name or "Unknown",
download_count=row.download_count or 0,
total_size_bytes=row.total_size or 0,
last_download=row.last_download.isoformat()
if row.last_download
else None,
success_rate=success_rate,
)
)
return popularity
def get_storage_analysis(self) -> StorageAnalysis:
"""Get current storage usage analysis.
Returns:
StorageAnalysis with storage breakdown
"""
try:
# Get disk usage for data directory
disk = psutil.disk_usage("/")
total = disk.total
used = disk.used
free = disk.free
analysis = StorageAnalysis(
total_storage_bytes=total,
used_storage_bytes=used,
free_storage_bytes=free,
storage_percent_used=disk.percent,
downloads_directory_size_bytes=self._get_dir_size(
Path("data")
),
cache_directory_size_bytes=self._get_dir_size(
Path("data") / "cache"
),
logs_directory_size_bytes=self._get_dir_size(
Path("logs")
),
)
return analysis
except Exception as e:
logger.error(f"Storage analysis failed: {e}")
return StorageAnalysis()
def _get_dir_size(self, path: Path) -> int:
"""Calculate total size of directory.
Args:
path: Directory path
Returns:
Total size in bytes
"""
if not path.exists():
return 0
total = 0
try:
for item in path.rglob("*"):
if item.is_file():
total += item.stat().st_size
except (OSError, PermissionError):
pass
return total
async def get_performance_report(
self, db: AsyncSession, hours: int = 24
) -> PerformanceReport:
"""Get performance metrics for the specified period.
Args:
db: Database session
hours: Number of hours to analyze
Returns:
PerformanceReport with performance metrics
"""
cutoff_time = datetime.now() - timedelta(hours=hours)
# Get download metrics
stmt = select(Download).where(
Download.created_at >= cutoff_time
)
result = await db.execute(stmt)
downloads = result.scalars().all()
downloads_per_hour = len(downloads) / max(hours, 1)
# Get queue size over time (estimated from analytics)
analytics = self._load_analytics()
performance_samples = analytics.get("performance_samples", [])
# Filter recent samples
recent_samples = [
s
for s in performance_samples
if datetime.fromisoformat(s.get("timestamp", "2000-01-01"))
>= cutoff_time
]
avg_queue = sum(
s.get("queue_size", 0) for s in recent_samples
) / len(recent_samples) if recent_samples else 0.0
# Get memory and CPU stats
process = psutil.Process()
memory_info = process.memory_info()
peak_memory_mb = memory_info.rss / (1024 * 1024)
cpu_percent = process.cpu_percent(interval=1)
# Calculate error rate
failed_count = sum(
1 for d in downloads
if d.status == DownloadStatus.FAILED
)
error_rate = (
failed_count / len(downloads) * 100 if downloads else 0.0
)
# Get uptime
boot_time = datetime.fromtimestamp(psutil.boot_time())
uptime_seconds = (datetime.now() - boot_time).total_seconds()
return PerformanceReport(
period_start=cutoff_time.isoformat(),
period_end=datetime.now().isoformat(),
downloads_per_hour=downloads_per_hour,
average_queue_size=avg_queue,
peak_memory_usage_mb=peak_memory_mb,
average_cpu_percent=cpu_percent,
uptime_seconds=uptime_seconds,
error_rate=error_rate,
samples=recent_samples[-100:], # Keep last 100 samples
)
def record_performance_sample(
self,
queue_size: int,
active_downloads: int,
cpu_percent: float,
memory_mb: float,
) -> None:
"""Record a performance metric sample.
Args:
queue_size: Current queue size
active_downloads: Number of active downloads
cpu_percent: Current CPU usage percentage
memory_mb: Current memory usage in MB
"""
analytics = self._load_analytics()
samples = analytics.get("performance_samples", [])
sample = {
"timestamp": datetime.now().isoformat(),
"queue_size": queue_size,
"active_downloads": active_downloads,
"cpu_percent": cpu_percent,
"memory_mb": memory_mb,
}
samples.append(sample)
# Keep only recent samples (7 days worth at 1 sample per minute)
max_samples = 7 * 24 * 60
if len(samples) > max_samples:
samples = samples[-max_samples:]
analytics["performance_samples"] = samples
self._save_analytics(analytics)
async def generate_summary_report(
self, db: AsyncSession
) -> Dict[str, Any]:
"""Generate comprehensive analytics summary.
Args:
db: Database session
Returns:
Summary report with all analytics
"""
download_stats = await self.get_download_stats(db)
series_popularity = await self.get_series_popularity(db, limit=5)
storage = self.get_storage_analysis()
performance = await self.get_performance_report(db)
return {
"timestamp": datetime.now().isoformat(),
"download_stats": asdict(download_stats),
"series_popularity": [
asdict(s) for s in series_popularity
],
"storage_analysis": asdict(storage),
"performance_report": asdict(performance),
}
_analytics_service_instance: Optional[AnalyticsService] = None
def get_analytics_service() -> AnalyticsService:
"""Get or create singleton analytics service instance.
Returns:
AnalyticsService instance
"""
global _analytics_service_instance
if _analytics_service_instance is None:
_analytics_service_instance = AnalyticsService()
return _analytics_service_instance
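record_performance_sample is never called in this file; a background sampler along these lines could feed it. The one-minute interval matches the retention comment above, and the stubbed queue counters are placeholders, not real wiring.

import asyncio

import psutil

from src.server.services.analytics_service import get_analytics_service


async def sample_performance(interval_seconds: float = 60.0) -> None:
    """Record one performance sample per interval for later reporting."""
    service = get_analytics_service()
    process = psutil.Process()
    while True:
        service.record_performance_sample(
            queue_size=0,        # placeholder: wire to the real download queue
            active_downloads=0,  # placeholder: wire to the downloader
            cpu_percent=process.cpu_percent(interval=None),
            memory_mb=process.memory_info().rss / (1024 * 1024),
        )
        await asyncio.sleep(interval_seconds)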


@@ -0,0 +1,432 @@
"""Backup and restore service for configuration and data management."""
import json
import logging
import os
import shutil
import tarfile
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class BackupInfo:
"""Information about a backup."""
name: str
timestamp: datetime
size_bytes: int
backup_type: str # 'config', 'data', 'full'
description: Optional[str] = None
class BackupService:
"""Service for managing backups and restores."""
def __init__(
self,
backup_dir: str = "data/backups",
config_dir: str = "data",
database_path: str = "data/aniworld.db",
):
"""Initialize backup service.
Args:
backup_dir: Directory to store backups.
config_dir: Directory containing configuration files.
database_path: Path to the database file.
"""
self.backup_dir = Path(backup_dir)
self.config_dir = Path(config_dir)
self.database_path = Path(database_path)
# Create backup directory if it doesn't exist
self.backup_dir.mkdir(parents=True, exist_ok=True)
def backup_configuration(
self, description: str = ""
) -> Optional[BackupInfo]:
"""Create a configuration backup.
Args:
description: Optional description for the backup.
Returns:
BackupInfo: Information about the created backup.
"""
try:
timestamp = datetime.now()
backup_name = (
f"config_{timestamp.strftime('%Y%m%d_%H%M%S')}.tar.gz"
)
backup_path = self.backup_dir / backup_name
with tarfile.open(backup_path, "w:gz") as tar:
# Add configuration files
config_files = [
self.config_dir / "config.json",
]
for config_file in config_files:
if config_file.exists():
tar.add(config_file, arcname=config_file.name)
size_bytes = backup_path.stat().st_size
info = BackupInfo(
name=backup_name,
timestamp=timestamp,
size_bytes=size_bytes,
backup_type="config",
description=description,
)
logger.info(f"Configuration backup created: {backup_name}")
return info
except Exception as e:
logger.error(f"Failed to create configuration backup: {e}")
return None
def backup_database(
self, description: str = ""
) -> Optional[BackupInfo]:
"""Create a database backup.
Args:
description: Optional description for the backup.
Returns:
BackupInfo: Information about the created backup.
"""
try:
if not self.database_path.exists():
logger.warning(
f"Database file not found: {self.database_path}"
)
return None
timestamp = datetime.now()
backup_name = (
f"database_{timestamp.strftime('%Y%m%d_%H%M%S')}.tar.gz"
)
backup_path = self.backup_dir / backup_name
with tarfile.open(backup_path, "w:gz") as tar:
tar.add(self.database_path, arcname=self.database_path.name)
size_bytes = backup_path.stat().st_size
info = BackupInfo(
name=backup_name,
timestamp=timestamp,
size_bytes=size_bytes,
backup_type="data",
description=description,
)
logger.info(f"Database backup created: {backup_name}")
return info
except Exception as e:
logger.error(f"Failed to create database backup: {e}")
return None
def backup_full(
self, description: str = ""
) -> Optional[BackupInfo]:
"""Create a full system backup.
Args:
description: Optional description for the backup.
Returns:
BackupInfo: Information about the created backup.
"""
try:
timestamp = datetime.now()
backup_name = f"full_{timestamp.strftime('%Y%m%d_%H%M%S')}.tar.gz"
backup_path = self.backup_dir / backup_name
with tarfile.open(backup_path, "w:gz") as tar:
# Add configuration
config_file = self.config_dir / "config.json"
if config_file.exists():
tar.add(config_file, arcname=config_file.name)
# Add database
if self.database_path.exists():
tar.add(
self.database_path,
arcname=self.database_path.name,
)
# Add download queue
queue_file = self.config_dir / "download_queue.json"
if queue_file.exists():
tar.add(queue_file, arcname=queue_file.name)
size_bytes = backup_path.stat().st_size
info = BackupInfo(
name=backup_name,
timestamp=timestamp,
size_bytes=size_bytes,
backup_type="full",
description=description,
)
logger.info(f"Full backup created: {backup_name}")
return info
except Exception as e:
logger.error(f"Failed to create full backup: {e}")
return None
def restore_configuration(self, backup_name: str) -> bool:
"""Restore configuration from backup.
Args:
backup_name: Name of the backup to restore.
Returns:
bool: True if restore was successful.
"""
try:
backup_path = self.backup_dir / backup_name
if not backup_path.exists():
logger.error(f"Backup file not found: {backup_name}")
return False
# Extract to temporary directory
temp_dir = self.backup_dir / "temp_restore"
temp_dir.mkdir(exist_ok=True)
with tarfile.open(backup_path, "r:gz") as tar:
tar.extractall(temp_dir)
# Copy configuration file back
config_file = temp_dir / "config.json"
if config_file.exists():
shutil.copy(config_file, self.config_dir / "config.json")
# Cleanup
shutil.rmtree(temp_dir)
logger.info(f"Configuration restored from: {backup_name}")
return True
except Exception as e:
logger.error(f"Failed to restore configuration: {e}")
return False
def restore_database(self, backup_name: str) -> bool:
"""Restore database from backup.
Args:
backup_name: Name of the backup to restore.
Returns:
bool: True if restore was successful.
"""
try:
backup_path = self.backup_dir / backup_name
if not backup_path.exists():
logger.error(f"Backup file not found: {backup_name}")
return False
# Create backup of current database
if self.database_path.exists():
current_backup = (
self.database_path.parent
/ f"{self.database_path.name}.backup"
)
shutil.copy(self.database_path, current_backup)
logger.info(f"Current database backed up to: {current_backup}")
# Extract to temporary directory
temp_dir = self.backup_dir / "temp_restore"
temp_dir.mkdir(exist_ok=True)
with tarfile.open(backup_path, "r:gz") as tar:
tar.extractall(temp_dir)
# Copy database file back
db_file = temp_dir / self.database_path.name
if db_file.exists():
shutil.copy(db_file, self.database_path)
# Cleanup
shutil.rmtree(temp_dir)
logger.info(f"Database restored from: {backup_name}")
return True
except Exception as e:
logger.error(f"Failed to restore database: {e}")
return False
def list_backups(
self, backup_type: Optional[str] = None
) -> List[Dict[str, Any]]:
"""List available backups.
Args:
backup_type: Optional filter by backup type.
Returns:
list: List of backup information.
"""
try:
backups = []
for backup_file in sorted(self.backup_dir.glob("*.tar.gz")):
# Extract type from filename
filename = backup_file.name
file_type = filename.split("_")[0]
if backup_type and file_type != backup_type:
continue
# Extract timestamp
timestamp_str = (
filename.split("_", 1)[1].replace(".tar.gz", "")
)
backups.append(
{
"name": filename,
"type": file_type,
"size_bytes": backup_file.stat().st_size,
"created": timestamp_str,
}
)
return sorted(backups, key=lambda x: x["created"], reverse=True)
except Exception as e:
logger.error(f"Failed to list backups: {e}")
return []
def delete_backup(self, backup_name: str) -> bool:
"""Delete a backup.
Args:
backup_name: Name of the backup to delete.
Returns:
bool: True if delete was successful.
"""
try:
backup_path = self.backup_dir / backup_name
if not backup_path.exists():
logger.warning(f"Backup not found: {backup_name}")
return False
backup_path.unlink()
logger.info(f"Backup deleted: {backup_name}")
return True
except Exception as e:
logger.error(f"Failed to delete backup: {e}")
return False
def cleanup_old_backups(
self, max_backups: int = 10, backup_type: Optional[str] = None
) -> int:
"""Remove old backups, keeping only the most recent ones.
Args:
max_backups: Maximum number of backups to keep.
backup_type: Optional filter by backup type.
Returns:
int: Number of backups deleted.
"""
try:
backups = self.list_backups(backup_type)
if len(backups) <= max_backups:
return 0
deleted_count = 0
for backup in backups[max_backups:]:
if self.delete_backup(backup["name"]):
deleted_count += 1
logger.info(f"Cleaned up {deleted_count} old backups")
return deleted_count
except Exception as e:
logger.error(f"Failed to cleanup old backups: {e}")
return 0
def export_anime_data(
self, output_file: str
) -> bool:
"""Export anime library data to JSON.
Args:
output_file: Path to export file.
Returns:
bool: True if export was successful.
"""
try:
# This would integrate with the anime service
# to export anime library data
export_data = {
"timestamp": datetime.now().isoformat(),
"anime_count": 0,
"data": [],
}
with open(output_file, "w") as f:
json.dump(export_data, f, indent=2)
logger.info(f"Anime data exported to: {output_file}")
return True
except Exception as e:
logger.error(f"Failed to export anime data: {e}")
return False
def import_anime_data(self, input_file: str) -> bool:
"""Import anime library data from JSON.
Args:
input_file: Path to import file.
Returns:
bool: True if import was successful.
"""
try:
if not os.path.exists(input_file):
logger.error(f"Import file not found: {input_file}")
return False
with open(input_file, "r") as f:
json.load(f) # Load and validate JSON
# This would integrate with the anime service
# to import anime library data
logger.info(f"Anime data imported from: {input_file}")
return True
except Exception as e:
logger.error(f"Failed to import anime data: {e}")
return False
# Global backup service instance
_backup_service: Optional[BackupService] = None
def get_backup_service() -> BackupService:
"""Get or create the global backup service instance.
Returns:
BackupService: The backup service instance.
"""
global _backup_service
if _backup_service is None:
_backup_service = BackupService()
return _backup_service
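A retention-policy sketch composed from the calls above: create a full backup, then prune old ones. How it gets scheduled is left open, and the seven-backup limit is an arbitrary example.

from src.server.services.backup_service import get_backup_service


def nightly_backup(max_backups: int = 7) -> None:
    """Create a full backup and keep only the most recent max_backups."""
    service = get_backup_service()
    info = service.backup_full(description="nightly")
    if info is None:
        raise RuntimeError("Full backup failed; check the backup service logs")
    deleted = service.cleanup_old_backups(
        max_backups=max_backups, backup_type="full"
    )
    print(f"Created {info.name} ({info.size_bytes} bytes), pruned {deleted} old")


if __name__ == "__main__":
    nightly_backup()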


@@ -0,0 +1,324 @@
"""Monitoring service for system resource tracking and metrics collection."""
import logging
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
import psutil
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from src.server.database.models import DownloadQueueItem
logger = logging.getLogger(__name__)
@dataclass
class QueueMetrics:
"""Download queue statistics and metrics."""
total_items: int = 0
pending_items: int = 0
downloading_items: int = 0
completed_items: int = 0
failed_items: int = 0
total_size_bytes: int = 0
downloaded_bytes: int = 0
average_speed_mbps: float = 0.0
estimated_time_remaining: Optional[timedelta] = None
success_rate: float = 0.0
@dataclass
class SystemMetrics:
"""System resource metrics at a point in time."""
timestamp: datetime
cpu_percent: float
memory_percent: float
memory_available_mb: float
disk_percent: float
disk_free_mb: float
uptime_seconds: float
@dataclass
class ErrorMetrics:
"""Error tracking and statistics."""
total_errors: int = 0
errors_24h: int = 0
most_common_errors: Dict[str, int] = field(default_factory=dict)
last_error_time: Optional[datetime] = None
error_rate_per_hour: float = 0.0
class MonitoringService:
"""Service for monitoring system resources and application metrics."""
def __init__(self):
"""Initialize monitoring service."""
self._error_log: List[tuple[datetime, str]] = []
self._performance_samples: List[SystemMetrics] = []
self._max_samples = 1440 # Keep 24 hours of minute samples
def get_system_metrics(self) -> SystemMetrics:
"""Get current system resource metrics.
Returns:
SystemMetrics: Current system metrics.
"""
try:
import time
cpu_percent = psutil.cpu_percent(interval=1)
memory_info = psutil.virtual_memory()
disk_info = psutil.disk_usage("/")
boot_time = psutil.boot_time()
uptime_seconds = time.time() - boot_time
metrics = SystemMetrics(
timestamp=datetime.now(),
cpu_percent=cpu_percent,
memory_percent=memory_info.percent,
memory_available_mb=memory_info.available / (1024 * 1024),
disk_percent=disk_info.percent,
disk_free_mb=disk_info.free / (1024 * 1024),
uptime_seconds=uptime_seconds,
)
# Store sample
self._performance_samples.append(metrics)
if len(self._performance_samples) > self._max_samples:
self._performance_samples.pop(0)
return metrics
except Exception as e:
logger.error(f"Failed to get system metrics: {e}")
raise
async def get_queue_metrics(self, db: AsyncSession) -> QueueMetrics:
"""Get download queue metrics.
Args:
db: Database session.
Returns:
QueueMetrics: Queue statistics and progress.
"""
try:
# Get all queue items
result = await db.execute(select(DownloadQueueItem))
items = result.scalars().all()
if not items:
return QueueMetrics()
# Calculate metrics
total_items = len(items)
pending_items = sum(1 for i in items if i.status == "PENDING")
downloading_items = sum(
1 for i in items if i.status == "DOWNLOADING"
)
completed_items = sum(1 for i in items if i.status == "COMPLETED")
failed_items = sum(1 for i in items if i.status == "FAILED")
total_size_bytes = sum(
(i.total_bytes or 0) for i in items
)
downloaded_bytes = sum(
(i.downloaded_bytes or 0) for i in items
)
# Calculate average speed from active downloads
speeds = [
i.download_speed for i in items
if i.status == "DOWNLOADING" and i.download_speed
]
average_speed_mbps = (
sum(speeds) / len(speeds) / (1024 * 1024) if speeds else 0
)
# Calculate success rate
success_rate = (
(completed_items / total_items * 100) if total_items > 0 else 0
)
# Estimate time remaining
estimated_time_remaining = None
if average_speed_mbps > 0 and total_size_bytes > downloaded_bytes:
remaining_bytes = total_size_bytes - downloaded_bytes
                # average_speed_mbps is in MB/s, so convert remaining bytes to MB first
                remaining_seconds = (
                    remaining_bytes / (1024 * 1024)
                ) / average_speed_mbps
estimated_time_remaining = timedelta(seconds=remaining_seconds)
return QueueMetrics(
total_items=total_items,
pending_items=pending_items,
downloading_items=downloading_items,
completed_items=completed_items,
failed_items=failed_items,
total_size_bytes=total_size_bytes,
downloaded_bytes=downloaded_bytes,
average_speed_mbps=average_speed_mbps,
estimated_time_remaining=estimated_time_remaining,
success_rate=success_rate,
)
except Exception as e:
logger.error(f"Failed to get queue metrics: {e}")
raise
def log_error(self, error_message: str) -> None:
"""Log an error for tracking purposes.
Args:
error_message: The error message to log.
"""
self._error_log.append((datetime.now(), error_message))
logger.debug(f"Error logged: {error_message}")
def get_error_metrics(self) -> ErrorMetrics:
"""Get error tracking metrics.
Returns:
ErrorMetrics: Error statistics and trends.
"""
total_errors = len(self._error_log)
# Get errors from last 24 hours
cutoff_time = datetime.now() - timedelta(hours=24)
recent_errors = [
(time, msg) for time, msg in self._error_log
if time >= cutoff_time
]
errors_24h = len(recent_errors)
# Count error types
error_counts: Dict[str, int] = {}
for _, msg in recent_errors:
error_type = msg.split(":")[0]
error_counts[error_type] = error_counts.get(error_type, 0) + 1
# Sort by count
most_common_errors = dict(
sorted(error_counts.items(), key=lambda x: x[1], reverse=True)[:10]
)
# Get last error time
last_error_time = (
recent_errors[-1][0] if recent_errors else None
)
# Calculate error rate per hour
error_rate_per_hour = (
errors_24h / 24 if errors_24h > 0 else 0
)
return ErrorMetrics(
total_errors=total_errors,
errors_24h=errors_24h,
most_common_errors=most_common_errors,
last_error_time=last_error_time,
error_rate_per_hour=error_rate_per_hour,
)
def get_performance_summary(self) -> Dict[str, Any]:
"""Get performance summary from collected samples.
Returns:
dict: Performance statistics.
"""
if not self._performance_samples:
return {}
cpu_values = [m.cpu_percent for m in self._performance_samples]
memory_values = [m.memory_percent for m in self._performance_samples]
disk_values = [m.disk_percent for m in self._performance_samples]
return {
"cpu": {
"current": cpu_values[-1],
"average": sum(cpu_values) / len(cpu_values),
"max": max(cpu_values),
"min": min(cpu_values),
},
"memory": {
"current": memory_values[-1],
"average": sum(memory_values) / len(memory_values),
"max": max(memory_values),
"min": min(memory_values),
},
"disk": {
"current": disk_values[-1],
"average": sum(disk_values) / len(disk_values),
"max": max(disk_values),
"min": min(disk_values),
},
"sample_count": len(self._performance_samples),
}
async def get_comprehensive_status(
self, db: AsyncSession
) -> Dict[str, Any]:
"""Get comprehensive system status summary.
Args:
db: Database session.
Returns:
dict: Complete system status.
"""
try:
system_metrics = self.get_system_metrics()
queue_metrics = await self.get_queue_metrics(db)
error_metrics = self.get_error_metrics()
performance = self.get_performance_summary()
return {
"timestamp": datetime.now().isoformat(),
"system": {
"cpu_percent": system_metrics.cpu_percent,
"memory_percent": system_metrics.memory_percent,
"disk_percent": system_metrics.disk_percent,
"uptime_seconds": system_metrics.uptime_seconds,
},
"queue": {
"total_items": queue_metrics.total_items,
"pending": queue_metrics.pending_items,
"downloading": queue_metrics.downloading_items,
"completed": queue_metrics.completed_items,
"failed": queue_metrics.failed_items,
"success_rate": round(queue_metrics.success_rate, 2),
"average_speed_mbps": round(
queue_metrics.average_speed_mbps, 2
),
},
"errors": {
"total": error_metrics.total_errors,
"last_24h": error_metrics.errors_24h,
"rate_per_hour": round(
error_metrics.error_rate_per_hour, 2
),
"most_common": error_metrics.most_common_errors,
},
"performance": performance,
}
except Exception as e:
logger.error(f"Failed to get comprehensive status: {e}")
raise
# Global monitoring service instance
_monitoring_service: Optional[MonitoringService] = None
def get_monitoring_service() -> MonitoringService:
"""Get or create the global monitoring service instance.
Returns:
MonitoringService: The monitoring service instance.
"""
global _monitoring_service
if _monitoring_service is None:
_monitoring_service = MonitoringService()
return _monitoring_service
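Because get_error_metrics groups messages by the text before the first colon, callers get the most out of it with a stable "Category: detail" convention; a small example grounded in the methods above:

from src.server.services.monitoring_service import get_monitoring_service

monitoring = get_monitoring_service()

monitoring.log_error("DownloadError: connection reset while fetching episode 3")
monitoring.log_error("DownloadError: timeout on provider request")
monitoring.log_error("DatabaseError: database locked during VACUUM")

metrics = monitoring.get_error_metrics()
print(metrics.errors_24h)                     # 3
print(metrics.most_common_errors)             # {'DownloadError': 2, 'DatabaseError': 1}
print(round(metrics.error_rate_per_hour, 3))  # 0.125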


@@ -0,0 +1,380 @@
"""Log management utilities for rotation, archival, and search."""
import gzip
import logging
import shutil
from dataclasses import dataclass
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class LogFile:
"""Information about a log file."""
filename: str
path: Path
size_bytes: int
created_time: datetime
modified_time: datetime
class LogManager:
"""Manage application logs."""
def __init__(self, log_dir: str = "logs"):
"""Initialize log manager.
Args:
log_dir: Directory containing log files.
"""
self.log_dir = Path(log_dir)
self.log_dir.mkdir(parents=True, exist_ok=True)
self.archived_dir = self.log_dir / "archived"
self.archived_dir.mkdir(exist_ok=True)
def get_log_files(self, pattern: str = "*.log") -> List[LogFile]:
"""Get list of log files.
Args:
pattern: Glob pattern for log files.
Returns:
list: List of LogFile objects.
"""
log_files = []
for log_path in self.log_dir.glob(pattern):
if log_path.is_file():
stat = log_path.stat()
log_files.append(
LogFile(
filename=log_path.name,
path=log_path,
size_bytes=stat.st_size,
                        created_time=datetime.fromtimestamp(
                            # st_ctime is inode change time on Linux,
                            # not true creation time
                            stat.st_ctime
                        ),
modified_time=datetime.fromtimestamp(
stat.st_mtime
),
)
)
return sorted(log_files, key=lambda x: x.modified_time, reverse=True)
def rotate_log(
self, log_file: str, max_size_bytes: int = 10485760
) -> bool:
"""Rotate a log file if it exceeds max size.
Args:
log_file: Name of the log file.
max_size_bytes: Maximum size before rotation (default 10MB).
Returns:
bool: True if rotation was needed and successful.
"""
try:
log_path = self.log_dir / log_file
if not log_path.exists():
logger.warning(f"Log file not found: {log_file}")
return False
stat = log_path.stat()
if stat.st_size < max_size_bytes:
return False
# Create rotated filename with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
rotated_name = f"{log_path.stem}_{timestamp}.log"
rotated_path = self.log_dir / rotated_name
shutil.move(str(log_path), str(rotated_path))
# Compress the rotated file
self._compress_log(rotated_path)
logger.info(f"Rotated log file: {log_file} -> {rotated_name}")
return True
except Exception as e:
logger.error(f"Failed to rotate log file {log_file}: {e}")
return False
def _compress_log(self, log_path: Path) -> bool:
"""Compress a log file.
Args:
log_path: Path to the log file.
Returns:
bool: True if compression was successful.
"""
try:
gz_path = log_path.parent / f"{log_path.name}.gz"
with open(log_path, "rb") as f_in:
with gzip.open(gz_path, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
log_path.unlink()
logger.debug(f"Compressed log file: {log_path.name}")
return True
except Exception as e:
logger.error(f"Failed to compress log {log_path}: {e}")
return False
def archive_old_logs(
self, days_old: int = 30
) -> int:
"""Archive log files older than specified days.
Args:
days_old: Archive logs older than this many days.
Returns:
int: Number of logs archived.
"""
try:
cutoff_time = datetime.now() - timedelta(days=days_old)
archived_count = 0
for log_file in self.get_log_files():
if log_file.modified_time < cutoff_time:
try:
archived_path = (
self.archived_dir / log_file.filename
)
shutil.move(str(log_file.path), str(archived_path))
self._compress_log(archived_path)
archived_count += 1
logger.debug(
f"Archived log: {log_file.filename}"
)
except Exception as e:
logger.warning(
f"Failed to archive {log_file.filename}: {e}"
)
logger.info(f"Archived {archived_count} old log files")
return archived_count
except Exception as e:
logger.error(f"Failed to archive logs: {e}")
return 0
def search_logs(
self, search_term: str, case_sensitive: bool = False
) -> Dict[str, List[str]]:
"""Search for lines matching a term in log files.
Args:
search_term: Text to search for.
case_sensitive: Whether search is case-sensitive.
Returns:
dict: Dictionary mapping log files to matching lines.
"""
try:
results = {}
for log_file in self.get_log_files():
try:
with open(log_file.path, "r", encoding="utf-8") as f:
matching_lines = []
for line_num, line in enumerate(f, 1):
if case_sensitive:
if search_term in line:
matching_lines.append(
f"{line_num}: {line.strip()}"
)
else:
if search_term.lower() in line.lower():
matching_lines.append(
f"{line_num}: {line.strip()}"
)
if matching_lines:
results[log_file.filename] = matching_lines
except Exception as e:
logger.warning(
f"Failed to search {log_file.filename}: {e}"
)
logger.debug(
f"Search for '{search_term}' found {len(results)} log files"
)
return results
except Exception as e:
logger.error(f"Failed to search logs: {e}")
return {}
def export_logs(
self,
output_file: str,
log_pattern: str = "*.log",
compress: bool = True,
) -> bool:
"""Export logs to a file or archive.
Args:
output_file: Path to output file.
log_pattern: Pattern for logs to include.
compress: Whether to compress the output.
Returns:
bool: True if export was successful.
"""
try:
output_path = Path(output_file)
if compress:
import tarfile
tar_path = output_path.with_suffix(".tar.gz")
with tarfile.open(tar_path, "w:gz") as tar:
for log_file in self.get_log_files(log_pattern):
tar.add(
log_file.path,
arcname=log_file.filename,
)
logger.info(f"Exported logs to: {tar_path}")
return True
else:
                # Concatenate all logs into a single text file
                with open(output_path, "w", encoding="utf-8") as out_f:
                    for log_file in self.get_log_files(log_pattern):
                        out_f.write(f"\n\n=== {log_file.filename} ===\n\n")
                        with open(
                            log_file.path, "r", encoding="utf-8"
                        ) as in_f:
                            out_f.write(in_f.read())
logger.info(f"Exported logs to: {output_path}")
return True
except Exception as e:
logger.error(f"Failed to export logs: {e}")
return False
def get_log_stats(self) -> Dict[str, Any]:
"""Get statistics about log files.
Returns:
dict: Log statistics.
"""
try:
log_files = self.get_log_files()
total_size = sum(log.size_bytes for log in log_files)
total_files = len(log_files)
if not log_files:
return {
"total_files": 0,
"total_size_bytes": 0,
"total_size_mb": 0,
"average_size_bytes": 0,
"largest_file": None,
"oldest_file": None,
"newest_file": None,
}
return {
"total_files": total_files,
"total_size_bytes": total_size,
"total_size_mb": total_size / (1024 * 1024),
"average_size_bytes": total_size // total_files,
"largest_file": max(
log_files, key=lambda x: x.size_bytes
).filename,
"oldest_file": log_files[-1].filename,
"newest_file": log_files[0].filename,
}
except Exception as e:
logger.error(f"Failed to get log stats: {e}")
return {}
def cleanup_logs(
self, max_total_size_mb: int = 100, keep_files: int = 5
) -> int:
"""Clean up old logs to maintain size limit.
Args:
max_total_size_mb: Maximum total log size in MB.
keep_files: Minimum files to keep.
Returns:
int: Number of files deleted.
"""
try:
max_bytes = max_total_size_mb * 1024 * 1024
log_files = self.get_log_files()
if len(log_files) <= keep_files:
return 0
total_size = sum(log.size_bytes for log in log_files)
            deleted_count = 0
            remaining = len(log_files)
            # Iterate oldest-first (get_log_files returns newest-first)
            for log_file in reversed(log_files):
                if total_size <= max_bytes or remaining <= keep_files:
                    break
                try:
                    log_file.path.unlink()
                    total_size -= log_file.size_bytes
                    deleted_count += 1
                    remaining -= 1
                    logger.debug(f"Deleted log file: {log_file.filename}")
except Exception as e:
logger.warning(
f"Failed to delete {log_file.filename}: {e}"
)
logger.info(f"Cleaned up {deleted_count} log files")
return deleted_count
except Exception as e:
logger.error(f"Failed to cleanup logs: {e}")
return 0
def set_log_level(self, logger_name: str, level: str) -> bool:
"""Set log level for a specific logger.
Args:
logger_name: Name of the logger.
level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
Returns:
bool: True if successful.
"""
try:
log_level = getattr(logging, level.upper(), logging.INFO)
target_logger = logging.getLogger(logger_name)
target_logger.setLevel(log_level)
logger.info(f"Set {logger_name} log level to {level}")
return True
except Exception as e:
logger.error(f"Failed to set log level: {e}")
return False
# Global log manager instance
_log_manager: Optional[LogManager] = None
def get_log_manager() -> LogManager:
"""Get or create the global log manager instance.
Returns:
LogManager: The log manager instance.
"""
global _log_manager
if _log_manager is None:
_log_manager = LogManager()
return _log_manager
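
A hedged maintenance sketch driving the LogManager above; the log file name, thresholds, and return shape are illustrative assumptions, and nothing in this commit wires such a routine to a scheduler.

# Illustrative only - "app.log" and the thresholds are assumptions, not project defaults.
def run_log_maintenance() -> dict:
    manager = get_log_manager()
    rotated = manager.rotate_log("app.log", max_size_bytes=10 * 1024 * 1024)
    archived = manager.archive_old_logs(days_old=30)
    deleted = manager.cleanup_logs(max_total_size_mb=100, keep_files=5)
    return {
        "rotated": rotated,
        "archived": archived,
        "deleted": deleted,
        "stats": manager.get_log_stats(),
    }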

358
src/server/utils/metrics.py Normal file
View File

@@ -0,0 +1,358 @@
"""Metrics collection for Prometheus and custom business metrics."""
import logging
import time
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from threading import RLock
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class MetricType(Enum):
"""Types of metrics."""
COUNTER = "counter"
GAUGE = "gauge"
HISTOGRAM = "histogram"
SUMMARY = "summary"
@dataclass
class MetricValue:
"""A single metric value with metadata."""
name: str
value: float
metric_type: MetricType
labels: Dict[str, str] = field(default_factory=dict)
timestamp: datetime = field(default_factory=datetime.now)
help_text: str = ""
@dataclass
class HistogramBucket:
"""Histogram bucket for latency tracking."""
le: float # bucket upper bound in seconds
count: int = 0
class MetricsCollector:
"""Collect and export metrics for monitoring."""
def __init__(self):
"""Initialize metrics collector."""
self._metrics: Dict[str, MetricValue] = {}
self._request_timings: Dict[str, list[float]] = {}
self._download_stats: Dict[str, int] = {
"completed": 0,
"failed": 0,
"total_size_bytes": 0,
}
        self._lock = RLock()  # re-entrant: methods acquire it while nested
self._timers: Dict[str, float] = {}
def increment_counter(
self,
name: str,
value: float = 1.0,
labels: Optional[Dict[str, str]] = None,
help_text: str = "",
) -> None:
"""Increment a counter metric.
Args:
name: Metric name.
value: Amount to increment by.
labels: Optional labels for the metric.
help_text: Optional help text describing the metric.
"""
with self._lock:
if name not in self._metrics:
self._metrics[name] = MetricValue(
name=name,
value=value,
metric_type=MetricType.COUNTER,
labels=labels or {},
help_text=help_text,
)
else:
self._metrics[name].value += value
def set_gauge(
self,
name: str,
value: float,
labels: Optional[Dict[str, str]] = None,
help_text: str = "",
) -> None:
"""Set a gauge metric.
Args:
name: Metric name.
value: Gauge value.
labels: Optional labels for the metric.
help_text: Optional help text describing the metric.
"""
with self._lock:
self._metrics[name] = MetricValue(
name=name,
value=value,
metric_type=MetricType.GAUGE,
labels=labels or {},
help_text=help_text,
)
def observe_histogram(
self,
name: str,
value: float,
labels: Optional[Dict[str, str]] = None,
help_text: str = "",
) -> None:
"""Observe a value for histogram.
Args:
name: Metric name.
value: Value to record.
labels: Optional labels for the metric.
help_text: Optional help text describing the metric.
"""
with self._lock:
if name not in self._request_timings:
self._request_timings[name] = []
self._request_timings[name].append(value)
# Update histogram metric
if name not in self._metrics:
self._metrics[name] = MetricValue(
name=name,
value=value,
metric_type=MetricType.HISTOGRAM,
labels=labels or {},
help_text=help_text,
)
def start_timer(self, timer_name: str) -> None:
"""Start a timer for tracking operation duration.
Args:
timer_name: Name of the timer.
"""
self._timers[timer_name] = time.time()
def end_timer(
self,
timer_name: str,
metric_name: str,
labels: Optional[Dict[str, str]] = None,
) -> float:
"""End a timer and record the duration.
Args:
timer_name: Name of the timer to end.
metric_name: Name of the metric to record.
labels: Optional labels for the metric.
Returns:
Duration in seconds.
"""
if timer_name not in self._timers:
logger.warning(f"Timer {timer_name} not started")
return 0.0
duration = time.time() - self._timers[timer_name]
del self._timers[timer_name]
self.observe_histogram(
metric_name, duration, labels, "Request/operation duration"
)
return duration
def record_download_success(self, size_bytes: int) -> None:
"""Record a successful download.
Args:
size_bytes: Size of downloaded file in bytes.
"""
with self._lock:
self._download_stats["completed"] += 1
self._download_stats["total_size_bytes"] += size_bytes
self.increment_counter(
"downloads_completed_total",
help_text="Total successful downloads",
)
def record_download_failure(self) -> None:
"""Record a failed download."""
with self._lock:
self._download_stats["failed"] += 1
self.increment_counter(
"downloads_failed_total", help_text="Total failed downloads"
)
def get_download_stats(self) -> Dict[str, int]:
"""Get download statistics.
Returns:
dict: Download statistics.
"""
with self._lock:
return self._download_stats.copy()
def get_request_statistics(
self, metric_name: str
) -> Optional[Dict[str, float]]:
"""Get statistics for a request timing metric.
Args:
metric_name: Name of the metric to analyze.
Returns:
Statistics including count, sum, mean, min, max.
"""
with self._lock:
if metric_name not in self._request_timings:
return None
timings = self._request_timings[metric_name]
if not timings:
return None
            ordered = sorted(timings)
            return {
                "count": len(ordered),
                "sum": sum(ordered),
                "mean": sum(ordered) / len(ordered),
                "min": ordered[0],
                "max": ordered[-1],
                "p50": ordered[len(ordered) // 2],
                "p99": ordered[int(len(ordered) * 0.99)],
            }
def export_prometheus_format(self) -> str:
"""Export metrics in Prometheus text format.
Returns:
str: Prometheus format metrics.
"""
with self._lock:
lines = []
for name, metric in self._metrics.items():
# Add help text if available
if metric.help_text:
lines.append(f"# HELP {name} {metric.help_text}")
lines.append(f"# TYPE {name} {metric.metric_type.value}")
# Format labels
label_str = ""
if metric.labels:
label_pairs = [
f'{k}="{v}"' for k, v in metric.labels.items()
]
label_str = "{" + ",".join(label_pairs) + "}"
# Add metric value
lines.append(f"{name}{label_str} {metric.value}")
return "\n".join(lines)
def export_json(self) -> Dict[str, Any]:
"""Export metrics as JSON.
Returns:
dict: Metrics in JSON-serializable format.
"""
with self._lock:
metrics_dict = {}
for name, metric in self._metrics.items():
metrics_dict[name] = {
"value": metric.value,
"type": metric.metric_type.value,
"labels": metric.labels,
"timestamp": metric.timestamp.isoformat(),
}
return {
"metrics": metrics_dict,
"downloads": self._download_stats,
"request_timings": {
name: self.get_request_statistics(name)
for name in self._request_timings
},
}
def reset_metrics(self) -> None:
"""Reset all collected metrics."""
with self._lock:
self._metrics.clear()
self._request_timings.clear()
self._download_stats = {
"completed": 0,
"failed": 0,
"total_size_bytes": 0,
}
def get_all_metrics(self) -> Dict[str, MetricValue]:
"""Get all collected metrics.
Returns:
dict: All metrics keyed by name.
"""
with self._lock:
return self._metrics.copy()
# Global metrics collector instance
_metrics_collector: Optional[MetricsCollector] = None
def get_metrics_collector() -> MetricsCollector:
"""Get or create the global metrics collector instance.
Returns:
MetricsCollector: The metrics collector instance.
"""
global _metrics_collector
if _metrics_collector is None:
_metrics_collector = MetricsCollector()
return _metrics_collector
class TimerContext:
"""Context manager for timing operations."""
def __init__(
self,
metric_name: str,
timer_name: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
):
"""Initialize timer context.
Args:
metric_name: Name of the metric to record.
timer_name: Optional name for the timer.
labels: Optional labels for the metric.
"""
self.metric_name = metric_name
self.timer_name = timer_name or metric_name
self.labels = labels
self.collector = get_metrics_collector()
def __enter__(self):
"""Start the timer."""
self.collector.start_timer(self.timer_name)
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""End the timer and record the metric."""
self.collector.end_timer(
self.timer_name, self.metric_name, self.labels
)
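
A short usage sketch for the collector and TimerContext defined above; the metric names, labels, and the timed operation are illustrative assumptions.

# Illustrative only - metric names and the timed work are assumptions.
import time

collector = get_metrics_collector()
collector.increment_counter(
    "downloads_completed_total", help_text="Total successful downloads"
)
collector.set_gauge("queue_size", 3, labels={"status": "pending"})

# Time an operation; the duration is recorded as a histogram observation.
with TimerContext("request_duration_seconds", labels={"endpoint": "/api/health"}):
    time.sleep(0.1)  # stand-in for the real work

print(collector.export_prometheus_format())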

361
src/server/utils/system.py Normal file
View File

@@ -0,0 +1,361 @@
"""System utility functions for monitoring and management."""
import logging
import os
import shutil
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
import psutil
logger = logging.getLogger(__name__)
@dataclass
class DiskInfo:
"""Information about disk usage."""
total_bytes: int
used_bytes: int
free_bytes: int
percent_used: float
path: str
@dataclass
class ProcessInfo:
"""Information about a process."""
pid: int
name: str
status: str
cpu_percent: float
memory_percent: float
memory_mb: float
create_time: datetime
class SystemUtilities:
"""Utilities for system monitoring and management."""
@staticmethod
def get_disk_usage(path: str = "/") -> Optional[DiskInfo]:
"""Get disk usage information.
Args:
path: Path to check disk usage for.
Returns:
DiskInfo: Disk usage information.
"""
try:
usage = psutil.disk_usage(path)
return DiskInfo(
total_bytes=usage.total,
used_bytes=usage.used,
free_bytes=usage.free,
percent_used=usage.percent,
path=path,
)
except Exception as e:
logger.error(f"Failed to get disk usage for {path}: {e}")
return None
@staticmethod
def get_all_disk_usage() -> List[DiskInfo]:
"""Get disk usage for all mounted partitions.
Returns:
list: List of DiskInfo for each partition.
"""
try:
partitions = psutil.disk_partitions()
disk_infos = []
for partition in partitions:
try:
usage = psutil.disk_usage(partition.mountpoint)
disk_infos.append(
DiskInfo(
total_bytes=usage.total,
used_bytes=usage.used,
free_bytes=usage.free,
percent_used=usage.percent,
path=partition.mountpoint,
)
)
except Exception as e:
logger.warning(
f"Failed to get usage for {partition.mountpoint}: {e}"
)
return disk_infos
except Exception as e:
logger.error(f"Failed to get all disk usage: {e}")
return []
@staticmethod
def cleanup_directory(
directory: str, pattern: str = "*", max_age_days: int = 30
) -> int:
"""Clean up files in a directory matching a pattern.
Args:
directory: Directory to clean.
pattern: File pattern to match (glob).
max_age_days: Only delete files older than this.
Returns:
int: Number of files deleted.
"""
try:
from datetime import timedelta
path = Path(directory)
if not path.exists():
logger.warning(f"Directory not found: {directory}")
return 0
deleted_count = 0
cutoff_time = datetime.now() - timedelta(days=max_age_days)
for file_path in path.glob(pattern):
if file_path.is_file():
file_time = datetime.fromtimestamp(
file_path.stat().st_mtime
)
if file_time < cutoff_time:
try:
file_path.unlink()
deleted_count += 1
logger.debug(f"Deleted file: {file_path}")
except Exception as e:
logger.warning(
f"Failed to delete {file_path}: {e}"
)
logger.info(f"Cleaned up {deleted_count} files from {directory}")
return deleted_count
except Exception as e:
logger.error(f"Failed to cleanup directory {directory}: {e}")
return 0
@staticmethod
def cleanup_empty_directories(directory: str) -> int:
"""Remove empty directories.
Args:
directory: Root directory to clean.
Returns:
int: Number of directories deleted.
"""
try:
path = Path(directory)
if not path.exists():
return 0
deleted_count = 0
# Walk from bottom to top to delete empty dirs
for root, dirs, files in os.walk(directory, topdown=False):
for dir_name in dirs:
dir_path = Path(root) / dir_name
try:
if not os.listdir(dir_path):
os.rmdir(dir_path)
deleted_count += 1
logger.debug(
f"Deleted empty directory: {dir_path}"
)
except Exception as e:
logger.debug(f"Cannot delete {dir_path}: {e}")
logger.info(f"Cleaned up {deleted_count} empty directories")
return deleted_count
except Exception as e:
logger.error(f"Failed to cleanup empty directories: {e}")
return 0
@staticmethod
def get_directory_size(directory: str) -> int:
"""Get total size of a directory.
Args:
directory: Directory path.
Returns:
int: Total size in bytes.
"""
try:
path = Path(directory)
if not path.exists():
return 0
total_size = 0
for entry in path.rglob("*"):
if entry.is_file():
total_size += entry.stat().st_size
return total_size
except Exception as e:
logger.error(f"Failed to get directory size for {directory}: {e}")
return 0
@staticmethod
def get_process_info(pid: Optional[int] = None) -> Optional[ProcessInfo]:
"""Get information about a process.
Args:
pid: Process ID. If None, uses current process.
Returns:
ProcessInfo: Process information.
"""
try:
if pid is None:
pid = os.getpid()
process = psutil.Process(pid)
with process.oneshot():
return ProcessInfo(
pid=process.pid,
name=process.name(),
status=process.status(),
cpu_percent=process.cpu_percent(),
memory_percent=process.memory_percent(),
memory_mb=process.memory_info().rss / (1024 * 1024),
create_time=datetime.fromtimestamp(
process.create_time()
),
)
except Exception as e:
logger.error(f"Failed to get process info for {pid}: {e}")
return None
@staticmethod
def get_all_processes() -> List[ProcessInfo]:
"""Get information about all running processes.
Returns:
list: List of ProcessInfo for each process.
"""
try:
processes = []
            # Pre-fetch portable attributes (cpu_num is Linux-only)
            for proc in psutil.process_iter(
                ["pid", "name", "status", "cpu_percent", "memory_percent"]
            ):
try:
info = SystemUtilities.get_process_info(proc.pid)
if info:
processes.append(info)
except Exception:
pass
return processes
except Exception as e:
logger.error(f"Failed to get all processes: {e}")
return []
@staticmethod
def get_system_info() -> Dict[str, Any]:
"""Get comprehensive system information.
Returns:
dict: System information.
"""
try:
import platform
return {
"platform": platform.platform(),
"processor": platform.processor(),
"cpu_count": psutil.cpu_count(logical=False),
"cpu_count_logical": psutil.cpu_count(logical=True),
"boot_time": datetime.fromtimestamp(
psutil.boot_time()
).isoformat(),
"hostname": platform.node(),
"python_version": platform.python_version(),
}
except Exception as e:
logger.error(f"Failed to get system info: {e}")
return {}
@staticmethod
def get_network_info() -> Dict[str, Any]:
"""Get network information.
Returns:
dict: Network statistics.
"""
try:
net_io = psutil.net_io_counters()
return {
"bytes_sent": net_io.bytes_sent,
"bytes_recv": net_io.bytes_recv,
"packets_sent": net_io.packets_sent,
"packets_recv": net_io.packets_recv,
"errors_in": net_io.errin,
"errors_out": net_io.errout,
"dropped_in": net_io.dropin,
"dropped_out": net_io.dropout,
}
except Exception as e:
logger.error(f"Failed to get network info: {e}")
return {}
@staticmethod
def copy_file_atomic(
src: str, dest: str, chunk_size: int = 1024 * 1024
) -> bool:
"""Copy a file atomically using temporary file.
Args:
src: Source file path.
dest: Destination file path.
chunk_size: Size of chunks for copying.
Returns:
bool: True if successful.
"""
try:
src_path = Path(src)
dest_path = Path(dest)
if not src_path.exists():
logger.error(f"Source file not found: {src}")
return False
            # Create temporary file alongside the destination
            temp_path = dest_path.parent / f"{dest_path.name}.tmp"
            # Copy to the temporary file in chunks of chunk_size bytes
            with open(src_path, "rb") as f_in, open(temp_path, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out, chunk_size)
# Atomic rename
temp_path.replace(dest_path)
logger.debug(f"Atomically copied {src} to {dest}")
return True
except Exception as e:
logger.error(f"Failed to copy file {src} to {dest}: {e}")
return False
# Global system utilities instance
_system_utilities: Optional[SystemUtilities] = None
def get_system_utilities() -> SystemUtilities:
"""Get or create the global system utilities instance.
Returns:
SystemUtilities: The system utilities instance.
"""
global _system_utilities
if _system_utilities is None:
_system_utilities = SystemUtilities()
return _system_utilities
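
A brief sketch combining the utilities above into a cleanup pass; the paths, thresholds, and trigger condition are illustrative assumptions, not project settings.

# Illustrative only - directory names and limits are assumptions.
utils = get_system_utilities()

disk = utils.get_disk_usage("/")
if disk is not None and disk.percent_used > 90.0:
    removed = utils.cleanup_directory("downloads/tmp", pattern="*", max_age_days=7)
    utils.cleanup_empty_directories("downloads/tmp")
    print(f"Removed {removed} stale files to free space")

info = utils.get_process_info()  # current process when pid is omitted
if info is not None:
    print(f"{info.name} (pid {info.pid}) using {info.memory_mb:.1f} MB")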