backup
This commit is contained in:
380
src/server/utils/log_manager.py
Normal file
380
src/server/utils/log_manager.py
Normal file
@@ -0,0 +1,380 @@
|
||||
"""Log management utilities for rotation, archival, and search."""
|
||||
|
||||
import gzip
|
||||
import logging
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class LogFile:
    """Metadata snapshot of a single log file on disk."""

    filename: str          # base name of the file (no directory part)
    path: Path             # full path to the file
    size_bytes: int        # file size at the time of the snapshot
    created_time: datetime   # from st_ctime (inode-change time on Unix)
    modified_time: datetime  # from st_mtime
|
||||
|
||||
|
||||
class LogManager:
    """Manage application log files.

    Provides rotation, gzip compression, archival, text search, export,
    size-bounded cleanup, and runtime log-level control. All operations
    are best-effort: failures are logged and reported through return
    values instead of raising to the caller.
    """

    def __init__(self, log_dir: str = "logs"):
        """Initialize log manager.

        Args:
            log_dir: Directory containing log files; created if missing.
        """
        self.log_dir = Path(log_dir)
        self.log_dir.mkdir(parents=True, exist_ok=True)
        # archive_old_logs() moves aged logs into this subdirectory.
        self.archived_dir = self.log_dir / "archived"
        self.archived_dir.mkdir(exist_ok=True)

    def get_log_files(self, pattern: str = "*.log") -> List["LogFile"]:
        """List log files in the log directory, newest first.

        Args:
            pattern: Glob pattern for log files (non-recursive).

        Returns:
            list: LogFile objects sorted by modification time, descending
                (index 0 is the most recently modified file).
        """
        log_files = []
        for log_path in self.log_dir.glob(pattern):
            if not log_path.is_file():
                continue
            stat = log_path.stat()
            log_files.append(
                LogFile(
                    filename=log_path.name,
                    path=log_path,
                    size_bytes=stat.st_size,
                    # NOTE: st_ctime is inode-change time on Unix; it is a
                    # true creation time only on Windows.
                    created_time=datetime.fromtimestamp(stat.st_ctime),
                    modified_time=datetime.fromtimestamp(stat.st_mtime),
                )
            )
        return sorted(log_files, key=lambda f: f.modified_time, reverse=True)

    def rotate_log(
        self, log_file: str, max_size_bytes: int = 10485760
    ) -> bool:
        """Rotate a log file if it exceeds max size.

        The file is renamed with a timestamp suffix and gzip-compressed.

        Args:
            log_file: Name of the log file (relative to the log dir).
            max_size_bytes: Maximum size before rotation (default 10MB).

        Returns:
            bool: True if rotation was needed and successful.
        """
        try:
            log_path = self.log_dir / log_file

            if not log_path.exists():
                logger.warning(f"Log file not found: {log_file}")
                return False

            if log_path.stat().st_size < max_size_bytes:
                # Below the threshold -- nothing to do.
                return False

            # Create rotated filename with timestamp.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            rotated_name = f"{log_path.stem}_{timestamp}.log"
            rotated_path = self.log_dir / rotated_name

            shutil.move(str(log_path), str(rotated_path))

            # Best-effort: rotation still counts as successful even if
            # compressing the rotated copy fails.
            self._compress_log(rotated_path)

            logger.info(f"Rotated log file: {log_file} -> {rotated_name}")
            return True
        except Exception as e:
            logger.error(f"Failed to rotate log file {log_file}: {e}")
            return False

    def _compress_log(self, log_path: Path) -> bool:
        """Gzip-compress a log file in place (``x.log`` -> ``x.log.gz``).

        The original file is deleted only after compression succeeds.

        Args:
            log_path: Path to the log file.

        Returns:
            bool: True if compression was successful.
        """
        try:
            gz_path = log_path.parent / f"{log_path.name}.gz"

            with open(log_path, "rb") as f_in:
                with gzip.open(gz_path, "wb") as f_out:
                    shutil.copyfileobj(f_in, f_out)

            log_path.unlink()
            logger.debug(f"Compressed log file: {log_path.name}")
            return True
        except Exception as e:
            logger.error(f"Failed to compress log {log_path}: {e}")
            return False

    def archive_old_logs(self, days_old: int = 30) -> int:
        """Archive log files older than the given age.

        Aged files are moved into the ``archived`` subdirectory and
        compressed. NOTE: a name collision in the archive directory
        overwrites the existing archived file.

        Args:
            days_old: Archive logs older than this many days.

        Returns:
            int: Number of logs archived.
        """
        try:
            cutoff_time = datetime.now() - timedelta(days=days_old)
            archived_count = 0

            for log_file in self.get_log_files():
                if log_file.modified_time >= cutoff_time:
                    continue
                try:
                    archived_path = self.archived_dir / log_file.filename
                    shutil.move(str(log_file.path), str(archived_path))
                    self._compress_log(archived_path)
                    archived_count += 1
                    logger.debug(f"Archived log: {log_file.filename}")
                except Exception as e:
                    # Skip the problem file; keep archiving the rest.
                    logger.warning(
                        f"Failed to archive {log_file.filename}: {e}"
                    )

            logger.info(f"Archived {archived_count} old log files")
            return archived_count
        except Exception as e:
            logger.error(f"Failed to archive logs: {e}")
            return 0

    def search_logs(
        self, search_term: str, case_sensitive: bool = False
    ) -> Dict[str, List[str]]:
        """Search for lines matching a term in log files.

        Args:
            search_term: Text to search for.
            case_sensitive: Whether search is case-sensitive.

        Returns:
            dict: Maps log filename to a list of ``"<lineno>: <line>"``
                strings; files with no matches are omitted.
        """
        try:
            results = {}
            # Hoist the case-folded needle out of the per-line loop
            # instead of lowering the search term once per line.
            needle = search_term if case_sensitive else search_term.lower()

            for log_file in self.get_log_files():
                try:
                    matching_lines = []
                    with open(log_file.path, "r", encoding="utf-8") as f:
                        for line_num, line in enumerate(f, 1):
                            haystack = (
                                line if case_sensitive else line.lower()
                            )
                            if needle in haystack:
                                matching_lines.append(
                                    f"{line_num}: {line.strip()}"
                                )
                    if matching_lines:
                        results[log_file.filename] = matching_lines
                except Exception as e:
                    logger.warning(
                        f"Failed to search {log_file.filename}: {e}"
                    )

            logger.debug(
                f"Search for '{search_term}' found {len(results)} log files"
            )
            return results
        except Exception as e:
            logger.error(f"Failed to search logs: {e}")
            return {}

    def export_logs(
        self,
        output_file: str,
        log_pattern: str = "*.log",
        compress: bool = True,
    ) -> bool:
        """Export logs to a tar.gz archive or a concatenated text file.

        Args:
            output_file: Path to output file (suffix becomes ``.tar.gz``
                when ``compress`` is True).
            log_pattern: Pattern for logs to include.
            compress: Whether to produce a compressed tar archive.

        Returns:
            bool: True if export was successful.
        """
        try:
            output_path = Path(output_file)

            if compress:
                import tarfile

                tar_path = output_path.with_suffix(".tar.gz")

                with tarfile.open(tar_path, "w:gz") as tar:
                    for log_file in self.get_log_files(log_pattern):
                        tar.add(log_file.path, arcname=log_file.filename)

                logger.info(f"Exported logs to: {tar_path}")
                return True

            # Concatenate all logs into one text file. Use UTF-8 with
            # replacement (consistent with search_logs) so one file with
            # odd bytes cannot abort the whole export.
            with open(output_path, "w", encoding="utf-8") as out_f:
                for log_file in self.get_log_files(log_pattern):
                    out_f.write(f"\n\n=== {log_file.filename} ===\n\n")
                    with open(
                        log_file.path,
                        "r",
                        encoding="utf-8",
                        errors="replace",
                    ) as in_f:
                        out_f.write(in_f.read())

            logger.info(f"Exported logs to: {output_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to export logs: {e}")
            return False

    def get_log_stats(self) -> Dict[str, Any]:
        """Get statistics about log files.

        Returns:
            dict: Totals, averages, and largest/oldest/newest filenames;
                empty dict on error.
        """
        try:
            log_files = self.get_log_files()

            if not log_files:
                return {
                    "total_files": 0,
                    "total_size_bytes": 0,
                    "total_size_mb": 0,
                    "average_size_bytes": 0,
                    "largest_file": None,
                    "oldest_file": None,
                    "newest_file": None,
                }

            total_size = sum(log.size_bytes for log in log_files)
            total_files = len(log_files)

            return {
                "total_files": total_files,
                "total_size_bytes": total_size,
                "total_size_mb": total_size / (1024 * 1024),
                "average_size_bytes": total_size // total_files,
                "largest_file": max(
                    log_files, key=lambda x: x.size_bytes
                ).filename,
                # get_log_files() sorts newest-first, so the list ends
                # with the oldest file and starts with the newest.
                "oldest_file": log_files[-1].filename,
                "newest_file": log_files[0].filename,
            }
        except Exception as e:
            logger.error(f"Failed to get log stats: {e}")
            return {}

    def cleanup_logs(
        self, max_total_size_mb: int = 100, keep_files: int = 5
    ) -> int:
        """Delete oldest logs until the total size fits the limit.

        Args:
            max_total_size_mb: Maximum total log size in MB.
            keep_files: Minimum number of files to keep.

        Returns:
            int: Number of files deleted.
        """
        try:
            max_bytes = max_total_size_mb * 1024 * 1024
            log_files = self.get_log_files()

            if len(log_files) <= keep_files:
                return 0

            total_size = sum(log.size_bytes for log in log_files)

            # BUGFIX: the old code tested len(log_files) inside the loop,
            # which never changes -- the keep_files floor was not enforced
            # while deleting. Track the remaining count explicitly.
            remaining = len(log_files)
            deleted_count = 0

            # reversed() iterates oldest-first (list is newest-first).
            for log_file in reversed(log_files):
                if total_size <= max_bytes or remaining <= keep_files:
                    break
                try:
                    log_file.path.unlink()
                    total_size -= log_file.size_bytes
                    remaining -= 1
                    deleted_count += 1
                    logger.debug(f"Deleted log file: {log_file.filename}")
                except Exception as e:
                    logger.warning(
                        f"Failed to delete {log_file.filename}: {e}"
                    )

            logger.info(f"Cleaned up {deleted_count} log files")
            return deleted_count
        except Exception as e:
            logger.error(f"Failed to cleanup logs: {e}")
            return 0

    def set_log_level(self, logger_name: str, level: str) -> bool:
        """Set log level for a specific logger.

        Args:
            logger_name: Name of the logger.
            level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
                Unknown names silently fall back to INFO.

        Returns:
            bool: True if successful.
        """
        try:
            log_level = getattr(logging, level.upper(), logging.INFO)
            target_logger = logging.getLogger(logger_name)
            target_logger.setLevel(log_level)

            logger.info(f"Set {logger_name} log level to {level}")
            return True
        except Exception as e:
            logger.error(f"Failed to set log level: {e}")
            return False
|
||||
|
||||
|
||||
# Process-wide singleton, created lazily on first access.
_log_manager: Optional[LogManager] = None


def get_log_manager() -> LogManager:
    """Return the shared LogManager, creating it on first use.

    Not thread-safe on first call; concurrent first calls may each
    construct an instance (last one wins).

    Returns:
        LogManager: The log manager instance.
    """
    global _log_manager
    if _log_manager is None:
        _log_manager = LogManager()
    return _log_manager
|
||||
358
src/server/utils/metrics.py
Normal file
358
src/server/utils/metrics.py
Normal file
@@ -0,0 +1,358 @@
|
||||
"""Metrics collection for Prometheus and custom business metrics."""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from threading import Lock
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MetricType(Enum):
    """Kinds of metric, mirroring the Prometheus exposition types."""

    COUNTER = "counter"      # monotonically increasing total
    GAUGE = "gauge"          # value that can go up or down
    HISTOGRAM = "histogram"  # distribution of observed values
    SUMMARY = "summary"      # pre-aggregated quantiles
|
||||
|
||||
|
||||
@dataclass
class MetricValue:
    """A single metric value with metadata.

    Mutable: collectors update ``value`` in place for counters.
    """

    name: str
    value: float
    metric_type: MetricType
    # default_factory keeps each instance's labels dict independent.
    labels: Dict[str, str] = field(default_factory=dict)
    timestamp: datetime = field(default_factory=datetime.now)
    help_text: str = ""
|
||||
|
||||
|
||||
@dataclass
class HistogramBucket:
    """A histogram bucket for latency tracking."""

    le: float       # inclusive upper bound of the bucket, in seconds
    count: int = 0  # number of observations that fell in this bucket
|
||||
|
||||
|
||||
class MetricsCollector:
    """Collect and export metrics for monitoring.

    Supports counters, gauges, and histograms, plus download statistics
    and simple named timers. Thread-safe: all shared state is guarded by
    one non-reentrant lock; internal helpers that require the lock to be
    held already are suffixed ``_unlocked`` and must never be called
    without it.
    """

    def __init__(self):
        """Initialize metrics collector."""
        self._metrics: Dict[str, MetricValue] = {}
        # Raw observations per histogram metric, kept for statistics.
        self._request_timings: Dict[str, list[float]] = {}
        self._download_stats: Dict[str, int] = {
            "completed": 0,
            "failed": 0,
            "total_size_bytes": 0,
        }
        self._lock = Lock()
        # Active timers: timer name -> start timestamp (time.time()).
        self._timers: Dict[str, float] = {}

    def increment_counter(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Increment a counter metric, creating it on first use.

        Args:
            name: Metric name.
            value: Amount to increment by.
            labels: Labels applied only when the metric is first created.
            help_text: Help text applied only on first creation.
        """
        with self._lock:
            metric = self._metrics.get(name)
            if metric is None:
                self._metrics[name] = MetricValue(
                    name=name,
                    value=value,
                    metric_type=MetricType.COUNTER,
                    labels=labels or {},
                    help_text=help_text,
                )
            else:
                metric.value += value

    def set_gauge(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Set a gauge metric, replacing any previous value.

        Args:
            name: Metric name.
            value: Gauge value.
            labels: Optional labels for the metric.
            help_text: Optional help text describing the metric.
        """
        with self._lock:
            self._metrics[name] = MetricValue(
                name=name,
                value=value,
                metric_type=MetricType.GAUGE,
                labels=labels or {},
                help_text=help_text,
            )

    def observe_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = "",
    ) -> None:
        """Record an observation for a histogram metric.

        Args:
            name: Metric name.
            value: Value to record.
            labels: Labels applied only when the metric is first created.
            help_text: Help text applied only on first creation.
        """
        with self._lock:
            self._request_timings.setdefault(name, []).append(value)

            metric = self._metrics.get(name)
            if metric is None:
                self._metrics[name] = MetricValue(
                    name=name,
                    value=value,
                    metric_type=MetricType.HISTOGRAM,
                    labels=labels or {},
                    help_text=help_text,
                )
            else:
                # BUGFIX: the exported value used to freeze at the first
                # observation; keep it current with the latest one.
                metric.value = value

    def start_timer(self, timer_name: str) -> None:
        """Start a timer for tracking operation duration.

        Args:
            timer_name: Name of the timer.
        """
        # BUGFIX: _timers was previously mutated without the lock.
        with self._lock:
            self._timers[timer_name] = time.time()

    def end_timer(
        self,
        timer_name: str,
        metric_name: str,
        labels: Optional[Dict[str, str]] = None,
    ) -> float:
        """End a timer and record the duration as a histogram.

        Args:
            timer_name: Name of the timer to end.
            metric_name: Name of the metric to record.
            labels: Optional labels for the metric.

        Returns:
            Duration in seconds, or 0.0 if the timer was never started.
        """
        now = time.time()
        with self._lock:
            start = self._timers.pop(timer_name, None)

        if start is None:
            logger.warning(f"Timer {timer_name} not started")
            return 0.0

        duration = now - start
        # observe_histogram acquires the lock itself, so call it only
        # after releasing ours (the lock is not reentrant).
        self.observe_histogram(
            metric_name, duration, labels, "Request/operation duration"
        )
        return duration

    def record_download_success(self, size_bytes: int) -> None:
        """Record a successful download.

        Args:
            size_bytes: Size of downloaded file in bytes.
        """
        with self._lock:
            self._download_stats["completed"] += 1
            self._download_stats["total_size_bytes"] += size_bytes

        # Outside the lock: increment_counter re-acquires it.
        self.increment_counter(
            "downloads_completed_total",
            help_text="Total successful downloads",
        )

    def record_download_failure(self) -> None:
        """Record a failed download."""
        with self._lock:
            self._download_stats["failed"] += 1

        self.increment_counter(
            "downloads_failed_total", help_text="Total failed downloads"
        )

    def get_download_stats(self) -> Dict[str, int]:
        """Get a snapshot of download statistics.

        Returns:
            dict: Copy of the download statistics.
        """
        with self._lock:
            return self._download_stats.copy()

    def get_request_statistics(
        self, metric_name: str
    ) -> Optional[Dict[str, float]]:
        """Get statistics for a request timing metric.

        Args:
            metric_name: Name of the metric to analyze.

        Returns:
            Statistics (count, sum, mean, min, max, p50, p99), or None
            if no observations exist for the metric.
        """
        with self._lock:
            return self._request_stats_unlocked(metric_name)

    def _request_stats_unlocked(
        self, metric_name: str
    ) -> Optional[Dict[str, float]]:
        """Compute timing statistics; caller MUST hold self._lock."""
        timings = self._request_timings.get(metric_name)
        if not timings:
            return None

        # Sort once and reuse for min/max and both percentiles (the old
        # code sorted the list twice).
        ordered = sorted(timings)
        count = len(ordered)
        total = sum(ordered)
        return {
            "count": count,
            "sum": total,
            "mean": total / count,
            "min": ordered[0],
            "max": ordered[-1],
            "p50": ordered[count // 2],
            "p99": ordered[int(count * 0.99)],
        }

    def export_prometheus_format(self) -> str:
        """Export metrics in Prometheus text format.

        Returns:
            str: Prometheus format metrics.
        """
        with self._lock:
            lines = []

            for name, metric in self._metrics.items():
                # Emit HELP/TYPE headers only when help text exists,
                # matching the original behavior.
                if metric.help_text:
                    lines.append(f"# HELP {name} {metric.help_text}")
                    lines.append(
                        f"# TYPE {name} {metric.metric_type.value}"
                    )

                label_str = ""
                if metric.labels:
                    label_pairs = [
                        f'{k}="{v}"' for k, v in metric.labels.items()
                    ]
                    label_str = "{" + ",".join(label_pairs) + "}"

                lines.append(f"{name}{label_str} {metric.value}")

            return "\n".join(lines)

    def export_json(self) -> Dict[str, Any]:
        """Export metrics as JSON.

        Returns:
            dict: Metrics in JSON-serializable format.
        """
        with self._lock:
            metrics_dict = {
                name: {
                    "value": metric.value,
                    "type": metric.metric_type.value,
                    "labels": metric.labels,
                    "timestamp": metric.timestamp.isoformat(),
                }
                for name, metric in self._metrics.items()
            }

            return {
                "metrics": metrics_dict,
                # Copy so callers cannot mutate our internal counters.
                "downloads": dict(self._download_stats),
                # BUGFIX: the old code called get_request_statistics()
                # here while already holding the non-reentrant lock,
                # deadlocking every export. Use the unlocked helper.
                "request_timings": {
                    name: self._request_stats_unlocked(name)
                    for name in self._request_timings
                },
            }

    def reset_metrics(self) -> None:
        """Reset all collected metrics to their initial state."""
        with self._lock:
            self._metrics.clear()
            self._request_timings.clear()
            self._download_stats = {
                "completed": 0,
                "failed": 0,
                "total_size_bytes": 0,
            }

    def get_all_metrics(self) -> Dict[str, "MetricValue"]:
        """Get a shallow copy of all collected metrics.

        Returns:
            dict: All metrics keyed by name.
        """
        with self._lock:
            return self._metrics.copy()
|
||||
|
||||
|
||||
# Process-wide singleton, created lazily on first access.
_metrics_collector: Optional[MetricsCollector] = None


def get_metrics_collector() -> MetricsCollector:
    """Return the shared MetricsCollector, creating it on first use.

    Not thread-safe on first call; concurrent first calls may each
    construct an instance (last one wins).

    Returns:
        MetricsCollector: The metrics collector instance.
    """
    global _metrics_collector
    if _metrics_collector is None:
        _metrics_collector = MetricsCollector()
    return _metrics_collector
|
||||
|
||||
|
||||
class TimerContext:
    """Context manager that times a block and records it as a histogram.

    Example:
        with TimerContext("request_duration_seconds"):
            handle_request()
    """

    def __init__(
        self,
        metric_name: str,
        timer_name: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
    ):
        """Initialize timer context.

        Args:
            metric_name: Name of the metric to record.
            timer_name: Optional distinct timer name; defaults to
                ``metric_name``.
            labels: Optional labels attached to the recorded metric.
        """
        self.metric_name = metric_name
        self.timer_name = timer_name if timer_name is not None else metric_name
        self.labels = labels
        self.collector = get_metrics_collector()

    def __enter__(self):
        """Start timing on entry and return self."""
        self.collector.start_timer(self.timer_name)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Stop timing and record the duration (even on exception)."""
        self.collector.end_timer(
            self.timer_name, self.metric_name, self.labels
        )
|
||||
361
src/server/utils/system.py
Normal file
361
src/server/utils/system.py
Normal file
@@ -0,0 +1,361 @@
|
||||
"""System utility functions for monitoring and management."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import psutil
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class DiskInfo:
    """Disk usage snapshot for one path/partition."""

    total_bytes: int     # total capacity
    used_bytes: int      # bytes in use
    free_bytes: int      # bytes available
    percent_used: float  # usage as a percentage (0-100)
    path: str            # path or mountpoint the numbers refer to
|
||||
|
||||
|
||||
@dataclass
class ProcessInfo:
    """Snapshot of a single OS process."""

    pid: int               # process id
    name: str              # executable/process name
    status: str            # e.g. "running", "sleeping"
    cpu_percent: float     # CPU usage percentage
    memory_percent: float  # share of system memory, percent
    memory_mb: float       # resident set size in MiB
    create_time: datetime  # process start time
|
||||
|
||||
|
||||
class SystemUtilities:
    """Utilities for system monitoring and filesystem management.

    All methods are static and best-effort: failures are logged and
    reported via neutral return values (None, 0, [], {}) rather than
    raised.
    """

    @staticmethod
    def get_disk_usage(path: str = "/") -> Optional["DiskInfo"]:
        """Get disk usage information for one path.

        Args:
            path: Path to check disk usage for.

        Returns:
            DiskInfo: Disk usage information, or None on error.
        """
        try:
            usage = psutil.disk_usage(path)
            return DiskInfo(
                total_bytes=usage.total,
                used_bytes=usage.used,
                free_bytes=usage.free,
                percent_used=usage.percent,
                path=path,
            )
        except Exception as e:
            logger.error(f"Failed to get disk usage for {path}: {e}")
            return None

    @staticmethod
    def get_all_disk_usage() -> List["DiskInfo"]:
        """Get disk usage for all mounted partitions.

        Partitions that cannot be queried (e.g. empty CD drives,
        permission errors) are skipped with a warning.

        Returns:
            list: DiskInfo for each readable partition.
        """
        try:
            disk_infos = []
            for partition in psutil.disk_partitions():
                try:
                    usage = psutil.disk_usage(partition.mountpoint)
                    disk_infos.append(
                        DiskInfo(
                            total_bytes=usage.total,
                            used_bytes=usage.used,
                            free_bytes=usage.free,
                            percent_used=usage.percent,
                            path=partition.mountpoint,
                        )
                    )
                except Exception as e:
                    logger.warning(
                        f"Failed to get usage for {partition.mountpoint}: {e}"
                    )
            return disk_infos
        except Exception as e:
            logger.error(f"Failed to get all disk usage: {e}")
            return []

    @staticmethod
    def cleanup_directory(
        directory: str, pattern: str = "*", max_age_days: int = 30
    ) -> int:
        """Delete old files in a directory matching a glob pattern.

        Args:
            directory: Directory to clean (non-recursive unless the
                pattern itself recurses, e.g. ``**/*``).
            pattern: File pattern to match (glob).
            max_age_days: Only delete files whose mtime is older than
                this many days.

        Returns:
            int: Number of files deleted.
        """
        try:
            from datetime import timedelta

            path = Path(directory)
            if not path.exists():
                logger.warning(f"Directory not found: {directory}")
                return 0

            deleted_count = 0
            cutoff_time = datetime.now() - timedelta(days=max_age_days)

            for file_path in path.glob(pattern):
                if not file_path.is_file():
                    continue
                file_time = datetime.fromtimestamp(
                    file_path.stat().st_mtime
                )
                if file_time >= cutoff_time:
                    continue
                try:
                    file_path.unlink()
                    deleted_count += 1
                    logger.debug(f"Deleted file: {file_path}")
                except Exception as e:
                    logger.warning(f"Failed to delete {file_path}: {e}")

            logger.info(
                f"Cleaned up {deleted_count} files from {directory}"
            )
            return deleted_count
        except Exception as e:
            logger.error(f"Failed to cleanup directory {directory}: {e}")
            return 0

    @staticmethod
    def cleanup_empty_directories(directory: str) -> int:
        """Remove empty subdirectories beneath a root directory.

        Walks bottom-up so directories that become empty after their
        children are removed are deleted in the same pass. The root
        itself is never removed.

        Args:
            directory: Root directory to clean.

        Returns:
            int: Number of directories deleted.
        """
        try:
            path = Path(directory)
            if not path.exists():
                return 0

            deleted_count = 0

            for root, dirs, files in os.walk(directory, topdown=False):
                for dir_name in dirs:
                    dir_path = Path(root) / dir_name
                    try:
                        if not os.listdir(dir_path):
                            os.rmdir(dir_path)
                            deleted_count += 1
                            logger.debug(
                                f"Deleted empty directory: {dir_path}"
                            )
                    except Exception as e:
                        # Non-fatal: permissions, races, etc.
                        logger.debug(f"Cannot delete {dir_path}: {e}")

            logger.info(f"Cleaned up {deleted_count} empty directories")
            return deleted_count
        except Exception as e:
            logger.error(f"Failed to cleanup empty directories: {e}")
            return 0

    @staticmethod
    def get_directory_size(directory: str) -> int:
        """Get total size of all files under a directory (recursive).

        Args:
            directory: Directory path.

        Returns:
            int: Total size in bytes (0 if missing or on error).
        """
        try:
            path = Path(directory)
            if not path.exists():
                return 0

            return sum(
                entry.stat().st_size
                for entry in path.rglob("*")
                if entry.is_file()
            )
        except Exception as e:
            logger.error(
                f"Failed to get directory size for {directory}: {e}"
            )
            return 0

    @staticmethod
    def get_process_info(
        pid: Optional[int] = None,
    ) -> Optional["ProcessInfo"]:
        """Get information about a process.

        Args:
            pid: Process ID. If None, uses current process.

        Returns:
            ProcessInfo: Process information, or None on error.
        """
        try:
            if pid is None:
                pid = os.getpid()

            process = psutil.Process(pid)
            # oneshot() caches the underlying reads so the attribute
            # calls below cost a single syscall batch.
            with process.oneshot():
                return ProcessInfo(
                    pid=process.pid,
                    name=process.name(),
                    status=process.status(),
                    cpu_percent=process.cpu_percent(),
                    memory_percent=process.memory_percent(),
                    memory_mb=process.memory_info().rss / (1024 * 1024),
                    create_time=datetime.fromtimestamp(
                        process.create_time()
                    ),
                )
        except Exception as e:
            logger.error(f"Failed to get process info for {pid}: {e}")
            return None

    @staticmethod
    def get_all_processes() -> List["ProcessInfo"]:
        """Get information about all running processes.

        Returns:
            list: ProcessInfo for each process that could be queried.
        """
        try:
            processes = []
            # Iterate plain pids. The old code prefetched an attrs list
            # via process_iter([...]) and then discarded it, re-querying
            # every pid anyway -- pure wasted work.
            for proc in psutil.process_iter():
                try:
                    info = SystemUtilities.get_process_info(proc.pid)
                    if info:
                        processes.append(info)
                except Exception:
                    # Processes can vanish mid-iteration; skip them.
                    pass
            return processes
        except Exception as e:
            logger.error(f"Failed to get all processes: {e}")
            return []

    @staticmethod
    def get_system_info() -> Dict[str, Any]:
        """Get comprehensive static system information.

        Returns:
            dict: Platform, CPU counts, boot time, hostname, and Python
                version; empty dict on error.
        """
        try:
            import platform

            return {
                "platform": platform.platform(),
                "processor": platform.processor(),
                "cpu_count": psutil.cpu_count(logical=False),
                "cpu_count_logical": psutil.cpu_count(logical=True),
                "boot_time": datetime.fromtimestamp(
                    psutil.boot_time()
                ).isoformat(),
                "hostname": platform.node(),
                "python_version": platform.python_version(),
            }
        except Exception as e:
            logger.error(f"Failed to get system info: {e}")
            return {}

    @staticmethod
    def get_network_info() -> Dict[str, Any]:
        """Get cumulative network I/O counters.

        Returns:
            dict: Bytes/packets sent and received, errors, and drops;
                empty dict on error.
        """
        try:
            net_io = psutil.net_io_counters()
            return {
                "bytes_sent": net_io.bytes_sent,
                "bytes_recv": net_io.bytes_recv,
                "packets_sent": net_io.packets_sent,
                "packets_recv": net_io.packets_recv,
                "errors_in": net_io.errin,
                "errors_out": net_io.errout,
                "dropped_in": net_io.dropin,
                "dropped_out": net_io.dropout,
            }
        except Exception as e:
            logger.error(f"Failed to get network info: {e}")
            return {}

    @staticmethod
    def copy_file_atomic(
        src: str, dest: str, chunk_size: int = 1024 * 1024
    ) -> bool:
        """Copy a file atomically via a temporary file and rename.

        The destination never exists in a half-written state: data is
        written to ``<dest>.tmp`` first, then renamed over the target
        (atomic on POSIX within one filesystem).

        Args:
            src: Source file path.
            dest: Destination file path.
            chunk_size: Size of chunks for copying.

        Returns:
            bool: True if successful.
        """
        temp_path = None
        try:
            src_path = Path(src)
            dest_path = Path(dest)

            if not src_path.exists():
                logger.error(f"Source file not found: {src}")
                return False

            temp_path = dest_path.parent / f"{dest_path.name}.tmp"

            # BUGFIX: chunk_size was accepted but ignored (the old code
            # delegated to shutil.copyfile). Copy in explicit chunks.
            with open(src_path, "rb") as f_in:
                with open(temp_path, "wb") as f_out:
                    while chunk := f_in.read(chunk_size):
                        f_out.write(chunk)

            temp_path.replace(dest_path)

            logger.debug(f"Atomically copied {src} to {dest}")
            return True
        except Exception as e:
            logger.error(f"Failed to copy file {src} to {dest}: {e}")
            # Don't leave a partial temp file behind on failure.
            if temp_path is not None:
                try:
                    temp_path.unlink()
                except OSError:
                    pass
            return False
|
||||
|
||||
|
||||
# Process-wide singleton, created lazily on first access.
_system_utilities: Optional[SystemUtilities] = None


def get_system_utilities() -> SystemUtilities:
    """Return the shared SystemUtilities, creating it on first use.

    Not thread-safe on first call; concurrent first calls may each
    construct an instance (last one wins).

    Returns:
        SystemUtilities: The system utilities instance.
    """
    global _system_utilities
    if _system_utilities is None:
        _system_utilities = SystemUtilities()
    return _system_utilities
|
||||
Reference in New Issue
Block a user