"""
Error Handling & Recovery System for AniWorld App

This module provides comprehensive error handling for network failures,
download errors, and system recovery mechanisms.
"""
import logging
|
|
import time
|
|
import functools
|
|
import threading
|
|
from typing import Callable, Any, Dict, Optional, List
|
|
from datetime import datetime, timedelta
|
|
import requests
|
|
import socket
|
|
import ssl
|
|
from urllib3.exceptions import ConnectionError, TimeoutError, ReadTimeoutError
|
|
from requests.exceptions import RequestException, ConnectionError as ReqConnectionError
|
|
from flask import jsonify
|
|
import os
|
|
import hashlib
|
|
|
|
|
|
class NetworkError(Exception):
    """Base exception for network-related failures."""
class DownloadError(Exception):
    """Base exception for download-related failures."""
class RetryableError(Exception):
    """Base exception for errors that may safely be retried."""
class NonRetryableError(Exception):
    """Base exception for errors that must not be retried."""
class ErrorRecoveryManager:
    """Manages error recovery strategies and retry mechanisms.

    Keeps a bounded history of recent errors, maintains a temporary URL
    blacklist, and classifies exceptions as retryable or not.
    """

    # Maximum number of records retained in ``error_history``.
    _HISTORY_LIMIT = 1000
    # HTTP status codes that indicate a transient, retryable failure.
    _RETRYABLE_STATUS_CODES = frozenset({408, 429, 500, 502, 503, 504})

    def __init__(self, max_retries: int = 3, base_delay: float = 1.0, max_delay: float = 60.0):
        """
        Args:
            max_retries: Default maximum number of retry attempts.
            base_delay: Initial backoff delay in seconds.
            max_delay: Upper bound for the backoff delay in seconds.
        """
        self.max_retries = max_retries
        self.base_delay = base_delay
        self.max_delay = max_delay
        self.error_history: List[Dict] = []
        # url -> expiry time of the temporary ban
        self.blacklisted_urls: Dict[str, datetime] = {}
        self.retry_counts: Dict[str, int] = {}
        self.logger = logging.getLogger(__name__)

    def is_network_error(self, error: Exception) -> bool:
        """Check if error is network-related (connection, timeout, DNS, SSL)."""
        network_errors = (
            ConnectionError, TimeoutError, ReadTimeoutError,
            ReqConnectionError, socket.timeout, socket.gaierror,
            ssl.SSLError, requests.exceptions.Timeout,
            requests.exceptions.ConnectionError
        )
        return isinstance(error, network_errors)

    def is_retryable_error(self, error: Exception) -> bool:
        """Determine if an error should be retried."""
        if isinstance(error, NonRetryableError):
            return False

        if isinstance(error, RetryableError):
            return True

        # Network errors are generally retryable
        if self.is_network_error(error):
            return True

        # HTTP status codes that are retryable.
        # NOTE: must be an explicit None check -- requests.Response.__bool__
        # is False for 4xx/5xx statuses, so truth-testing ``error.response``
        # would skip this branch for exactly the statuses we want to inspect.
        response = getattr(error, 'response', None)
        if response is not None:
            return response.status_code in self._RETRYABLE_STATUS_CODES

        return False

    def calculate_delay(self, attempt: int) -> float:
        """Calculate exponential backoff delay, capped at ``max_delay``."""
        delay = self.base_delay * (2 ** (attempt - 1))
        return min(delay, self.max_delay)

    def log_error(self, error: Exception, context: str, attempt: Optional[int] = None):
        """Log error with context information and record it in history."""
        # Classify once; used both for the record and the log level.
        retryable = self.is_retryable_error(error)
        error_info = {
            'timestamp': datetime.now().isoformat(),
            'error_type': type(error).__name__,
            'error_message': str(error),
            'context': context,
            'attempt': attempt,
            'retryable': retryable
        }

        self.error_history.append(error_info)

        # Keep only the most recent records to bound memory use.
        if len(self.error_history) > self._HISTORY_LIMIT:
            self.error_history = self.error_history[-self._HISTORY_LIMIT:]

        log_level = logging.WARNING if retryable else logging.ERROR
        self.logger.log(log_level, f"Error in {context}: {error}", exc_info=True)

    def add_to_blacklist(self, url: str, duration_minutes: int = 30):
        """Add URL to the temporary blacklist for ``duration_minutes``."""
        self.blacklisted_urls[url] = datetime.now() + timedelta(minutes=duration_minutes)

    def is_blacklisted(self, url: str) -> bool:
        """Check if URL is currently blacklisted; prunes the entry if expired."""
        expiry = self.blacklisted_urls.get(url)
        if expiry is not None:
            if datetime.now() < expiry:
                return True
            del self.blacklisted_urls[url]
        return False

    def cleanup_blacklist(self):
        """Remove all expired entries from the blacklist."""
        now = datetime.now()
        expired_keys = [url for url, expiry in self.blacklisted_urls.items() if now >= expiry]
        for key in expired_keys:
            del self.blacklisted_urls[key]
class RetryMechanism:
    """Advanced retry mechanism with exponential backoff and jitter."""

    def __init__(self, recovery_manager: ErrorRecoveryManager):
        self.recovery_manager = recovery_manager
        self.logger = logging.getLogger(__name__)

    def retry_with_backoff(
        self,
        func: Callable,
        *args,
        max_retries: int = None,
        backoff_factor: float = 1.0,
        jitter: bool = True,
        retry_on: tuple = None,
        context: str = None,
        **kwargs
    ) -> Any:
        """
        Retry function with exponential backoff and jitter.

        Args:
            func: Function to retry.
            max_retries: Maximum number of retries (uses recovery manager default if None).
            backoff_factor: Multiplier for backoff delay.
            jitter: Randomize the delay (x0.5-x1.0) to prevent thundering herd.
            retry_on: Tuple of exception types to retry on; when None, the
                recovery manager's retryability check decides.
            context: Context string for logging (defaults to func's name).

        Returns:
            Function result.

        Raises:
            The last exception if all retries fail, or immediately for a
            non-retryable error.
        """
        # Hoisted out of the retry loop (was re-imported on every attempt).
        import random

        if max_retries is None:
            max_retries = self.recovery_manager.max_retries

        if context is None:
            context = func.__name__

        # max_retries retries plus the initial attempt.
        for attempt in range(1, max_retries + 2):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                # Decide whether this error class is worth retrying.
                if retry_on is None:
                    should_retry = self.recovery_manager.is_retryable_error(e)
                else:
                    should_retry = isinstance(e, retry_on)

                if attempt > max_retries or not should_retry:
                    self.recovery_manager.log_error(e, context, attempt)
                    # Bare raise preserves the original traceback
                    # (``raise e`` would re-anchor it here).
                    raise

                delay = self.recovery_manager.calculate_delay(attempt) * backoff_factor
                if jitter:
                    # Scale by a random factor in [0.5, 1.0) so concurrent
                    # retriers de-synchronize instead of stampeding together.
                    delay *= (0.5 + random.random() * 0.5)

                self.recovery_manager.log_error(e, context, attempt)
                self.logger.info(f"Retrying {context} in {delay:.2f}s (attempt {attempt}/{max_retries})")

                time.sleep(delay)
class NetworkHealthChecker:
    """Monitor network connectivity and health."""

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # Maps "host:port" -> (timestamp, reachable) pairs.
        self.connectivity_cache = {}
        self.cache_timeout = 60  # seconds a cached result stays valid

    def check_connectivity(self, host: str = "8.8.8.8", port: int = 53, timeout: float = 3.0) -> bool:
        """Check basic network connectivity by opening a TCP connection.

        Results are cached per host:port for ``cache_timeout`` seconds.
        """
        cache_key = f"{host}:{port}"
        now = time.time()

        # Serve a recent result from the cache to avoid hammering the host.
        cached = self.connectivity_cache.get(cache_key)
        if cached is not None:
            timestamp, result = cached
            if now - timestamp < self.cache_timeout:
                return result

        # Use create_connection with an explicit per-call timeout instead of
        # socket.setdefaulttimeout (which mutates process-wide state and was
        # never restored), and close the socket deterministically instead of
        # leaking the descriptor. All connect failures (refused, DNS, timeout)
        # are OSError subclasses.
        try:
            with socket.create_connection((host, port), timeout=timeout):
                result = True
        except OSError:
            result = False

        self.connectivity_cache[cache_key] = (now, result)
        return result

    def check_url_reachability(self, url: str, timeout: float = 10.0) -> bool:
        """Check if a specific URL is reachable (HEAD request, redirects followed)."""
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=True)
            return response.status_code < 400
        except Exception as e:
            self.logger.debug(f"URL {url} not reachable: {e}")
            return False

    def get_network_status(self) -> Dict[str, Any]:
        """Get comprehensive network status (connectivity probes + timestamp)."""
        return {
            'basic_connectivity': self.check_connectivity(),
            'dns_resolution': self.check_connectivity("1.1.1.1", 53),
            'timestamp': datetime.now().isoformat()
        }
class FileCorruptionDetector:
    """Detect and handle file corruption."""

    # Extensions we expect for downloaded video files.
    _VIDEO_EXTENSIONS = {'.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv', '.webm'}

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def calculate_checksum(self, file_path: str, algorithm: str = 'md5') -> str:
        """Calculate a file checksum.

        Args:
            file_path: Path of the file to hash.
            algorithm: Any name accepted by ``hashlib.new`` (e.g. 'md5', 'sha256').

        Returns:
            Hex digest of the file contents.

        Raises:
            OSError: If the file cannot be read.
            ValueError: If the algorithm name is unknown.
        """
        # hashlib.new validates the algorithm name; getattr() on the module
        # could resolve arbitrary non-constructor attributes.
        hash_func = hashlib.new(algorithm)

        try:
            with open(file_path, 'rb') as f:
                # 64 KiB chunks keep memory flat while reducing syscalls.
                for chunk in iter(lambda: f.read(65536), b""):
                    hash_func.update(chunk)
            return hash_func.hexdigest()
        except Exception as e:
            self.logger.error(f"Failed to calculate checksum for {file_path}: {e}")
            raise

    def verify_file_size(self, file_path: str, expected_size: int = None, min_size: int = 1024) -> bool:
        """Verify file has a reasonable size.

        Args:
            file_path: File to check.
            expected_size: Expected size in bytes; checked with 10% tolerance when given.
            min_size: Smallest acceptable size in bytes.
        """
        try:
            actual_size = os.path.getsize(file_path)

            # Check minimum size
            if actual_size < min_size:
                self.logger.warning(f"File {file_path} too small: {actual_size} bytes")
                return False

            # Check expected size if provided (10% tolerance).
            if expected_size and abs(actual_size - expected_size) > expected_size * 0.1:
                self.logger.warning(f"File {file_path} size mismatch: expected {expected_size}, got {actual_size}")
                return False

            return True
        except Exception as e:
            self.logger.error(f"Failed to verify file size for {file_path}: {e}")
            return False

    def is_valid_video_file(self, file_path: str) -> bool:
        """Basic validation for video files.

        Checks existence, size, extension (warning only), and that the file
        header is readable. A readable file with an unrecognized header is
        still accepted -- the signature check can only confirm, not reject.
        """
        if not os.path.exists(file_path):
            return False

        # Check file size (default minimum applies).
        if not self.verify_file_size(file_path):
            return False

        # Check file extension (log-only; odd extensions are tolerated).
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in self._VIDEO_EXTENSIONS:
            self.logger.warning(f"File {file_path} has unexpected extension: {ext}")

        # Try to read the first few bytes to check for known container headers.
        try:
            with open(file_path, 'rb') as f:
                header = f.read(32)

            # MP4/MOV store 'ftyp' at offset 4 (the first 4 bytes are the
            # variable box size), so match there rather than against one
            # hard-coded box length and brand.
            if len(header) >= 8 and header[4:8] == b'ftyp':
                return True  # MP4 / MOV family
            if header.startswith(b'\x1a\x45\xdf\xa3'):
                return True  # Matroska / WebM
            if header.startswith(b'RIFF'):
                return True  # AVI
            # No recognized signature: accept anyway since size checks passed.
            return True
        except Exception as e:
            self.logger.error(f"Failed to read file header for {file_path}: {e}")
            return False
class RecoveryStrategies:
    """Implement various recovery strategies for different error types."""

    def __init__(self, recovery_manager: ErrorRecoveryManager):
        self.recovery_manager = recovery_manager
        self.retry_mechanism = RetryMechanism(recovery_manager)
        self.health_checker = NetworkHealthChecker()
        self.corruption_detector = FileCorruptionDetector()
        self.logger = logging.getLogger(__name__)

    def handle_network_failure(self, func: Callable, *args, **kwargs) -> Any:
        """Run ``func`` with a connectivity pre-check and aggressive retries."""
        def guarded_call():
            # Fail fast (and retryably) when there is no connectivity at all.
            if not self.health_checker.check_connectivity():
                raise NetworkError("No internet connectivity")
            return func(*args, **kwargs)

        return self.retry_mechanism.retry_with_backoff(
            guarded_call,
            max_retries=5,
            backoff_factor=1.5,
            context=f"network_operation_{func.__name__}",
            retry_on=(NetworkError, ConnectionError, TimeoutError),
        )

    def handle_download_failure(self, download_func: Callable, file_path: str, *args, **kwargs) -> Any:
        """Handle download failures with corruption checking and resume support."""
        def verified_download():
            outcome = download_func(*args, **kwargs)

            # Only verify when the download actually produced a file.
            if not os.path.exists(file_path):
                return outcome
            if self.corruption_detector.is_valid_video_file(file_path):
                return outcome

            # Corrupted payload: remove it so the retry re-downloads from scratch.
            self.logger.warning(f"Downloaded file appears corrupted: {file_path}")
            try:
                os.remove(file_path)
            except Exception as e:
                self.logger.error(f"Failed to remove corrupted file {file_path}: {e}")
            raise DownloadError("Downloaded file is corrupted")

        return self.retry_mechanism.retry_with_backoff(
            verified_download,
            max_retries=3,
            backoff_factor=2.0,
            context=f"download_{os.path.basename(file_path)}",
            retry_on=(DownloadError, NetworkError, ConnectionError),
        )
# Singleton instances
# Module-level singletons shared by the decorators below and by importers of
# this module. Note recovery_strategies wraps the same error_recovery_manager,
# so their error history and blacklist state are shared.
error_recovery_manager = ErrorRecoveryManager()
recovery_strategies = RecoveryStrategies(error_recovery_manager)
network_health_checker = NetworkHealthChecker()
file_corruption_detector = FileCorruptionDetector()
def with_error_recovery(max_retries: int = None, context: str = None):
    """Decorator that routes every call through the global retry mechanism.

    Args:
        max_retries: Maximum number of retries (retry mechanism default if None).
        context: Context string for logging (defaults to the function's name).
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            mechanism = recovery_strategies.retry_mechanism
            return mechanism.retry_with_backoff(
                func,
                *args,
                max_retries=max_retries,
                context=context or func.__name__,
                **kwargs,
            )
        return wrapper
    return decorator
def handle_api_errors(func: Callable) -> Callable:
    """Decorator for consistent API error handling.

    Converts exceptions escaping the wrapped view into JSON error responses:
    400 for non-retryable errors, 503 for retryable ones, and 500 for
    anything unexpected. Every error is also recorded via the global
    error_recovery_manager.
    """
    def _error_response(message, error_type, retry_suggested, status):
        # Uniform JSON error envelope shared by all failure branches.
        payload = jsonify({
            'status': 'error',
            'message': message,
            'error_type': error_type,
            'retry_suggested': retry_suggested
        })
        return payload, status

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except NonRetryableError as e:
            error_recovery_manager.log_error(e, f"api_{func.__name__}")
            return _error_response('Operation failed', 'non_retryable', False, 400)
        except RetryableError as e:
            error_recovery_manager.log_error(e, f"api_{func.__name__}")
            return _error_response('Temporary failure, please try again', 'retryable', True, 503)
        except Exception as e:
            error_recovery_manager.log_error(e, f"api_{func.__name__}")
            return _error_response(
                'An unexpected error occurred',
                'unknown',
                error_recovery_manager.is_retryable_error(e),
                500,
            )
    return wrapper
# Export main components
# Public API: classes, exception types, decorators, and the shared
# module-level singleton instances.
__all__ = [
    'ErrorRecoveryManager',
    'RetryMechanism',
    'NetworkHealthChecker',
    'FileCorruptionDetector',
    'RecoveryStrategies',
    'NetworkError',
    'DownloadError',
    'RetryableError',
    'NonRetryableError',
    'with_error_recovery',
    'handle_api_errors',
    'error_recovery_manager',
    'recovery_strategies',
    'network_health_checker',
    'file_corruption_detector'
]