# Aniworld/src/server/web/middleware/error_handler.py
"""
Error Handling & Recovery System for AniWorld App
This module provides comprehensive error handling for network failures,
download errors, and system recovery mechanisms.
"""
import logging
import time
import functools
import threading
from typing import Callable, Any, Dict, Optional, List
from datetime import datetime, timedelta
import requests
import socket
import ssl
from urllib3.exceptions import ConnectionError, TimeoutError, ReadTimeoutError
from requests.exceptions import RequestException, ConnectionError as ReqConnectionError
from flask import jsonify
import os
import hashlib
class NetworkError(Exception):
    """Raised for network-related failures (connectivity, timeouts, DNS)."""
class DownloadError(Exception):
    """Raised when a download fails or produces a corrupted file."""
class RetryableError(Exception):
    """Marker exception: operations failing with this type are safe to retry."""
class NonRetryableError(Exception):
    """Marker exception: operations failing with this type must not be retried."""
class ErrorRecoveryManager:
    """Manages error recovery strategies and retry mechanisms.

    Central store for retry policy, a rolling history of recent errors and a
    temporary URL blacklist, shared by the retry helpers in this module.
    """

    def __init__(self, max_retries: int = 3, base_delay: float = 1.0, max_delay: float = 60.0):
        """
        Args:
            max_retries: Default retry budget used by RetryMechanism.
            base_delay: First backoff delay in seconds.
            max_delay: Upper bound for any single backoff delay, in seconds.
        """
        self.max_retries = max_retries
        self.base_delay = base_delay
        self.max_delay = max_delay
        # Rolling log of recent errors (capped at the last 1000 entries).
        self.error_history: List[Dict] = []
        # URL -> expiry timestamp; entries are ignored/pruned once expired.
        self.blacklisted_urls: Dict[str, datetime] = {}
        self.retry_counts: Dict[str, int] = {}
        self.logger = logging.getLogger(__name__)

    def is_network_error(self, error: Exception) -> bool:
        """Check if error is network-related."""
        network_errors = (
            ConnectionError, TimeoutError, ReadTimeoutError,
            ReqConnectionError, socket.timeout, socket.gaierror,
            ssl.SSLError, requests.exceptions.Timeout,
            requests.exceptions.ConnectionError
        )
        return isinstance(error, network_errors)

    def is_retryable_error(self, error: Exception) -> bool:
        """Determine if an error should be retried.

        Explicit Non/RetryableError markers win; otherwise network errors and
        a small set of transient HTTP status codes are considered retryable.
        """
        if isinstance(error, NonRetryableError):
            return False
        if isinstance(error, RetryableError):
            return True
        # Network errors are generally retryable.
        if self.is_network_error(error):
            return True
        # HTTP status codes that are retryable.  Compare the attached response
        # against None explicitly: requests.Response.__bool__ is False for
        # 4xx/5xx responses, so a bare truthiness test (`and error.response`)
        # would skip exactly the error responses we need to inspect here.
        response = getattr(error, 'response', None)
        if response is not None:
            retryable_codes = {408, 429, 500, 502, 503, 504}
            return response.status_code in retryable_codes
        return False

    def calculate_delay(self, attempt: int) -> float:
        """Calculate exponential backoff delay for a 1-based attempt number.

        Doubles per attempt starting at base_delay, capped at max_delay.
        """
        delay = self.base_delay * (2 ** (attempt - 1))
        return min(delay, self.max_delay)

    def log_error(self, error: Exception, context: str, attempt: Optional[int] = None):
        """Record the error in history and log it with context information."""
        # Computed once: used both in the history entry and the log level.
        retryable = self.is_retryable_error(error)
        error_info = {
            'timestamp': datetime.now().isoformat(),
            'error_type': type(error).__name__,
            'error_message': str(error),
            'context': context,
            'attempt': attempt,
            'retryable': retryable
        }
        self.error_history.append(error_info)
        # Keep only last 1000 errors
        if len(self.error_history) > 1000:
            self.error_history = self.error_history[-1000:]
        log_level = logging.WARNING if retryable else logging.ERROR
        self.logger.log(log_level, f"Error in {context}: {error}", exc_info=True)

    def add_to_blacklist(self, url: str, duration_minutes: int = 30):
        """Add URL to temporary blacklist."""
        self.blacklisted_urls[url] = datetime.now() + timedelta(minutes=duration_minutes)

    def is_blacklisted(self, url: str) -> bool:
        """Check if URL is currently blacklisted (expired entries are pruned)."""
        expiry = self.blacklisted_urls.get(url)
        if expiry is not None:
            if datetime.now() < expiry:
                return True
            # Lazily drop the expired entry.
            del self.blacklisted_urls[url]
        return False

    def cleanup_blacklist(self):
        """Remove expired entries from blacklist."""
        now = datetime.now()
        expired_keys = [url for url, expiry in self.blacklisted_urls.items() if now >= expiry]
        for key in expired_keys:
            del self.blacklisted_urls[key]
class RetryMechanism:
    """Advanced retry mechanism with exponential backoff and jitter."""

    def __init__(self, recovery_manager: ErrorRecoveryManager):
        self.recovery_manager = recovery_manager
        self.logger = logging.getLogger(__name__)

    def retry_with_backoff(
        self,
        func: Callable,
        *args,
        max_retries: Optional[int] = None,
        backoff_factor: float = 1.0,
        jitter: bool = True,
        retry_on: Optional[tuple] = None,
        context: Optional[str] = None,
        **kwargs
    ) -> Any:
        """
        Retry function with exponential backoff and jitter.

        Args:
            func: Function to retry
            max_retries: Maximum number of retries (uses recovery manager default if None)
            backoff_factor: Multiplier for backoff delay
            jitter: Add random jitter to prevent thundering herd
            retry_on: Tuple of exception types to retry on (overrides the
                recovery manager's retryability classification when given)
            context: Context string for logging

        Returns:
            Function result

        Raises:
            Last exception if all retries fail
        """
        # Hoisted out of the retry loop (was re-imported on every attempt).
        import random

        if max_retries is None:
            max_retries = self.recovery_manager.max_retries
        if context is None:
            context = func.__name__
        last_exception = None
        # max_retries retries plus one initial attempt.
        for attempt in range(1, max_retries + 2):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                last_exception = e
                # Explicit retry_on filter wins; otherwise defer to the manager.
                if retry_on is not None:
                    should_retry = isinstance(e, retry_on)
                else:
                    should_retry = self.recovery_manager.is_retryable_error(e)
                if attempt > max_retries or not should_retry:
                    self.recovery_manager.log_error(e, context, attempt)
                    # Bare raise preserves the original traceback.
                    raise
                # Calculate delay with optional jitter (50-100% of nominal,
                # to avoid thundering-herd retries).
                delay = self.recovery_manager.calculate_delay(attempt) * backoff_factor
                if jitter:
                    delay *= 0.5 + random.random() * 0.5
                self.recovery_manager.log_error(e, context, attempt)
                self.logger.info(
                    "Retrying %s in %.2fs (attempt %d/%d)",
                    context, delay, attempt, max_retries
                )
                time.sleep(delay)
        # Defensive: the loop always returns or raises, but keep a final raise
        # so a logic change can never fall through silently.
        raise last_exception
class NetworkHealthChecker:
    """Monitor network connectivity and health."""

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # "host:port" -> (probe timestamp, reachable) pairs.
        self.connectivity_cache = {}
        self.cache_timeout = 60  # seconds a cached probe result stays valid

    def check_connectivity(self, host: str = "8.8.8.8", port: int = 53, timeout: float = 3.0) -> bool:
        """Check basic network connectivity by opening a TCP connection.

        Results are cached per host:port for ``cache_timeout`` seconds.
        """
        cache_key = f"{host}:{port}"
        now = time.time()
        # Serve a recent cached probe if available.
        cached = self.connectivity_cache.get(cache_key)
        if cached is not None:
            timestamp, result = cached
            if now - timestamp < self.cache_timeout:
                return result
        try:
            # create_connection applies the timeout to this socket only and
            # the context manager closes it.  (The previous implementation
            # mutated the process-wide default via socket.setdefaulttimeout
            # and never closed the probe socket.)
            with socket.create_connection((host, port), timeout=timeout):
                result = True
        except OSError:
            # Covers refusals, timeouts and DNS failures (gaierror).
            result = False
        self.connectivity_cache[cache_key] = (now, result)
        return result

    def check_url_reachability(self, url: str, timeout: float = 10.0) -> bool:
        """Check if a specific URL is reachable (HEAD request, status < 400)."""
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=True)
            return response.status_code < 400
        except Exception as e:
            self.logger.debug(f"URL {url} not reachable: {e}")
            return False

    def get_network_status(self) -> Dict[str, Any]:
        """Get comprehensive network status."""
        return {
            'basic_connectivity': self.check_connectivity(),
            # NOTE(review): this probes TCP to 1.1.1.1:53, not an actual DNS
            # lookup -- key name kept for backward compatibility.
            'dns_resolution': self.check_connectivity("1.1.1.1", 53),
            'timestamp': datetime.now().isoformat()
        }
class FileCorruptionDetector:
    """Detect and handle file corruption."""

    # Magic numbers expected at byte 0 of the file.
    _HEADER_SIGNATURES = (
        b'\x1a\x45\xdf\xa3',  # MKV (Matroska / EBML)
        b'RIFF',              # AVI
    )

    def __init__(self):
        self.logger = logging.getLogger(__name__)

    def calculate_checksum(self, file_path: str, algorithm: str = 'md5') -> str:
        """Calculate file checksum.

        Args:
            file_path: Path of the file to hash.
            algorithm: Any algorithm name accepted by hashlib.new().

        Raises:
            OSError: If the file cannot be read.
        """
        # hashlib.new validates the algorithm name (getattr would accept any
        # hashlib attribute and fail obscurely).
        hash_func = hashlib.new(algorithm)
        try:
            with open(file_path, 'rb') as f:
                # Stream in chunks so large files don't load into memory.
                for chunk in iter(lambda: f.read(65536), b""):
                    hash_func.update(chunk)
            return hash_func.hexdigest()
        except OSError as e:
            self.logger.error(f"Failed to calculate checksum for {file_path}: {e}")
            raise

    def verify_file_size(self, file_path: str, expected_size: Optional[int] = None, min_size: int = 1024) -> bool:
        """Verify file has reasonable size.

        Returns False when the file is below min_size or deviates more than
        10% from expected_size (when provided), or cannot be stat'ed.
        """
        try:
            actual_size = os.path.getsize(file_path)
            # Check minimum size
            if actual_size < min_size:
                self.logger.warning(f"File {file_path} too small: {actual_size} bytes")
                return False
            # Check expected size if provided (10% tolerance).
            if expected_size and abs(actual_size - expected_size) > expected_size * 0.1:
                self.logger.warning(f"File {file_path} size mismatch: expected {expected_size}, got {actual_size}")
                return False
            return True
        except OSError as e:
            self.logger.error(f"Failed to verify file size for {file_path}: {e}")
            return False

    def is_valid_video_file(self, file_path: str) -> bool:
        """Basic validation for video files.

        Checks existence, size, extension (warn-only) and known container
        headers.  Unknown headers are accepted: the signature list is not
        exhaustive, so only unreadable/undersized files are rejected.
        """
        if not os.path.exists(file_path):
            return False
        # Check file size
        if not self.verify_file_size(file_path):
            return False
        # Check file extension (warn but do not reject).
        video_extensions = {'.mp4', '.mkv', '.avi', '.mov', '.wmv', '.flv', '.webm'}
        ext = os.path.splitext(file_path)[1].lower()
        if ext not in video_extensions:
            self.logger.warning(f"File {file_path} has unexpected extension: {ext}")
        # Try to read first few bytes to check for valid headers
        try:
            with open(file_path, 'rb') as f:
                header = f.read(32)
        except OSError as e:
            self.logger.error(f"Failed to read file header for {file_path}: {e}")
            return False
        # ISO BMFF (MP4/MOV): the 'ftyp' box type sits at offset 4; the
        # preceding 4-byte box size varies, so a fixed full-prefix signature
        # (the previous b'\x00\x00\x00\x18ftypmp4') misses most real files.
        if header[4:8] == b'ftyp':
            return True
        if header.startswith(self._HEADER_SIGNATURES):
            return True
        # If no specific signature matches, assume it's valid if size is reasonable
        return True
class RecoveryStrategies:
    """Implement various recovery strategies for different error types."""

    def __init__(self, recovery_manager: ErrorRecoveryManager):
        self.recovery_manager = recovery_manager
        self.retry_mechanism = RetryMechanism(recovery_manager)
        self.health_checker = NetworkHealthChecker()
        self.corruption_detector = FileCorruptionDetector()
        self.logger = logging.getLogger(__name__)

    def handle_network_failure(self, func: Callable, *args, **kwargs) -> Any:
        """Run ``func`` with a connectivity pre-check and network-error retries."""

        def guarded_call():
            # Fail fast when there is no connectivity at all.
            if not self.health_checker.check_connectivity():
                raise NetworkError("No internet connectivity")
            return func(*args, **kwargs)

        return self.retry_mechanism.retry_with_backoff(
            guarded_call,
            max_retries=5,
            backoff_factor=1.5,
            context=f"network_operation_{func.__name__}",
            retry_on=(NetworkError, ConnectionError, TimeoutError),
        )

    def handle_download_failure(
        self,
        download_func: Callable,
        file_path: str,
        *args,
        **kwargs
    ) -> Any:
        """Run ``download_func`` with retries; corrupted results are deleted
        and the download re-attempted."""

        def verified_download():
            outcome = download_func(*args, **kwargs)
            # Nothing to verify if the download produced no file.
            if not os.path.exists(file_path):
                return outcome
            if self.corruption_detector.is_valid_video_file(file_path):
                return outcome
            self.logger.warning(f"Downloaded file appears corrupted: {file_path}")
            # Remove the corrupted file so the retry starts from scratch.
            try:
                os.remove(file_path)
            except Exception as e:
                self.logger.error(f"Failed to remove corrupted file {file_path}: {e}")
            raise DownloadError("Downloaded file is corrupted")

        return self.retry_mechanism.retry_with_backoff(
            verified_download,
            max_retries=3,
            backoff_factor=2.0,
            context=f"download_{os.path.basename(file_path)}",
            retry_on=(DownloadError, NetworkError, ConnectionError),
        )
# Singleton instances
# Module-level shared instances: the decorators below and importing modules
# all reuse one error history / blacklist / retry configuration.
error_recovery_manager = ErrorRecoveryManager()
recovery_strategies = RecoveryStrategies(error_recovery_manager)
network_health_checker = NetworkHealthChecker()
file_corruption_detector = FileCorruptionDetector()
def with_error_recovery(max_retries: Optional[int] = None, context: Optional[str] = None):
    """Decorator for adding error recovery to functions.

    Args:
        max_retries: Maximum retries; None falls back to the shared
            recovery manager's default.
        context: Logging context; defaults to the wrapped function's name.

    Returns:
        A decorator that routes calls through the shared retry mechanism.
    """
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Positional and keyword arguments are forwarded untouched to
            # the wrapped function by the retry mechanism.
            return recovery_strategies.retry_mechanism.retry_with_backoff(
                func,
                *args,
                max_retries=max_retries,
                context=context or func.__name__,
                **kwargs
            )
        return wrapper
    return decorator
def handle_api_errors(func: Callable) -> Callable:
    """Decorator for consistent API error handling.

    Maps module exceptions to uniform JSON error responses:
    NonRetryableError -> 400, RetryableError -> 503, anything else -> 500.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        log_context = f"api_{func.__name__}"
        try:
            return func(*args, **kwargs)
        except NonRetryableError as exc:
            error_recovery_manager.log_error(exc, log_context)
            payload = {
                'status': 'error',
                'message': 'Operation failed',
                'error_type': 'non_retryable',
                'retry_suggested': False
            }
            return jsonify(payload), 400
        except RetryableError as exc:
            error_recovery_manager.log_error(exc, log_context)
            payload = {
                'status': 'error',
                'message': 'Temporary failure, please try again',
                'error_type': 'retryable',
                'retry_suggested': True
            }
            return jsonify(payload), 503
        except Exception as exc:
            error_recovery_manager.log_error(exc, log_context)
            payload = {
                'status': 'error',
                'message': 'An unexpected error occurred',
                'error_type': 'unknown',
                'retry_suggested': error_recovery_manager.is_retryable_error(exc)
            }
            return jsonify(payload), 500
    return wrapper
# Export main components
# Explicit public API of this module (governs `from ... import *`).
__all__ = [
    'ErrorRecoveryManager',
    'RetryMechanism',
    'NetworkHealthChecker',
    'FileCorruptionDetector',
    'RecoveryStrategies',
    'NetworkError',
    'DownloadError',
    'RetryableError',
    'NonRetryableError',
    'with_error_recovery',
    'handle_api_errors',
    'error_recovery_manager',
    'recovery_strategies',
    'network_health_checker',
    'file_corruption_detector'
]