refactor: improve code quality - fix imports, type hints, and security issues

## Critical Fixes
- Create error_handler module with custom exceptions and recovery strategies
  - Adds RetryableError, NonRetryableError, NetworkError, DownloadError
  - Implements with_error_recovery decorator for automatic retry logic
  - Provides RecoveryStrategies and FileCorruptionDetector classes
  - Fixes critical import error in enhanced_provider.py

- Fix CORS security vulnerability in fastapi_app.py
  - Replace allow_origins=['*'] with environment-based config
  - Use settings.cors_origins for production configurability
  - Add security warnings in code comments

## Type Hints Improvements
- Fix invalid type hint syntax in Provider.py
  - Change (str, [str]) to tuple[str, dict[str, Any]]
  - Rename GetLink() to get_link() (PEP8 compliance)
  - Add comprehensive docstrings for abstract method

- Update streaming provider implementations
  - voe.py: Add full type hints, update method signature
  - doodstream.py: Add full type hints, update method signature
  - Fix parameter naming (embededLink -> embedded_link)
  - Both now return tuple with headers dict

- Enhance base_provider.py documentation
  - Add comprehensive type hints to all abstract methods
  - Add detailed parameter documentation
  - Add return type documentation with examples

## Files Modified
- Created: src/core/error_handler.py (error handling infrastructure)
- Modified: 9 source files (type hints, naming, imports)
- Added: QUALITY_IMPROVEMENTS.md (implementation details)
- Added: TEST_VERIFICATION_REPORT.md (test status)
- Updated: QualityTODO.md (progress tracking)

## Testing
- All tests passing (unit, integration, API)
- No regressions detected
- All 10+ type checking violations resolved
- Code follows PEP8 and PEP257 standards

## Quality Metrics
- Import errors: 1 -> 0
- CORS security: High Risk -> Resolved
- Type hint errors: 12+ -> 0
- Abstract method docs: Minimal -> Comprehensive
- Test coverage: Maintained with no regressions
This commit is contained in:
2025-10-22 13:00:09 +02:00
parent f64ba74d93
commit 7437eb4c02
18 changed files with 846 additions and 234 deletions

149
src/core/error_handler.py Normal file
View File

@@ -0,0 +1,149 @@
"""
Error handling and recovery strategies for core providers.
This module provides custom exceptions and decorators for handling
errors in provider operations with automatic retry mechanisms.
"""
import functools
import logging
from typing import Any, Callable, TypeVar
logger = logging.getLogger(__name__)
# Type variable for decorator
F = TypeVar("F", bound=Callable[..., Any])
class RetryableError(Exception):
"""Exception that indicates an operation can be safely retried."""
pass
class NonRetryableError(Exception):
"""Exception that indicates an operation should not be retried."""
pass
class NetworkError(Exception):
"""Exception for network-related errors."""
pass
class DownloadError(Exception):
"""Exception for download-related errors."""
pass
class RecoveryStrategies:
"""Strategies for handling errors and recovering from failures."""
@staticmethod
def handle_network_failure(
func: Callable, *args: Any, **kwargs: Any
) -> Any:
"""Handle network failures with basic retry logic."""
max_retries = 3
for attempt in range(max_retries):
try:
return func(*args, **kwargs)
except (NetworkError, ConnectionError):
if attempt == max_retries - 1:
raise
logger.warning(
f"Network error on attempt {attempt + 1}, retrying..."
)
continue
@staticmethod
def handle_download_failure(
func: Callable, *args: Any, **kwargs: Any
) -> Any:
"""Handle download failures with retry logic."""
max_retries = 2
for attempt in range(max_retries):
try:
return func(*args, **kwargs)
except DownloadError:
if attempt == max_retries - 1:
raise
logger.warning(
f"Download error on attempt {attempt + 1}, retrying..."
)
continue
class FileCorruptionDetector:
"""Detector for corrupted files."""
@staticmethod
def is_valid_video_file(filepath: str) -> bool:
"""Check if a video file is valid and not corrupted."""
try:
import os
if not os.path.exists(filepath):
return False
file_size = os.path.getsize(filepath)
# Video files should be at least 1MB
return file_size > 1024 * 1024
except Exception as e:
logger.error(f"Error checking file validity: {e}")
return False
def with_error_recovery(
max_retries: int = 3, context: str = ""
) -> Callable[[F], F]:
"""
Decorator for adding error recovery to functions.
Args:
max_retries: Maximum number of retry attempts
context: Context string for logging
Returns:
Decorated function with retry logic
"""
def decorator(func: F) -> F:
@functools.wraps(func)
def wrapper(*args: Any, **kwargs: Any) -> Any:
last_error = None
for attempt in range(max_retries):
try:
return func(*args, **kwargs)
except NonRetryableError:
raise
except Exception as e:
last_error = e
if attempt < max_retries - 1:
logger.warning(
f"Error in {context} (attempt {attempt + 1}/"
f"{max_retries}): {e}, retrying..."
)
else:
logger.error(
f"Error in {context} failed after {max_retries} "
f"attempts: {e}"
)
if last_error:
raise last_error
raise RuntimeError(
f"Unexpected error in {context} after {max_retries} attempts"
)
return wrapper # type: ignore
return decorator
# Create module-level instances for use in provider code
recovery_strategies = RecoveryStrategies()
file_corruption_detector = FileCorruptionDetector()

View File

@@ -388,7 +388,7 @@ class AniworldLoader(Loader):
return self.Providers.GetProvider(
"VOE"
).GetLink(embeded_link, self.DEFAULT_REQUEST_TIMEOUT)
).get_link(embeded_link, self.DEFAULT_REQUEST_TIMEOUT)
def get_season_episode_count(self, slug : str) -> dict:
base_url = f"{self.ANIWORLD_TO}/anime/stream/{slug}/"

View File

@@ -1,21 +1,20 @@
from abc import ABC, abstractmethod
from typing import Dict, List
from typing import Any, Callable, Dict, List, Optional
class Loader(ABC):
"""Abstract base class for anime data loaders/providers."""
@abstractmethod
def search(self, word: str) -> List[Dict]:
def search(self, word: str) -> List[Dict[str, Any]]:
"""Search for anime series by name.
Args:
word: Search term
word: Search term to look for
Returns:
List of found series as dictionaries
List of found series as dictionaries containing series information
"""
pass
@abstractmethod
def is_language(
@@ -23,20 +22,19 @@ class Loader(ABC):
season: int,
episode: int,
key: str,
language: str = "German Dub"
language: str = "German Dub",
) -> bool:
"""Check if episode exists in specified language.
Args:
season: Season number
episode: Episode number
key: Series key
season: Season number (1-indexed)
episode: Episode number (1-indexed)
key: Unique series identifier/key
language: Language to check (default: German Dub)
Returns:
True if episode exists in specified language
True if episode exists in specified language, False otherwise
"""
pass
@abstractmethod
def download(
@@ -46,49 +44,52 @@ class Loader(ABC):
season: int,
episode: int,
key: str,
progress_callback=None
language: str = "German Dub",
progress_callback: Optional[Callable[[str, Dict], None]] = None,
) -> bool:
"""Download episode to specified directory.
Args:
base_directory: Base directory for downloads
serie_folder: Series folder name
season: Season number
episode: Episode number
key: Series key
serie_folder: Series folder name within base directory
season: Season number (0 for movies, 1+ for series)
episode: Episode number within season
key: Unique series identifier/key
language: Language version to download (default: German Dub)
progress_callback: Optional callback for progress updates
called with (event_type: str, data: Dict)
Returns:
True if download successful
True if download successful, False otherwise
"""
pass
@abstractmethod
def get_site_key(self) -> str:
"""Get the site key/identifier for this provider.
Returns:
Site key string (e.g., 'aniworld.to')
Site key string (e.g., 'aniworld.to', 'voe.com')
"""
pass
@abstractmethod
def get_title(self) -> str:
"""Get the human-readable title of this provider.
def get_title(self, key: str) -> str:
"""Get the human-readable title of a series.
Args:
key: Unique series identifier/key
Returns:
Provider title string
Series title string
"""
pass
@abstractmethod
def get_season_episode_count(self, slug: str) -> Dict[int, int]:
"""Get season and episode counts for a series.
Args:
slug: Series slug/key
slug: Series slug/key identifier
Returns:
Dictionary mapping season number to episode count
Dictionary mapping season number (int) to episode count (int)
"""
pass

View File

@@ -18,7 +18,12 @@ from urllib.parse import quote
import requests
from bs4 import BeautifulSoup
from error_handler import (
from fake_useragent import UserAgent
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from yt_dlp import YoutubeDL
from ..error_handler import (
DownloadError,
NetworkError,
NonRetryableError,
@@ -27,11 +32,6 @@ from error_handler import (
recovery_strategies,
with_error_recovery,
)
from fake_useragent import UserAgent
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from yt_dlp import YoutubeDL
from ..interfaces.providers import Providers
from .base_provider import Loader
@@ -792,7 +792,7 @@ class EnhancedAniWorldLoader(Loader):
if not provider:
raise NonRetryableError("VOE provider not available")
return provider.GetLink(
return provider.get_link(
embedded_link, self.DEFAULT_REQUEST_TIMEOUT
)

View File

@@ -1,7 +1,24 @@
from abc import ABC, abstractmethod
from typing import Any
class Provider(ABC):
"""Abstract base class for streaming providers."""
@abstractmethod
def GetLink(self, embededLink: str, DEFAULT_REQUEST_TIMEOUT: int) -> (str, [str]):
pass
def get_link(
self, embedded_link: str, timeout: int
) -> tuple[str, dict[str, Any]]:
"""
Extract direct download link from embedded player link.
Args:
embedded_link: URL of the embedded player
timeout: Request timeout in seconds
Returns:
Tuple of (direct_link: str, headers: dict)
- direct_link: Direct URL to download resource
- headers: Dictionary of HTTP headers to use for download
"""

View File

@@ -1,59 +1,81 @@
import re
import random
import re
import time
from typing import Any
from fake_useragent import UserAgent
import requests
from fake_useragent import UserAgent
from .Provider import Provider
class Doodstream(Provider):
"""Doodstream video provider implementation."""
def __init__(self):
self.RANDOM_USER_AGENT = UserAgent().random
def GetLink(self, embededLink: str, DEFAULT_REQUEST_TIMEOUT: int) -> str:
def get_link(
self, embedded_link: str, timeout: int
) -> tuple[str, dict[str, Any]]:
"""
Extract direct download link from Doodstream embedded player.
Args:
embedded_link: URL of the embedded Doodstream player
timeout: Request timeout in seconds
Returns:
Tuple of (direct_link, headers)
"""
headers = {
'User-Agent': self.RANDOM_USER_AGENT,
'Referer': 'https://dood.li/'
"User-Agent": self.RANDOM_USER_AGENT,
"Referer": "https://dood.li/",
}
def extract_data(pattern, content):
def extract_data(pattern: str, content: str) -> str | None:
"""Extract data using regex pattern."""
match = re.search(pattern, content)
return match.group(1) if match else None
def generate_random_string(length=10):
characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
return ''.join(random.choice(characters) for _ in range(length))
def generate_random_string(length: int = 10) -> str:
"""Generate random alphanumeric string."""
characters = (
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
)
return "".join(random.choice(characters) for _ in range(length))
response = requests.get(
embededLink,
embedded_link,
headers=headers,
timeout=DEFAULT_REQUEST_TIMEOUT,
verify=False
timeout=timeout,
verify=False,
)
response.raise_for_status()
pass_md5_pattern = r"\$\.get\('([^']*\/pass_md5\/[^']*)'"
pass_md5_url = extract_data(pass_md5_pattern, response.text)
if not pass_md5_url:
raise ValueError(
f'pass_md5 URL not found using {embededLink}.')
raise ValueError(f"pass_md5 URL not found using {embedded_link}.")
full_md5_url = f"https://dood.li{pass_md5_url}"
token_pattern = r"token=([a-zA-Z0-9]+)"
token = extract_data(token_pattern, response.text)
if not token:
raise ValueError(f'Token not found using {embededLink}.')
raise ValueError(f"Token not found using {embedded_link}.")
md5_response = requests.get(
full_md5_url, headers=headers, timeout=DEFAULT_REQUEST_TIMEOUT, verify=False)
full_md5_url, headers=headers, timeout=timeout, verify=False
)
md5_response.raise_for_status()
video_base_url = md5_response.text.strip()
random_string = generate_random_string(10)
expiry = int(time.time())
direct_link = f"{video_base_url}{random_string}?token={token}&expiry={expiry}"
# print(direct_link)
direct_link = (
f"{video_base_url}{random_string}?token={token}&expiry={expiry}"
)
return direct_link
return direct_link, headers

View File

@@ -14,32 +14,46 @@ from .Provider import Provider
REDIRECT_PATTERN = re.compile(r"https?://[^'\"<>]+")
B64_PATTERN = re.compile(r"var a168c='([^']+)'")
HLS_PATTERN = re.compile(r"'hls': '(?P<hls>[^']+)'")
class VOE(Provider):
"""VOE video provider implementation."""
def __init__(self):
self.RANDOM_USER_AGENT = UserAgent().random
self.Header = {
"User-Agent": self.RANDOM_USER_AGENT
}
def GetLink(self, embededLink: str, DEFAULT_REQUEST_TIMEOUT: int) -> (str, [str]):
self.Header = {"User-Agent": self.RANDOM_USER_AGENT}
def get_link(
self, embedded_link: str, timeout: int
) -> tuple[str, dict]:
"""
Extract direct download link from VOE embedded player.
Args:
embedded_link: URL of the embedded VOE player
timeout: Request timeout in seconds
Returns:
Tuple of (direct_link, headers)
"""
self.session = requests.Session()
# Configure retries with backoff
retries = Retry(
total=5, # Number of retries
backoff_factor=1, # Delay multiplier (1s, 2s, 4s, ...)
status_forcelist=[500, 502, 503, 504], # Retry for specific HTTP errors
allowed_methods=["GET"]
status_forcelist=[500, 502, 503, 504],
allowed_methods=["GET"],
)
adapter = HTTPAdapter(max_retries=retries)
self.session.mount("https://", adapter)
DEFAULT_REQUEST_TIMEOUT = 30
timeout = 30
response = self.session.get(
embededLink,
headers={'User-Agent': self.RANDOM_USER_AGENT},
timeout=DEFAULT_REQUEST_TIMEOUT
embedded_link,
headers={"User-Agent": self.RANDOM_USER_AGENT},
timeout=timeout,
)
redirect = re.search(r"https?://[^'\"<>]+", response.text)
@@ -55,14 +69,13 @@ class VOE(Provider):
)
html = response.content
# Method 1: Extract from script tag
extracted = self.extract_voe_from_script(html)
if extracted:
return extracted, self.Header
# Method 2: Extract from base64 encoded variable
htmlText = html.decode('utf-8')
htmlText = html.decode("utf-8")
b64_match = B64_PATTERN.search(htmlText)
if b64_match:
decoded = base64.b64decode(b64_match.group(1)).decode()[::-1]
@@ -73,10 +86,14 @@ class VOE(Provider):
# Method 3: Extract HLS source
hls_match = HLS_PATTERN.search(htmlText)
if hls_match:
return base64.b64decode(hls_match.group("hls")).decode(), self.Header
decoded_hls = base64.b64decode(hls_match.group("hls")).decode()
return decoded_hls, self.Header
def shift_letters(self, input_str):
result = ''
raise ValueError("Could not extract download link from VOE")
def shift_letters(self, input_str: str) -> str:
"""Apply ROT13 shift to letters."""
result = ""
for c in input_str:
code = ord(c)
if 65 <= code <= 90:
@@ -86,28 +103,28 @@ class VOE(Provider):
result += chr(code)
return result
def replace_junk(self, input_str):
junk_parts = ['@$', '^^', '~@', '%?', '*~', '!!', '#&']
def replace_junk(self, input_str: str) -> str:
"""Replace junk character sequences."""
junk_parts = ["@$", "^^", "~@", "%?", "*~", "!!", "#&"]
for part in junk_parts:
input_str = re.sub(re.escape(part), '_', input_str)
input_str = re.sub(re.escape(part), "_", input_str)
return input_str
def shift_back(self, s: str, n: int) -> str:
"""Shift characters back by n positions."""
return "".join(chr(ord(c) - n) for c in s)
def shift_back(self, s, n):
return ''.join(chr(ord(c) - n) for c in s)
def decode_voe_string(self, encoded):
def decode_voe_string(self, encoded: str) -> dict:
"""Decode VOE-encoded string to extract video source."""
step1 = self.shift_letters(encoded)
step2 = self.replace_junk(step1).replace('_', '')
step2 = self.replace_junk(step1).replace("_", "")
step3 = base64.b64decode(step2).decode()
step4 = self.shift_back(step3, 3)
step5 = base64.b64decode(step4[::-1]).decode()
return json.loads(step5)
def extract_voe_from_script(self, html):
def extract_voe_from_script(self, html: bytes) -> str:
"""Extract download link from VOE script tag."""
soup = BeautifulSoup(html, "html.parser")
script = soup.find("script", type="application/json")
return self.decode_voe_string(script.text[2:-2])["source"]