cleanup
This commit is contained in:
@@ -577,6 +577,10 @@ class SeriesApp:
|
||||
"""
|
||||
return self.series_list
|
||||
|
||||
def refresh_series_list(self) -> None:
|
||||
"""Reload the cached series list from the underlying data store."""
|
||||
self.__InitList__()
|
||||
|
||||
def get_operation_status(self) -> OperationStatus:
|
||||
"""
|
||||
Get the current operation status.
|
||||
|
||||
@@ -5,14 +5,12 @@ This module extends the original AniWorldLoader with comprehensive
|
||||
error handling, retry mechanisms, and recovery strategies.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import html
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import time
|
||||
from typing import Any, Callable, Dict, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
@@ -148,13 +146,28 @@ class EnhancedAniWorldLoader(Loader):
|
||||
"""Create a session with robust retry and error handling configuration."""
|
||||
session = requests.Session()
|
||||
|
||||
# Enhanced retry strategy
|
||||
# Configure retries so transient network problems are retried while we
|
||||
# still fail fast on permanent errors. The status codes cover
|
||||
# timeouts, rate limits, and the Cloudflare-origin 52x responses that
|
||||
# AniWorld occasionally emits under load.
|
||||
retries = Retry(
|
||||
total=5,
|
||||
backoff_factor=2, # More aggressive backoff
|
||||
status_forcelist=[408, 429, 500, 502, 503, 504, 520, 521, 522, 523, 524],
|
||||
status_forcelist=[
|
||||
408,
|
||||
429,
|
||||
500,
|
||||
502,
|
||||
503,
|
||||
504,
|
||||
520,
|
||||
521,
|
||||
522,
|
||||
523,
|
||||
524,
|
||||
],
|
||||
allowed_methods=["GET", "POST", "HEAD"],
|
||||
raise_on_status=False # Handle status errors manually
|
||||
raise_on_status=False, # Handle status errors manually
|
||||
)
|
||||
|
||||
adapter = HTTPAdapter(
|
||||
@@ -255,9 +268,9 @@ class EnhancedAniWorldLoader(Loader):
|
||||
|
||||
clean_text = response_text.strip()
|
||||
|
||||
# Try multiple parsing strategies. We progressively relax the parsing
|
||||
# requirements to handle HTML-escaped payloads, stray BOM markers, and
|
||||
# control characters injected by the upstream service.
|
||||
# Attempt increasingly permissive parsing strategies to cope with
|
||||
# upstream anomalies such as HTML escaping, stray BOM markers, and
|
||||
# injected control characters.
|
||||
parsing_strategies = [
|
||||
lambda text: json.loads(html.unescape(text)),
|
||||
lambda text: json.loads(text.encode('utf-8').decode('utf-8-sig')),
|
||||
|
||||
@@ -21,4 +21,7 @@ class Provider(ABC):
|
||||
- direct_link: Direct URL to download resource
|
||||
- headers: Dictionary of HTTP headers to use for download
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Streaming providers must implement get_link"
|
||||
)
|
||||
|
||||
|
||||
@@ -8,10 +8,14 @@ from aniworld import config
|
||||
# import jsbeautifier.unpackers.packer as packer
|
||||
|
||||
|
||||
# Match the embedded ``iframe`` pointing to the actual Filemoon player.
|
||||
REDIRECT_REGEX = re.compile(
|
||||
r'<iframe *(?:[^>]+ )?src=(?:\'([^\']+)\'|"([^"]+)")[^>]*>')
|
||||
# The player HTML hides an ``eval`` wrapped script with ``data-cfasync``
|
||||
# disabled; capture the entire script body for unpacking.
|
||||
SCRIPT_REGEX = re.compile(
|
||||
r'(?s)<script\s+[^>]*?data-cfasync=["\']?false["\']?[^>]*>(.+?)</script>')
|
||||
# Extract the direct ``file:"<m3u8>"`` URL once the script is unpacked.
|
||||
VIDEO_URL_REGEX = re.compile(r'file:\s*"([^"]+\.m3u8[^"]*)"')
|
||||
|
||||
# TODO Implement this script fully
|
||||
|
||||
@@ -19,6 +19,8 @@ def fetch_page_content(url):
|
||||
|
||||
|
||||
def extract_video_data(page_content):
|
||||
# ``videos_manifest`` lines embed a JSON blob with the stream metadata
|
||||
# inside a larger script tag; grab that entire line for further parsing.
|
||||
match = re.search(r'^.*videos_manifest.*$', page_content, re.MULTILINE)
|
||||
if not match:
|
||||
raise ValueError("Failed to extract video manifest from the response.")
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import re
|
||||
|
||||
import requests
|
||||
|
||||
from aniworld import config
|
||||
|
||||
|
||||
@@ -25,6 +24,8 @@ def get_direct_link_from_luluvdo(embeded_luluvdo_link, arguments=None):
|
||||
timeout=config.DEFAULT_REQUEST_TIMEOUT)
|
||||
|
||||
if response.status_code == 200:
|
||||
# Capture the ``file:"<url>"`` assignment embedded in the player
|
||||
# configuration so we can return the stream URL.
|
||||
pattern = r'file:\s*"([^"]+)"'
|
||||
matches = re.findall(pattern, str(response.text))
|
||||
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import re
|
||||
import base64
|
||||
import requests
|
||||
import re
|
||||
|
||||
import requests
|
||||
from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
|
||||
|
||||
# Capture the base64 payload hidden inside the obfuscated ``_0x5opu234``
|
||||
# assignment. The named group lets us pull out the encoded blob directly.
|
||||
SPEEDFILES_PATTERN = re.compile(r'var _0x5opu234 = "(?P<encoded_data>.*?)";')
|
||||
|
||||
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import re
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def get_direct_link_from_vidmoly(embeded_vidmoly_link: str):
|
||||
@@ -16,6 +15,8 @@ def get_direct_link_from_vidmoly(embeded_vidmoly_link: str):
|
||||
soup = BeautifulSoup(html_content, 'html.parser')
|
||||
scripts = soup.find_all('script')
|
||||
|
||||
# Match the ``file:"<url>"`` assignment inside the obfuscated player
|
||||
# script so we can recover the direct MP4 source URL.
|
||||
file_link_pattern = r'file:\s*"(https?://.*?)"'
|
||||
|
||||
for script in scripts:
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
import re
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def get_direct_link_from_vidoza(embeded_vidoza_link: str) -> str:
|
||||
@@ -17,6 +16,8 @@ def get_direct_link_from_vidoza(embeded_vidoza_link: str) -> str:
|
||||
|
||||
for tag in soup.find_all('script'):
|
||||
if 'sourcesCode:' in tag.text:
|
||||
# Script blocks contain a ``sourcesCode`` object with ``src``
|
||||
# assignments; extract the first URL between the quotes.
|
||||
match = re.search(r'src: "(.*?)"', tag.text)
|
||||
if match:
|
||||
return match.group(1)
|
||||
|
||||
@@ -10,7 +10,13 @@ from urllib3.util.retry import Retry
|
||||
|
||||
from .Provider import Provider
|
||||
|
||||
# Compile regex patterns once for better performance
|
||||
# Precompile the different pattern matchers used during extraction:
|
||||
# - REDIRECT_PATTERN pulls the intermediate redirect URL from the bootstrap
|
||||
# script so we can follow the provider's hand-off.
|
||||
# - B64_PATTERN isolates the base64 encoded payload containing the ``source``
|
||||
# field once decoded.
|
||||
# - HLS_PATTERN captures the base64 encoded HLS manifest for fallback when
|
||||
# no direct MP4 link is present.
|
||||
REDIRECT_PATTERN = re.compile(r"https?://[^'\"<>]+")
|
||||
B64_PATTERN = re.compile(r"var a168c='([^']+)'")
|
||||
HLS_PATTERN = re.compile(r"'hls': '(?P<hls>[^']+)'")
|
||||
|
||||
Reference in New Issue
Block a user