This commit is contained in:
2025-10-22 15:54:36 +02:00
parent 92795cf9b3
commit 6db850c2ad
13 changed files with 330 additions and 338 deletions

View File

@@ -577,6 +577,10 @@ class SeriesApp:
"""
return self.series_list
def refresh_series_list(self) -> None:
"""Reload the cached series list from the underlying data store."""
self.__InitList__()
def get_operation_status(self) -> OperationStatus:
"""
Get the current operation status.

View File

@@ -5,14 +5,12 @@ This module extends the original AniWorldLoader with comprehensive
error handling, retry mechanisms, and recovery strategies.
"""
import hashlib
import html
import json
import logging
import os
import re
import shutil
import time
from typing import Any, Callable, Dict, Optional
from urllib.parse import quote
@@ -148,13 +146,28 @@ class EnhancedAniWorldLoader(Loader):
"""Create a session with robust retry and error handling configuration."""
session = requests.Session()
# Enhanced retry strategy
# Configure retries so transient network problems are retried while we
# still fail fast on permanent errors. The status codes cover
# timeouts, rate limits, and the Cloudflare-origin 52x responses that
# AniWorld occasionally emits under load.
retries = Retry(
total=5,
backoff_factor=2, # More aggressive backoff
status_forcelist=[408, 429, 500, 502, 503, 504, 520, 521, 522, 523, 524],
status_forcelist=[
408,
429,
500,
502,
503,
504,
520,
521,
522,
523,
524,
],
allowed_methods=["GET", "POST", "HEAD"],
raise_on_status=False # Handle status errors manually
raise_on_status=False, # Handle status errors manually
)
adapter = HTTPAdapter(
@@ -255,9 +268,9 @@ class EnhancedAniWorldLoader(Loader):
clean_text = response_text.strip()
# Try multiple parsing strategies. We progressively relax the parsing
# requirements to handle HTML-escaped payloads, stray BOM markers, and
# control characters injected by the upstream service.
# Attempt increasingly permissive parsing strategies to cope with
# upstream anomalies such as HTML escaping, stray BOM markers, and
# injected control characters.
parsing_strategies = [
lambda text: json.loads(html.unescape(text)),
lambda text: json.loads(text.encode('utf-8').decode('utf-8-sig')),

View File

@@ -21,4 +21,7 @@ class Provider(ABC):
- direct_link: Direct URL to download resource
- headers: Dictionary of HTTP headers to use for download
"""
raise NotImplementedError(
"Streaming providers must implement get_link"
)

View File

@@ -8,10 +8,14 @@ from aniworld import config
# import jsbeautifier.unpackers.packer as packer
# Match the embedded ``iframe`` pointing to the actual Filemoon player.
REDIRECT_REGEX = re.compile(
r'<iframe *(?:[^>]+ )?src=(?:\'([^\']+)\'|"([^"]+)")[^>]*>')
# The player HTML hides an ``eval`` wrapped script with ``data-cfasync``
# disabled; capture the entire script body for unpacking.
SCRIPT_REGEX = re.compile(
r'(?s)<script\s+[^>]*?data-cfasync=["\']?false["\']?[^>]*>(.+?)</script>')
# Extract the direct ``file:"<m3u8>"`` URL once the script is unpacked.
VIDEO_URL_REGEX = re.compile(r'file:\s*"([^"]+\.m3u8[^"]*)"')
# TODO Implement this script fully

View File

@@ -19,6 +19,8 @@ def fetch_page_content(url):
def extract_video_data(page_content):
# ``videos_manifest`` lines embed a JSON blob with the stream metadata
# inside a larger script tag; grab that entire line for further parsing.
match = re.search(r'^.*videos_manifest.*$', page_content, re.MULTILINE)
if not match:
raise ValueError("Failed to extract video manifest from the response.")

View File

@@ -1,7 +1,6 @@
import re
import requests
from aniworld import config
@@ -25,6 +24,8 @@ def get_direct_link_from_luluvdo(embeded_luluvdo_link, arguments=None):
timeout=config.DEFAULT_REQUEST_TIMEOUT)
if response.status_code == 200:
# Capture the ``file:"<url>"`` assignment embedded in the player
# configuration so we can return the stream URL.
pattern = r'file:\s*"([^"]+)"'
matches = re.findall(pattern, str(response.text))

View File

@@ -1,9 +1,11 @@
import re
import base64
import requests
import re
import requests
from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
# Capture the base64 payload hidden inside the obfuscated ``_0x5opu234``
# assignment. The named group lets us pull out the encoded blob directly.
SPEEDFILES_PATTERN = re.compile(r'var _0x5opu234 = "(?P<encoded_data>.*?)";')

View File

@@ -1,9 +1,8 @@
import re
import requests
from bs4 import BeautifulSoup
from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
from bs4 import BeautifulSoup
def get_direct_link_from_vidmoly(embeded_vidmoly_link: str):
@@ -16,6 +15,8 @@ def get_direct_link_from_vidmoly(embeded_vidmoly_link: str):
soup = BeautifulSoup(html_content, 'html.parser')
scripts = soup.find_all('script')
# Match the ``file:"<url>"`` assignment inside the obfuscated player
# script so we can recover the direct MP4 source URL.
file_link_pattern = r'file:\s*"(https?://.*?)"'
for script in scripts:

View File

@@ -1,9 +1,8 @@
import re
import requests
from bs4 import BeautifulSoup
from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
from bs4 import BeautifulSoup
def get_direct_link_from_vidoza(embeded_vidoza_link: str) -> str:
@@ -17,6 +16,8 @@ def get_direct_link_from_vidoza(embeded_vidoza_link: str) -> str:
for tag in soup.find_all('script'):
if 'sourcesCode:' in tag.text:
# Script blocks contain a ``sourcesCode`` object with ``src``
# assignments; extract the first URL between the quotes.
match = re.search(r'src: "(.*?)"', tag.text)
if match:
return match.group(1)

View File

@@ -10,7 +10,13 @@ from urllib3.util.retry import Retry
from .Provider import Provider
# Compile regex patterns once for better performance
# Precompile the different pattern matchers used during extraction:
# - REDIRECT_PATTERN pulls the intermediate redirect URL from the bootstrap
# script so we can follow the provider's hand-off.
# - B64_PATTERN isolates the base64 encoded payload containing the ``source``
# field once decoded.
# - HLS_PATTERN captures the base64 encoded HLS manifest for fallback when
# no direct MP4 link is present.
REDIRECT_PATTERN = re.compile(r"https?://[^'\"<>]+")
B64_PATTERN = re.compile(r"var a168c='([^']+)'")
HLS_PATTERN = re.compile(r"'hls': '(?P<hls>[^']+)'")