cleanup

2025-10-22 15:54:36 +02:00
parent 92795cf9b3
commit 6db850c2ad
13 changed files with 330 additions and 338 deletions
--- a/src/core/SeriesApp.py
+++ b/src/core/SeriesApp.py
@@ -577,6 +577,10 @@ class SeriesApp:
        """
        return self.series_list

+    def refresh_series_list(self) -> None:
+        """Reload the cached series list from the underlying data store."""
+        self.__InitList__()
+
    def get_operation_status(self) -> OperationStatus:
        """
        Get the current operation status.
--- a/src/core/providers/enhanced_provider.py
+++ b/src/core/providers/enhanced_provider.py
@@ -5,14 +5,12 @@ This module extends the original AniWorldLoader with comprehensive
 error handling, retry mechanisms, and recovery strategies.
 """

-import hashlib
 import html
 import json
 import logging
 import os
 import re
 import shutil
-import time
 from typing import Any, Callable, Dict, Optional
 from urllib.parse import quote

@@ -148,13 +146,28 @@ class EnhancedAniWorldLoader(Loader):
        """Create a session with robust retry and error handling configuration."""
        session = requests.Session()
        
-        # Enhanced retry strategy
+        # Configure retries so transient network problems are retried while we
+        # still fail fast on permanent errors. The status codes cover request
+        # timeouts (408), rate limits (429), generic 5xx server errors, and the
+        # Cloudflare-origin 52x responses AniWorld sometimes emits under load.
        retries = Retry(
            total=5,
            backoff_factor=2,  # More aggressive backoff
-            status_forcelist=[408, 429, 500, 502, 503, 504, 520, 521, 522, 523, 524],
+            status_forcelist=[
+                408,
+                429,
+                500,
+                502,
+                503,
+                504,
+                520,
+                521,
+                522,
+                523,
+                524,
+            ],
            allowed_methods=["GET", "POST", "HEAD"],
-            raise_on_status=False  # Handle status errors manually
+            raise_on_status=False,  # Handle status errors manually
        )
        
        adapter = HTTPAdapter(
@@ -255,9 +268,9 @@ class EnhancedAniWorldLoader(Loader):
        
        clean_text = response_text.strip()
        
-        # Try multiple parsing strategies. We progressively relax the parsing
-        # requirements to handle HTML-escaped payloads, stray BOM markers, and
-        # control characters injected by the upstream service.
+        # Attempt increasingly permissive parsing strategies to cope with
+        # upstream anomalies such as HTML escaping, stray BOM markers, and
+        # injected control characters.
        parsing_strategies = [
            lambda text: json.loads(html.unescape(text)),
            lambda text: json.loads(text.encode('utf-8').decode('utf-8-sig')),
--- a/src/core/providers/streaming/Provider.py
+++ b/src/core/providers/streaming/Provider.py
@@ -21,4 +21,7 @@ class Provider(ABC):
                - direct_link: Direct URL to download resource
                - headers: Dictionary of HTTP headers to use for download
        """
+        raise NotImplementedError(
+            "Streaming providers must implement get_link"
+        )

--- a/src/core/providers/streaming/filemoon.py
+++ b/src/core/providers/streaming/filemoon.py
@@ -8,10 +8,14 @@ from aniworld import config
 # import jsbeautifier.unpackers.packer as packer


+# Match the embedded ``iframe`` pointing to the actual Filemoon player.
 REDIRECT_REGEX = re.compile(
    r'<iframe *(?:[^>]+ )?src=(?:\'([^\']+)\'|"([^"]+)")[^>]*>')
+# The player HTML hides an ``eval`` wrapped script with ``data-cfasync``
+# disabled; capture the entire script body for unpacking.
 SCRIPT_REGEX = re.compile(
    r'(?s)<script\s+[^>]*?data-cfasync=["\']?false["\']?[^>]*>(.+?)</script>')
+# Extract the direct ``file:"<m3u8>"`` URL once the script is unpacked.
 VIDEO_URL_REGEX = re.compile(r'file:\s*"([^"]+\.m3u8[^"]*)"')

 # TODO Implement this script fully
--- a/src/core/providers/streaming/hanime.py
+++ b/src/core/providers/streaming/hanime.py
@@ -19,6 +19,8 @@ def fetch_page_content(url):


 def extract_video_data(page_content):
+    # ``videos_manifest`` lines embed a JSON blob with the stream metadata
+    # inside a larger script tag; grab that entire line for further parsing.
    match = re.search(r'^.*videos_manifest.*$', page_content, re.MULTILINE)
    if not match:
        raise ValueError("Failed to extract video manifest from the response.")
--- a/src/core/providers/streaming/luluvdo.py
+++ b/src/core/providers/streaming/luluvdo.py
@@ -1,7 +1,6 @@
 import re

 import requests
-
 from aniworld import config


@@ -25,6 +24,8 @@ def get_direct_link_from_luluvdo(embeded_luluvdo_link, arguments=None):
                            timeout=config.DEFAULT_REQUEST_TIMEOUT)

    if response.status_code == 200:
+        # Capture the ``file:"<url>"`` assignment embedded in the player
+        # configuration so we can return the stream URL.
        pattern = r'file:\s*"([^"]+)"'
        matches = re.findall(pattern, str(response.text))

--- a/src/core/providers/streaming/speedfiles.py
+++ b/src/core/providers/streaming/speedfiles.py
@@ -1,9 +1,11 @@
-import re
 import base64
-import requests
+import re

+import requests
 from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT

+# Capture the base64 payload hidden inside the obfuscated ``_0x5opu234``
+# assignment. The named group lets us pull out the encoded blob directly.
 SPEEDFILES_PATTERN = re.compile(r'var _0x5opu234 = "(?P<encoded_data>.*?)";')


--- a/src/core/providers/streaming/vidmoly.py
+++ b/src/core/providers/streaming/vidmoly.py
@@ -1,9 +1,8 @@
 import re

 import requests
-from bs4 import BeautifulSoup
-
 from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
+from bs4 import BeautifulSoup


 def get_direct_link_from_vidmoly(embeded_vidmoly_link: str):
@@ -16,6 +15,8 @@ def get_direct_link_from_vidmoly(embeded_vidmoly_link: str):
    soup = BeautifulSoup(html_content, 'html.parser')
    scripts = soup.find_all('script')

+    # Match the ``file:"<url>"`` assignment inside the player script so we
+    # can recover the direct source URL (any http/https link, not only MP4).
    file_link_pattern = r'file:\s*"(https?://.*?)"'

    for script in scripts:
--- a/src/core/providers/streaming/vidoza.py
+++ b/src/core/providers/streaming/vidoza.py
@@ -1,9 +1,8 @@
 import re

 import requests
-from bs4 import BeautifulSoup
-
 from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
+from bs4 import BeautifulSoup


 def get_direct_link_from_vidoza(embeded_vidoza_link: str) -> str:
@@ -17,6 +16,8 @@ def get_direct_link_from_vidoza(embeded_vidoza_link: str) -> str:

    for tag in soup.find_all('script'):
        if 'sourcesCode:' in tag.text:
+            # Script blocks contain a ``sourcesCode`` object with ``src``
+            # assignments; extract the first URL between the quotes.
            match = re.search(r'src: "(.*?)"', tag.text)
            if match:
                return match.group(1)
--- a/src/core/providers/streaming/voe.py
+++ b/src/core/providers/streaming/voe.py
@@ -10,7 +10,13 @@ from urllib3.util.retry import Retry

 from .Provider import Provider

-# Compile regex patterns once for better performance
+# Precompile the different pattern matchers used during extraction:
+# - REDIRECT_PATTERN pulls the intermediate redirect URL from the bootstrap
+#   script so we can follow the provider's hand-off.
+# - B64_PATTERN isolates the base64 encoded payload containing the ``source``
+#   field once decoded.
+# - HLS_PATTERN captures the base64 encoded HLS manifest for fallback when
+#   no direct MP4 link is present.
 REDIRECT_PATTERN = re.compile(r"https?://[^'\"<>]+")
 B64_PATTERN = re.compile(r"var a168c='([^']+)'")
 HLS_PATTERN = re.compile(r"'hls': '(?P<hls>[^']+)'")