cleanup

2025-10-22 15:54:36 +02:00
parent 92795cf9b3
commit 6db850c2ad
13 changed files with 330 additions and 338 deletions
--- a/src/core/providers/streaming/Provider.py
+++ b/src/core/providers/streaming/Provider.py
@@ -21,4 +21,7 @@ class Provider(ABC):
                - direct_link: Direct URL to download resource
                - headers: Dictionary of HTTP headers to use for download
        """
+        raise NotImplementedError(
+            "Streaming providers must implement get_link"
+        )

--- a/src/core/providers/streaming/filemoon.py
+++ b/src/core/providers/streaming/filemoon.py
@@ -8,10 +8,14 @@ from aniworld import config
 # import jsbeautifier.unpackers.packer as packer


+# Match the embedded ``iframe`` pointing to the actual Filemoon player.
 REDIRECT_REGEX = re.compile(
    r'<iframe *(?:[^>]+ )?src=(?:\'([^\']+)\'|"([^"]+)")[^>]*>')
+# The player HTML hides an ``eval``-wrapped script in a tag marked
+# ``data-cfasync=false``; capture the entire script body for unpacking.
 SCRIPT_REGEX = re.compile(
    r'(?s)<script\s+[^>]*?data-cfasync=["\']?false["\']?[^>]*>(.+?)</script>')
+# Extract the direct ``file:"<m3u8>"`` URL once the script is unpacked.
 VIDEO_URL_REGEX = re.compile(r'file:\s*"([^"]+\.m3u8[^"]*)"')

 # TODO Implement this script fully
--- a/src/core/providers/streaming/hanime.py
+++ b/src/core/providers/streaming/hanime.py
@@ -19,6 +19,8 @@ def fetch_page_content(url):


 def extract_video_data(page_content):
+    # A ``videos_manifest`` line embeds a JSON blob with the stream metadata
+    # inside a larger script tag; grab the first such line whole for parsing.
    match = re.search(r'^.*videos_manifest.*$', page_content, re.MULTILINE)
    if not match:
        raise ValueError("Failed to extract video manifest from the response.")
--- a/src/core/providers/streaming/luluvdo.py
+++ b/src/core/providers/streaming/luluvdo.py
@@ -1,7 +1,6 @@
 import re

 import requests
-
 from aniworld import config


@@ -25,6 +24,8 @@ def get_direct_link_from_luluvdo(embeded_luluvdo_link, arguments=None):
                            timeout=config.DEFAULT_REQUEST_TIMEOUT)

    if response.status_code == 200:
+        # Capture the ``file:"<url>"`` assignment embedded in the player
+        # configuration so we can return the stream URL.
        pattern = r'file:\s*"([^"]+)"'
        matches = re.findall(pattern, str(response.text))

--- a/src/core/providers/streaming/speedfiles.py
+++ b/src/core/providers/streaming/speedfiles.py
@@ -1,9 +1,11 @@
-import re
 import base64
-import requests
+import re

+import requests
 from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT

+# Capture the base64 payload hidden inside the obfuscated ``_0x5opu234``
+# assignment. The named group lets us pull out the encoded blob directly.
 SPEEDFILES_PATTERN = re.compile(r'var _0x5opu234 = "(?P<encoded_data>.*?)";')


--- a/src/core/providers/streaming/vidmoly.py
+++ b/src/core/providers/streaming/vidmoly.py
@@ -1,9 +1,8 @@
 import re

 import requests
-from bs4 import BeautifulSoup
-
 from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
+from bs4 import BeautifulSoup


 def get_direct_link_from_vidmoly(embeded_vidmoly_link: str):
@@ -16,6 +15,8 @@ def get_direct_link_from_vidmoly(embeded_vidmoly_link: str):
    soup = BeautifulSoup(html_content, 'html.parser')
    scripts = soup.find_all('script')

+    # Match the ``file:"<url>"`` assignment inside the obfuscated player
+    # script so we can recover the direct http(s) source URL.
    file_link_pattern = r'file:\s*"(https?://.*?)"'

    for script in scripts:
--- a/src/core/providers/streaming/vidoza.py
+++ b/src/core/providers/streaming/vidoza.py
@@ -1,9 +1,8 @@
 import re

 import requests
-from bs4 import BeautifulSoup
-
 from aniworld.config import DEFAULT_REQUEST_TIMEOUT, RANDOM_USER_AGENT
+from bs4 import BeautifulSoup


 def get_direct_link_from_vidoza(embeded_vidoza_link: str) -> str:
@@ -17,6 +16,8 @@ def get_direct_link_from_vidoza(embeded_vidoza_link: str) -> str:

    for tag in soup.find_all('script'):
        if 'sourcesCode:' in tag.text:
+            # Script blocks contain a ``sourcesCode`` object with ``src``
+            # assignments; extract the first URL between the quotes.
            match = re.search(r'src: "(.*?)"', tag.text)
            if match:
                return match.group(1)
--- a/src/core/providers/streaming/voe.py
+++ b/src/core/providers/streaming/voe.py
@@ -10,7 +10,13 @@ from urllib3.util.retry import Retry

 from .Provider import Provider

-# Compile regex patterns once for better performance
+# Precompile the different pattern matchers used during extraction:
+# - REDIRECT_PATTERN pulls the intermediate redirect URL from the bootstrap
+#   script so we can follow the provider's hand-off.
+# - B64_PATTERN isolates the base64 encoded payload containing the ``source``
+#   field once decoded.
+# - HLS_PATTERN captures the base64 encoded HLS manifest for fallback when
+#   no direct MP4 link is present.
 REDIRECT_PATTERN = re.compile(r"https?://[^'\"<>]+")
 B64_PATTERN = re.compile(r"var a168c='([^']+)'")
 HLS_PATTERN = re.compile(r"'hls': '(?P<hls>[^']+)'")