Improve docs and security defaults

2025-10-22 15:22:58 +02:00
parent ebb0769ed4
commit 92795cf9b3
16 changed files with 283 additions and 180 deletions
--- a/src/core/providers/enhanced_provider.py
+++ b/src/core/providers/enhanced_provider.py
@@ -37,7 +37,10 @@ from .base_provider import Loader


 class EnhancedAniWorldLoader(Loader):
-    """Enhanced AniWorld loader with comprehensive error handling."""
+    """AniWorld provider with retry and recovery strategies.
+
+    Also exposes metrics hooks for download statistics.
+    """
    
    def __init__(self):
        super().__init__()
@@ -211,7 +214,9 @@ class EnhancedAniWorldLoader(Loader):
        if not word or not word.strip():
            raise ValueError("Search term cannot be empty")
        
-        search_url = f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
+        search_url = (
+            f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
+        )
        
        try:
            return self._fetch_anime_list_with_recovery(search_url)
@@ -250,7 +255,9 @@ class EnhancedAniWorldLoader(Loader):
        
        clean_text = response_text.strip()
        
-        # Try multiple parsing strategies
+        # Try multiple parsing strategies. We progressively relax the parsing
+        # requirements to handle HTML-escaped payloads, stray BOM markers, and
+        # control characters injected by the upstream service.
        parsing_strategies = [
            lambda text: json.loads(html.unescape(text)),
            lambda text: json.loads(text.encode('utf-8').decode('utf-8-sig')),
--- a/src/core/providers/streaming/doodstream.py
+++ b/src/core/providers/streaming/doodstream.py
@@ -1,5 +1,8 @@
+"""Resolve Doodstream embed players into direct download URLs."""
+
 import random
 import re
+import string
 import time
 from typing import Any

@@ -8,6 +11,12 @@ from fake_useragent import UserAgent

 from .Provider import Provider

+# Precompiled regex patterns to extract the ``pass_md5`` endpoint and the
+# session token embedded in the obfuscated player script. Compiling once keeps
+# repeated invocations fast and documents the parsing intent.
+PASS_MD5_PATTERN = re.compile(r"\$\.get\('([^']*/pass_md5/[^']*)'")
+TOKEN_PATTERN = re.compile(r"token=([a-zA-Z0-9]+)")
+

 class Doodstream(Provider):
    """Doodstream video provider implementation."""
@@ -33,17 +42,15 @@ class Doodstream(Provider):
            "Referer": "https://dood.li/",
        }

-        def extract_data(pattern: str, content: str) -> str | None:
-            """Extract data using regex pattern."""
-            match = re.search(pattern, content)
+        def extract_data(pattern: re.Pattern[str], content: str) -> str | None:
+            """Return capture group 1 of the first match, or None."""
+            match = pattern.search(content)
            return match.group(1) if match else None

        def generate_random_string(length: int = 10) -> str:
            """Generate random alphanumeric string."""
-            characters = (
-                "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
-            )
-            return "".join(random.choice(characters) for _ in range(length))
+            charset = string.ascii_letters + string.digits
+            return "".join(random.choices(charset, k=length))

        response = requests.get(
            embedded_link,
@@ -53,15 +60,13 @@ class Doodstream(Provider):
        )
        response.raise_for_status()

-        pass_md5_pattern = r"\$\.get\('([^']*\/pass_md5\/[^']*)'"
-        pass_md5_url = extract_data(pass_md5_pattern, response.text)
+        pass_md5_url = extract_data(PASS_MD5_PATTERN, response.text)
        if not pass_md5_url:
            raise ValueError(f"pass_md5 URL not found using {embedded_link}.")

        full_md5_url = f"https://dood.li{pass_md5_url}"

-        token_pattern = r"token=([a-zA-Z0-9]+)"
-        token = extract_data(token_pattern, response.text)
+        token = extract_data(TOKEN_PATTERN, response.text)
        if not token:
            raise ValueError(f"Token not found using {embedded_link}.")

--- a/src/core/providers/streaming/filemoon.py
+++ b/src/core/providers/streaming/filemoon.py
@@ -1,8 +1,12 @@
+"""Resolve Filemoon embed pages into direct streaming asset URLs."""
+
 import re
+
 import requests
+from aniworld import config
+
 # import jsbeautifier.unpackers.packer as packer

-from aniworld import config

 REDIRECT_REGEX = re.compile(
    r'<iframe *(?:[^>]+ )?src=(?:\'([^\']+)\'|"([^"]+)")[^>]*>')
--- a/src/core/providers/streaming/hanime.py
+++ b/src/core/providers/streaming/hanime.py
@@ -1,6 +1,9 @@
-import re
+"""Helpers for extracting direct stream URLs from hanime.tv pages."""
+
 import json
+import re
 import sys
+
 import requests
 from aniworld.config import DEFAULT_REQUEST_TIMEOUT

@@ -83,7 +86,7 @@ def get_direct_link_from_hanime(url=None):
        except ValueError as e:
            print(f"Error: {e}")
    except KeyboardInterrupt:
-        pass
+        print("\nOperation cancelled by user.")


 if __name__ == "__main__":