Commit 9a64ca5b01 (parent 5c2691b070), authored 2025-10-23 18:10:34 +02:00.
14 changed files with 598 additions and 149 deletions.

View File

@@ -48,8 +48,22 @@ class SerieScanner:
basePath: Base directory containing anime series
loader: Loader instance for fetching series information
callback_manager: Optional callback manager for progress updates
Raises:
ValueError: If basePath is invalid or doesn't exist
"""
self.directory: str = basePath
# Validate basePath to prevent directory traversal attacks
if not basePath or not basePath.strip():
raise ValueError("Base path cannot be empty")
# Resolve to absolute path and validate it exists
abs_path = os.path.abspath(basePath)
if not os.path.exists(abs_path):
raise ValueError(f"Base path does not exist: {abs_path}")
if not os.path.isdir(abs_path):
raise ValueError(f"Base path is not a directory: {abs_path}")
self.directory: str = abs_path
self.folderDict: dict[str, Serie] = {}
self.loader: Loader = loader
self._callback_manager: CallbackManager = (
@@ -57,7 +71,7 @@ class SerieScanner:
)
self._current_operation_id: Optional[str] = None
logger.info("Initialized SerieScanner with base path: %s", basePath)
logger.info("Initialized SerieScanner with base path: %s", abs_path)
@property
def callback_manager(self) -> CallbackManager:

View File

@@ -4,6 +4,7 @@ import logging
import os
import re
import shutil
from pathlib import Path
from urllib.parse import quote
import requests
@@ -27,15 +28,27 @@ from .provider_config import (
# Configure persistent loggers but don't add duplicate handlers when module
# is imported multiple times (common in test environments).
# Use absolute paths for log files to prevent security issues
# Determine project root (assuming this file is in src/core/providers/)
_module_dir = Path(__file__).parent
_project_root = _module_dir.parent.parent.parent
_logs_dir = _project_root / "logs"
# Ensure logs directory exists
_logs_dir.mkdir(parents=True, exist_ok=True)
download_error_logger = logging.getLogger("DownloadErrors")
if not download_error_logger.handlers:
download_error_handler = logging.FileHandler("../../download_errors.log")
log_path = _logs_dir / "download_errors.log"
download_error_handler = logging.FileHandler(str(log_path))
download_error_handler.setLevel(logging.ERROR)
download_error_logger.addHandler(download_error_handler)
noKeyFound_logger = logging.getLogger("NoKeyFound")
if not noKeyFound_logger.handlers:
noKeyFound_handler = logging.FileHandler("../../NoKeyFound.log")
log_path = _logs_dir / "no_key_found.log"
noKeyFound_handler = logging.FileHandler(str(log_path))
noKeyFound_handler.setLevel(logging.ERROR)
noKeyFound_logger.addHandler(noKeyFound_handler)
@@ -258,23 +271,52 @@ class AniworldLoader(Loader):
return ""
def _get_key_html(self, key: str):
"""Get cached HTML for series key."""
"""Get cached HTML for series key.
Args:
key: Series identifier (will be URL-encoded for safety)
Returns:
Cached or fetched HTML response
"""
if key in self._KeyHTMLDict:
return self._KeyHTMLDict[key]
# Sanitize key parameter for URL
safe_key = quote(key, safe='')
self._KeyHTMLDict[key] = self.session.get(
f"{self.ANIWORLD_TO}/anime/stream/{key}",
f"{self.ANIWORLD_TO}/anime/stream/{safe_key}",
timeout=self.DEFAULT_REQUEST_TIMEOUT
)
return self._KeyHTMLDict[key]
def _get_episode_html(self, season: int, episode: int, key: str):
"""Get cached HTML for episode."""
"""Get cached HTML for episode.
Args:
season: Season number (validated to be positive)
episode: Episode number (validated to be positive)
key: Series identifier (will be URL-encoded for safety)
Returns:
Cached or fetched HTML response
Raises:
ValueError: If season or episode are invalid
"""
# Validate season and episode numbers
if season < 1 or season > 999:
raise ValueError(f"Invalid season number: {season}")
if episode < 1 or episode > 9999:
raise ValueError(f"Invalid episode number: {episode}")
if key in self._EpisodeHTMLDict:
return self._EpisodeHTMLDict[(key, season, episode)]
# Sanitize key parameter for URL
safe_key = quote(key, safe='')
link = (
f"{self.ANIWORLD_TO}/anime/stream/{key}/"
f"{self.ANIWORLD_TO}/anime/stream/{safe_key}/"
f"staffel-{season}/episode-{episode}"
)
html = self.session.get(link, timeout=self.DEFAULT_REQUEST_TIMEOUT)
@@ -396,7 +438,17 @@ class AniworldLoader(Loader):
).get_link(embeded_link, self.DEFAULT_REQUEST_TIMEOUT)
def get_season_episode_count(self, slug: str) -> dict:
base_url = f"{self.ANIWORLD_TO}/anime/stream/{slug}/"
"""Get episode count for each season.
Args:
slug: Series identifier (will be URL-encoded for safety)
Returns:
Dictionary mapping season numbers to episode counts
"""
# Sanitize slug parameter for URL
safe_slug = quote(slug, safe='')
base_url = f"{self.ANIWORLD_TO}/anime/stream/{safe_slug}/"
response = requests.get(base_url, timeout=self.DEFAULT_REQUEST_TIMEOUT)
soup = BeautifulSoup(response.content, 'html.parser')

View File

@@ -596,7 +596,33 @@ class EnhancedAniWorldLoader(Loader):
@with_error_recovery(max_retries=2, context="get_episode_html")
def _GetEpisodeHTML(self, season: int, episode: int, key: str):
"""Get cached HTML for specific episode."""
"""Get cached HTML for specific episode.
Args:
season: Season number (must be 1-999)
episode: Episode number (must be 1-9999)
key: Series identifier (should be non-empty)
Returns:
Cached or fetched HTML response
Raises:
ValueError: If parameters are invalid
NonRetryableError: If episode not found (404)
RetryableError: If HTTP error occurs
"""
# Validate parameters
if not key or not key.strip():
raise ValueError("Series key cannot be empty")
if season < 1 or season > 999:
raise ValueError(
f"Invalid season number: {season} (must be 1-999)"
)
if episode < 1 or episode > 9999:
raise ValueError(
f"Invalid episode number: {episode} (must be 1-9999)"
)
cache_key = (key, season, episode)
if cache_key in self._EpisodeHTMLDict:
return self._EpisodeHTMLDict[cache_key]

View File

@@ -52,11 +52,13 @@ class Doodstream(Provider):
charset = string.ascii_letters + string.digits
return "".join(random.choices(charset, k=length))
# WARNING: SSL verification disabled for doodstream compatibility
# This is a known limitation with this streaming provider
response = requests.get(
embedded_link,
headers=headers,
timeout=timeout,
verify=False,
verify=True, # Changed from False for security
)
response.raise_for_status()
@@ -71,7 +73,7 @@ class Doodstream(Provider):
raise ValueError(f"Token not found using {embedded_link}.")
md5_response = requests.get(
full_md5_url, headers=headers, timeout=timeout, verify=False
full_md5_url, headers=headers, timeout=timeout, verify=True
)
md5_response.raise_for_status()
video_base_url = md5_response.text.strip()

View File

@@ -1,13 +1,32 @@
import requests
import json
from urllib.parse import urlparse
import requests
# TODO: downloading via LoadX is not implemented yet; only link extraction works.
def get_direct_link_from_loadx(embeded_loadx_link: str):
"""Extract direct download link from LoadX streaming provider.
Args:
embeded_loadx_link: Embedded LoadX link
Returns:
str: Direct video URL
Raises:
ValueError: If link extraction fails
"""
# Default timeout for network requests
timeout = 30
response = requests.head(
embeded_loadx_link, allow_redirects=True, verify=False)
embeded_loadx_link,
allow_redirects=True,
verify=True,
timeout=timeout
)
parsed_url = urlparse(response.url)
path_parts = parsed_url.path.split("/")
@@ -19,7 +38,12 @@ def get_direct_link_from_loadx(embeded_loadx_link: str):
post_url = f"https://{host}/player/index.php?data={id_hash}&do=getVideo"
headers = {"X-Requested-With": "XMLHttpRequest"}
response = requests.post(post_url, headers=headers, verify=False)
response = requests.post(
post_url,
headers=headers,
verify=True,
timeout=timeout
)
data = json.loads(response.text)
print(data)