fix(providers): rotate, probe and fall back on 404
Iterate providers actually advertised on the episode page (ordered by SUPPORTED_PROVIDERS preference) instead of always re-resolving VOE. Each candidate is HEAD-probed before yt-dlp runs, so dead links are skipped immediately; direct video URLs use a streaming fast path that bypasses yt-dlp; total failure now logs the exhausted provider list. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -249,6 +249,118 @@ class AniworldLoader(Loader):
|
||||
logger.debug("Available languages for S%02dE%03d: %s, requested: %s, available: %s", season, episode, languages, language_code, is_available)
|
||||
return is_available
|
||||
|
||||
def _check_url_alive(
|
||||
self,
|
||||
url: str,
|
||||
headers: dict | None = None,
|
||||
timeout: int = 10,
|
||||
) -> bool:
|
||||
"""Probe a provider URL with HEAD before committing to yt-dlp.
|
||||
|
||||
Skips dead providers quickly so the failover loop never blocks
|
||||
waiting for yt-dlp to fail on a 404. Falls back to a streaming
|
||||
GET when HEAD is not allowed by the upstream server.
|
||||
|
||||
Args:
|
||||
url: URL to probe.
|
||||
headers: Optional headers to forward with the probe.
|
||||
timeout: Per-request timeout (seconds).
|
||||
|
||||
Returns:
|
||||
True when the URL responds with a non-4xx status, else False.
|
||||
"""
|
||||
try:
|
||||
response = self.session.head(
|
||||
url,
|
||||
headers=headers,
|
||||
timeout=timeout,
|
||||
allow_redirects=True,
|
||||
)
|
||||
if response.status_code == 405:
|
||||
response = self.session.get(
|
||||
url,
|
||||
headers=headers,
|
||||
timeout=timeout,
|
||||
stream=True,
|
||||
allow_redirects=True,
|
||||
)
|
||||
response.close()
|
||||
if 400 <= response.status_code < 500:
|
||||
logger.warning(
|
||||
"Provider URL returned HTTP %s: %s",
|
||||
response.status_code, url
|
||||
)
|
||||
return False
|
||||
return True
|
||||
except requests.RequestException as exc:
|
||||
logger.warning("Provider URL unreachable %s: %s", url, exc)
|
||||
return False
|
||||
|
||||
def _try_direct_stream(
    self,
    link: str,
    output_path: str,
    headers: dict | None,
    timeout: int,
) -> bool:
    """Stream a direct video URL to disk without yt-dlp.

    Used as a fast path when the resolved provider link already points
    at a downloadable video file (media type ``video/*`` or
    ``application/octet-stream``). Any other payload is rejected so the
    caller can fall back to yt-dlp.

    The media type is compared case-insensitively and without its
    parameters, so ``Application/Octet-Stream; charset=binary`` is
    accepted just like ``application/octet-stream``.

    A partially written file may be left at ``output_path`` when the
    transfer is cancelled or fails; removing it is left to the caller.

    Args:
        link: Direct download URL.
        output_path: Destination file path.
        headers: Optional HTTP headers.
        timeout: Per-request timeout (seconds).

    Returns:
        True on a successful save, False when the link is not a direct
        video, the download is cancelled, or the request / file write
        fails.
    """
    try:
        with self.session.get(
            link,
            headers=headers,
            timeout=timeout,
            stream=True,
        ) as response:
            if not response.ok:
                logger.debug(
                    "Direct stream GET returned %s for %s",
                    response.status_code, link[:80]
                )
                return False

            content_type = response.headers.get("Content-Type", "")
            # Only the media type matters: drop parameters such as
            # "; charset=..." and ignore case before comparing.
            media_type = content_type.partition(";")[0].strip().lower()
            is_direct_video = (
                media_type.startswith("video/")
                or media_type == "application/octet-stream"
            )
            if not is_direct_video:
                logger.debug(
                    "Direct stream skipped, Content-Type=%s",
                    content_type
                )
                return False

            logger.info(
                "Direct stream download starting (type=%s)",
                content_type
            )
            with open(output_path, "wb") as fh:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    # Check between chunks so a cancel request takes
                    # effect without waiting for the whole file.
                    if self._cancel_flag.is_set():
                        logger.info(
                            "Cancellation detected during direct stream"
                        )
                        return False
                    if chunk:
                        fh.write(chunk)
            return True
    except (requests.RequestException, OSError) as exc:
        # OSError covers local failures (unwritable path, disk full) so
        # they are reported as a failed attempt instead of escaping.
        logger.warning("Direct stream download failed: %s", exc)
        return False
|
||||
|
||||
def download(
|
||||
self,
|
||||
base_directory: str,
|
||||
@@ -259,7 +371,12 @@ class AniworldLoader(Loader):
|
||||
language: str = "German Dub"
|
||||
) -> bool:
|
||||
"""Download episode to specified directory.
|
||||
|
||||
|
||||
Iterates the providers actually advertised on the episode page
|
||||
(ordered by SUPPORTED_PROVIDERS preference), probing each URL
|
||||
before attempting an extraction so dead providers are skipped
|
||||
immediately instead of stalling yt-dlp on a 404.
|
||||
|
||||
Args:
|
||||
base_directory: Base download directory path
|
||||
serie_folder: Filesystem folder name (metadata only, used for
|
||||
@@ -308,12 +425,78 @@ class AniworldLoader(Loader):
|
||||
temp_path = os.path.join(temp_dir, output_file)
|
||||
logger.debug("Temporary path: %s", temp_path)
|
||||
|
||||
for provider in self.SUPPORTED_PROVIDERS:
|
||||
logger.debug("Attempting download with provider: %s", provider)
|
||||
link, header = self._get_direct_link_from_provider(
|
||||
candidate_providers = self._select_providers_for_episode(
|
||||
season, episode, key, language
|
||||
)
|
||||
if not candidate_providers:
|
||||
logger.error(
|
||||
"No providers advertised for S%02dE%03d (%s) in %s",
|
||||
season, episode, key, language
|
||||
)
|
||||
logger.debug("Direct link obtained from provider")
|
||||
self.clear_cache()
|
||||
return False
|
||||
|
||||
tried: list[str] = []
|
||||
for provider_name, redirect_url in candidate_providers:
|
||||
tried.append(provider_name)
|
||||
logger.debug("Attempting download with provider: %s", provider_name)
|
||||
|
||||
probe_headers = {"User-Agent": self.RANDOM_USER_AGENT}
|
||||
if not self._check_url_alive(
|
||||
redirect_url,
|
||||
headers=probe_headers,
|
||||
timeout=self.DEFAULT_REQUEST_TIMEOUT,
|
||||
):
|
||||
logger.info(
|
||||
"Skipping provider %s, redirect URL not reachable",
|
||||
provider_name
|
||||
)
|
||||
continue
|
||||
|
||||
try:
|
||||
resolved = self._resolve_direct_link(
|
||||
redirect_url, provider_name
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Provider %s link resolution failed: %s: %s",
|
||||
provider_name, type(exc).__name__, exc
|
||||
)
|
||||
continue
|
||||
|
||||
if resolved is None:
|
||||
logger.info(
|
||||
"Provider %s returned no direct link", provider_name
|
||||
)
|
||||
continue
|
||||
|
||||
link, header = resolved
|
||||
|
||||
if self._cancel_flag.is_set():
|
||||
logger.info("Cancellation requested before download start")
|
||||
_cleanup_temp_file(temp_path)
|
||||
self.clear_cache()
|
||||
return False
|
||||
|
||||
if self._try_direct_stream(
|
||||
link,
|
||||
temp_path,
|
||||
header,
|
||||
self.DEFAULT_REQUEST_TIMEOUT,
|
||||
) and os.path.exists(temp_path):
|
||||
logger.debug(
|
||||
"Direct stream succeeded with provider %s", provider_name
|
||||
)
|
||||
shutil.copyfile(temp_path, output_path)
|
||||
os.remove(temp_path)
|
||||
logger.info(
|
||||
"Download completed successfully (direct): %s",
|
||||
output_file
|
||||
)
|
||||
self.clear_cache()
|
||||
return True
|
||||
|
||||
_cleanup_temp_file(temp_path)
|
||||
|
||||
cancel_flag = self._cancel_flag
|
||||
|
||||
@@ -321,7 +504,6 @@ class AniworldLoader(Loader):
|
||||
if cancel_flag.is_set():
|
||||
logger.info("Cancellation detected in progress hook")
|
||||
raise DownloadCancelled("Download cancelled by user")
|
||||
# Fire the event for progress
|
||||
self.events.download_progress(d)
|
||||
|
||||
ydl_opts = {
|
||||
@@ -333,7 +515,6 @@ class AniworldLoader(Loader):
|
||||
'nocheckcertificate': True,
|
||||
'logger': logger,
|
||||
'progress_hooks': [events_progress_hook],
|
||||
# Use ffmpeg for HLS streams and transport stream format
|
||||
'downloader': 'ffmpeg',
|
||||
'hls_use_mpegts': True,
|
||||
}
|
||||
@@ -343,9 +524,11 @@ class AniworldLoader(Loader):
|
||||
logger.debug("Using custom headers for download")
|
||||
|
||||
try:
|
||||
logger.info("Starting download: %s", output_file)
|
||||
logger.info(
|
||||
"Starting yt-dlp download with %s: %s",
|
||||
provider_name, output_file
|
||||
)
|
||||
logger.debug("Download link: %s...", link[:100])
|
||||
logger.debug("YDL options: %s", ydl_opts)
|
||||
|
||||
with YoutubeDL(ydl_opts) as ydl:
|
||||
info = ydl.extract_info(link, download=True)
|
||||
@@ -356,39 +539,151 @@ class AniworldLoader(Loader):
|
||||
|
||||
if os.path.exists(temp_path):
|
||||
logger.debug("Moving file from temp to final destination")
|
||||
# Use copyfile instead of copy to avoid metadata permission issues
|
||||
shutil.copyfile(temp_path, output_path)
|
||||
os.remove(temp_path)
|
||||
logger.info("Download completed successfully: %s", output_file)
|
||||
logger.info(
|
||||
"Download completed successfully: %s", output_file
|
||||
)
|
||||
self.clear_cache()
|
||||
return True
|
||||
else:
|
||||
logger.error("Download failed: temp file not found at %s", temp_path)
|
||||
self.clear_cache()
|
||||
return False
|
||||
except BrokenPipeError as e:
|
||||
logger.error(
|
||||
"Broken pipe error with provider %s: %s. "
|
||||
"This usually means the stream connection was closed.",
|
||||
provider, e
|
||||
"Download failed: temp file not found at %s", temp_path
|
||||
)
|
||||
except DownloadCancelled:
|
||||
logger.info("Download cancelled by user")
|
||||
_cleanup_temp_file(temp_path)
|
||||
self.clear_cache()
|
||||
return False
|
||||
except BrokenPipeError as exc:
|
||||
logger.error(
|
||||
"Broken pipe error with provider %s: %s",
|
||||
provider_name, exc
|
||||
)
|
||||
_cleanup_temp_file(temp_path)
|
||||
continue
|
||||
except Exception as e:
|
||||
except Exception as exc:
|
||||
logger.error(
|
||||
"YoutubeDL download failed with provider %s: %s: %s",
|
||||
provider, type(e).__name__, e
|
||||
provider_name, type(exc).__name__, exc
|
||||
)
|
||||
_cleanup_temp_file(temp_path)
|
||||
continue
|
||||
break
|
||||
|
||||
# If we get here, all providers failed
|
||||
logger.error("All download providers failed")
|
||||
logger.error(
|
||||
"All download providers failed for S%02dE%03d (%s) in %s. "
|
||||
"Tried: %s. Episode may be unavailable on the source site.",
|
||||
season, episode, key, language, ", ".join(tried) or "none"
|
||||
)
|
||||
download_error_logger.error(
|
||||
"All providers failed for %s S%02dE%03d (%s); tried=%s",
|
||||
key, season, episode, language, tried
|
||||
)
|
||||
_cleanup_temp_file(temp_path)
|
||||
self.clear_cache()
|
||||
return False
|
||||
|
||||
def _select_providers_for_episode(
|
||||
self,
|
||||
season: int,
|
||||
episode: int,
|
||||
key: str,
|
||||
language: str,
|
||||
) -> list[tuple[str, str]]:
|
||||
"""Return ``[(provider_name, redirect_url), ...]`` for an episode.
|
||||
|
||||
Filters by requested language and orders results by
|
||||
``SUPPORTED_PROVIDERS`` preference so the failover chain matches
|
||||
operator expectations. Returns an empty list when nothing is
|
||||
advertised on the page.
|
||||
"""
|
||||
if not self.is_language(season, episode, key, language):
|
||||
logger.warning(
|
||||
"Language %s not advertised for S%02dE%03d (%s)",
|
||||
language, season, episode, key
|
||||
)
|
||||
return []
|
||||
language_code = self._get_language_key(language)
|
||||
providers = self._get_provider_from_html(season, episode, key)
|
||||
ordered: list[tuple[str, str]] = []
|
||||
preferred = list(self.SUPPORTED_PROVIDERS)
|
||||
for name in preferred:
|
||||
lang_map = providers.get(name)
|
||||
if lang_map and language_code in lang_map:
|
||||
ordered.append((name, lang_map[language_code]))
|
||||
for name, lang_map in providers.items():
|
||||
if name in preferred:
|
||||
continue
|
||||
if language_code in lang_map:
|
||||
ordered.append((name, lang_map[language_code]))
|
||||
return ordered
|
||||
|
||||
def _resolve_direct_link(
|
||||
self,
|
||||
redirect_url: str,
|
||||
provider_name: str,
|
||||
) -> tuple[str, dict] | None:
|
||||
"""Resolve a provider redirect URL into a direct stream link.
|
||||
|
||||
Follows the redirect to the embedded player, then delegates to a
|
||||
provider-specific extractor (when registered) or returns the
|
||||
embed URL itself so yt-dlp can attempt extraction.
|
||||
|
||||
Args:
|
||||
redirect_url: AniWorld redirect URL.
|
||||
provider_name: Provider key (e.g. ``"VOE"``).
|
||||
|
||||
Returns:
|
||||
``(direct_link, headers)`` tuple or None when extraction fails.
|
||||
"""
|
||||
try:
|
||||
embedded = self.session.get(
|
||||
redirect_url,
|
||||
timeout=self.DEFAULT_REQUEST_TIMEOUT,
|
||||
headers={"User-Agent": self.RANDOM_USER_AGENT},
|
||||
allow_redirects=True,
|
||||
).url
|
||||
except requests.RequestException as exc:
|
||||
logger.warning(
|
||||
"Failed resolving redirect for %s: %s", provider_name, exc
|
||||
)
|
||||
return None
|
||||
|
||||
try:
|
||||
extractor = self.Providers.GetProvider(provider_name)
|
||||
except (KeyError, AttributeError):
|
||||
extractor = None
|
||||
|
||||
if extractor is not None:
|
||||
try:
|
||||
return extractor.get_link(
|
||||
embedded, self.DEFAULT_REQUEST_TIMEOUT
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning(
|
||||
"Custom extractor %s failed: %s",
|
||||
provider_name, exc
|
||||
)
|
||||
return None
|
||||
|
||||
header_list = self.PROVIDER_HEADERS.get(provider_name)
|
||||
header_dict = self._parse_provider_headers(header_list)
|
||||
return embedded, header_dict
|
||||
|
||||
@staticmethod
|
||||
def _parse_provider_headers(
|
||||
header_list: list | None,
|
||||
) -> dict[str, str]:
|
||||
"""Convert legacy ``"Name: value"`` header strings to a dict."""
|
||||
if not header_list:
|
||||
return {}
|
||||
parsed: dict[str, str] = {}
|
||||
for entry in header_list:
|
||||
if not isinstance(entry, str) or ":" not in entry:
|
||||
continue
|
||||
name, _, value = entry.partition(":")
|
||||
parsed[name.strip()] = value.strip().strip('"')
|
||||
return parsed
|
||||
|
||||
def get_site_key(self) -> str:
    """Return the site key string, ``"aniworld.to"``."""
    site_key = "aniworld.to"
    return site_key
|
||||
|
||||
Reference in New Issue
Block a user