fix(providers): rotate, probe and fall back on 404

Iterate providers actually advertised on the episode page (ordered by
SUPPORTED_PROVIDERS preference) instead of always re-resolving VOE.
Each candidate is HEAD-probed before yt-dlp runs, so dead links are
skipped immediately; direct video URLs use a streaming fast path that
bypasses yt-dlp; total failure now logs the exhausted provider list.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-25 14:32:10 +02:00
parent c579235af0
commit a115215416
3 changed files with 629 additions and 24 deletions

View File

@@ -249,6 +249,118 @@ class AniworldLoader(Loader):
logger.debug("Available languages for S%02dE%03d: %s, requested: %s, available: %s", season, episode, languages, language_code, is_available)
return is_available
def _check_url_alive(
self,
url: str,
headers: dict | None = None,
timeout: int = 10,
) -> bool:
"""Probe a provider URL with HEAD before committing to yt-dlp.
Skips dead providers quickly so the failover loop never blocks
waiting for yt-dlp to fail on a 404. Falls back to a streaming
GET when HEAD is not allowed by the upstream server.
Args:
url: URL to probe.
headers: Optional headers to forward with the probe.
timeout: Per-request timeout (seconds).
Returns:
True when the URL responds with a non-4xx status, else False.
"""
try:
response = self.session.head(
url,
headers=headers,
timeout=timeout,
allow_redirects=True,
)
if response.status_code == 405:
response = self.session.get(
url,
headers=headers,
timeout=timeout,
stream=True,
allow_redirects=True,
)
response.close()
if 400 <= response.status_code < 500:
logger.warning(
"Provider URL returned HTTP %s: %s",
response.status_code, url
)
return False
return True
except requests.RequestException as exc:
logger.warning("Provider URL unreachable %s: %s", url, exc)
return False
def _try_direct_stream(
self,
link: str,
output_path: str,
headers: dict | None,
timeout: int,
) -> bool:
"""Stream a direct video URL to disk without yt-dlp.
Used as a fast-path when the resolved provider link already points
at a downloadable video file (``Content-Type: video/*`` or
``application/octet-stream``). HLS and other non-video payloads
are rejected so the caller can fall back to yt-dlp.
Args:
link: Direct download URL.
output_path: Destination file path.
headers: Optional HTTP headers.
timeout: Per-request timeout (seconds).
Returns:
True on a successful save, False when the link is not a
direct video or the download fails.
"""
try:
with self.session.get(
link,
headers=headers,
timeout=timeout,
stream=True,
) as response:
if not response.ok:
logger.debug(
"Direct stream HEAD returned %s for %s",
response.status_code, link[:80]
)
return False
content_type = response.headers.get("Content-Type", "")
if not (
content_type.startswith("video/")
or content_type == "application/octet-stream"
):
logger.debug(
"Direct stream skipped, Content-Type=%s",
content_type
)
return False
logger.info(
"Direct stream download starting (type=%s)",
content_type
)
with open(output_path, "wb") as fh:
for chunk in response.iter_content(chunk_size=1024 * 1024):
if self._cancel_flag.is_set():
logger.info(
"Cancellation detected during direct stream"
)
return False
if chunk:
fh.write(chunk)
return True
except requests.RequestException as exc:
logger.warning("Direct stream download failed: %s", exc)
return False
def download(
self,
base_directory: str,
@@ -259,7 +371,12 @@ class AniworldLoader(Loader):
language: str = "German Dub"
) -> bool:
"""Download episode to specified directory.
Iterates the providers actually advertised on the episode page
(ordered by SUPPORTED_PROVIDERS preference), probing each URL
before attempting an extraction so dead providers are skipped
immediately instead of stalling yt-dlp on a 404.
Args:
base_directory: Base download directory path
serie_folder: Filesystem folder name (metadata only, used for
@@ -308,12 +425,78 @@ class AniworldLoader(Loader):
temp_path = os.path.join(temp_dir, output_file)
logger.debug("Temporary path: %s", temp_path)
for provider in self.SUPPORTED_PROVIDERS:
logger.debug("Attempting download with provider: %s", provider)
link, header = self._get_direct_link_from_provider(
candidate_providers = self._select_providers_for_episode(
season, episode, key, language
)
if not candidate_providers:
logger.error(
"No providers advertised for S%02dE%03d (%s) in %s",
season, episode, key, language
)
logger.debug("Direct link obtained from provider")
self.clear_cache()
return False
tried: list[str] = []
for provider_name, redirect_url in candidate_providers:
tried.append(provider_name)
logger.debug("Attempting download with provider: %s", provider_name)
probe_headers = {"User-Agent": self.RANDOM_USER_AGENT}
if not self._check_url_alive(
redirect_url,
headers=probe_headers,
timeout=self.DEFAULT_REQUEST_TIMEOUT,
):
logger.info(
"Skipping provider %s, redirect URL not reachable",
provider_name
)
continue
try:
resolved = self._resolve_direct_link(
redirect_url, provider_name
)
except Exception as exc:
logger.warning(
"Provider %s link resolution failed: %s: %s",
provider_name, type(exc).__name__, exc
)
continue
if resolved is None:
logger.info(
"Provider %s returned no direct link", provider_name
)
continue
link, header = resolved
if self._cancel_flag.is_set():
logger.info("Cancellation requested before download start")
_cleanup_temp_file(temp_path)
self.clear_cache()
return False
if self._try_direct_stream(
link,
temp_path,
header,
self.DEFAULT_REQUEST_TIMEOUT,
) and os.path.exists(temp_path):
logger.debug(
"Direct stream succeeded with provider %s", provider_name
)
shutil.copyfile(temp_path, output_path)
os.remove(temp_path)
logger.info(
"Download completed successfully (direct): %s",
output_file
)
self.clear_cache()
return True
_cleanup_temp_file(temp_path)
cancel_flag = self._cancel_flag
@@ -321,7 +504,6 @@ class AniworldLoader(Loader):
if cancel_flag.is_set():
logger.info("Cancellation detected in progress hook")
raise DownloadCancelled("Download cancelled by user")
# Fire the event for progress
self.events.download_progress(d)
ydl_opts = {
@@ -333,7 +515,6 @@ class AniworldLoader(Loader):
'nocheckcertificate': True,
'logger': logger,
'progress_hooks': [events_progress_hook],
# Use ffmpeg for HLS streams and transport stream format
'downloader': 'ffmpeg',
'hls_use_mpegts': True,
}
@@ -343,9 +524,11 @@ class AniworldLoader(Loader):
logger.debug("Using custom headers for download")
try:
logger.info("Starting download: %s", output_file)
logger.info(
"Starting yt-dlp download with %s: %s",
provider_name, output_file
)
logger.debug("Download link: %s...", link[:100])
logger.debug("YDL options: %s", ydl_opts)
with YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(link, download=True)
@@ -356,39 +539,151 @@ class AniworldLoader(Loader):
if os.path.exists(temp_path):
logger.debug("Moving file from temp to final destination")
# Use copyfile instead of copy to avoid metadata permission issues
shutil.copyfile(temp_path, output_path)
os.remove(temp_path)
logger.info("Download completed successfully: %s", output_file)
logger.info(
"Download completed successfully: %s", output_file
)
self.clear_cache()
return True
else:
logger.error("Download failed: temp file not found at %s", temp_path)
self.clear_cache()
return False
except BrokenPipeError as e:
logger.error(
"Broken pipe error with provider %s: %s. "
"This usually means the stream connection was closed.",
provider, e
"Download failed: temp file not found at %s", temp_path
)
except DownloadCancelled:
logger.info("Download cancelled by user")
_cleanup_temp_file(temp_path)
self.clear_cache()
return False
except BrokenPipeError as exc:
logger.error(
"Broken pipe error with provider %s: %s",
provider_name, exc
)
_cleanup_temp_file(temp_path)
continue
except Exception as e:
except Exception as exc:
logger.error(
"YoutubeDL download failed with provider %s: %s: %s",
provider, type(e).__name__, e
provider_name, type(exc).__name__, exc
)
_cleanup_temp_file(temp_path)
continue
break
# If we get here, all providers failed
logger.error("All download providers failed")
logger.error(
"All download providers failed for S%02dE%03d (%s) in %s. "
"Tried: %s. Episode may be unavailable on the source site.",
season, episode, key, language, ", ".join(tried) or "none"
)
download_error_logger.error(
"All providers failed for %s S%02dE%03d (%s); tried=%s",
key, season, episode, language, tried
)
_cleanup_temp_file(temp_path)
self.clear_cache()
return False
def _select_providers_for_episode(
self,
season: int,
episode: int,
key: str,
language: str,
) -> list[tuple[str, str]]:
"""Return ``[(provider_name, redirect_url), ...]`` for an episode.
Filters by requested language and orders results by
``SUPPORTED_PROVIDERS`` preference so the failover chain matches
operator expectations. Returns an empty list when nothing is
advertised on the page.
"""
if not self.is_language(season, episode, key, language):
logger.warning(
"Language %s not advertised for S%02dE%03d (%s)",
language, season, episode, key
)
return []
language_code = self._get_language_key(language)
providers = self._get_provider_from_html(season, episode, key)
ordered: list[tuple[str, str]] = []
preferred = list(self.SUPPORTED_PROVIDERS)
for name in preferred:
lang_map = providers.get(name)
if lang_map and language_code in lang_map:
ordered.append((name, lang_map[language_code]))
for name, lang_map in providers.items():
if name in preferred:
continue
if language_code in lang_map:
ordered.append((name, lang_map[language_code]))
return ordered
def _resolve_direct_link(
self,
redirect_url: str,
provider_name: str,
) -> tuple[str, dict] | None:
"""Resolve a provider redirect URL into a direct stream link.
Follows the redirect to the embedded player, then delegates to a
provider-specific extractor (when registered) or returns the
embed URL itself so yt-dlp can attempt extraction.
Args:
redirect_url: AniWorld redirect URL.
provider_name: Provider key (e.g. ``"VOE"``).
Returns:
``(direct_link, headers)`` tuple or None when extraction fails.
"""
try:
embedded = self.session.get(
redirect_url,
timeout=self.DEFAULT_REQUEST_TIMEOUT,
headers={"User-Agent": self.RANDOM_USER_AGENT},
allow_redirects=True,
).url
except requests.RequestException as exc:
logger.warning(
"Failed resolving redirect for %s: %s", provider_name, exc
)
return None
try:
extractor = self.Providers.GetProvider(provider_name)
except (KeyError, AttributeError):
extractor = None
if extractor is not None:
try:
return extractor.get_link(
embedded, self.DEFAULT_REQUEST_TIMEOUT
)
except Exception as exc:
logger.warning(
"Custom extractor %s failed: %s",
provider_name, exc
)
return None
header_list = self.PROVIDER_HEADERS.get(provider_name)
header_dict = self._parse_provider_headers(header_list)
return embedded, header_dict
@staticmethod
def _parse_provider_headers(
header_list: list | None,
) -> dict[str, str]:
"""Convert legacy ``"Name: value"`` header strings to a dict."""
if not header_list:
return {}
parsed: dict[str, str] = {}
for entry in header_list:
if not isinstance(entry, str) or ":" not in entry:
continue
name, _, value = entry.partition(":")
parsed[name.strip()] = value.strip().strip('"')
return parsed
def get_site_key(self) -> str:
    """Return the site identifier string for this loader."""
    site_key = "aniworld.to"
    return site_key