Fix get_title and get_provider null safety, add provider edge case tests

This commit is contained in:
2026-02-07 19:13:40 +01:00
parent 88f3219126
commit 60e5b5ccda
2 changed files with 273 additions and 4 deletions

View File

@@ -373,7 +373,10 @@ class AniworldLoader(Loader):
title_div = soup.find('div', class_='series-title')
if title_div:
title = title_div.find('h1').find('span').text
h1_tag = title_div.find('h1')
span_tag = h1_tag.find('span') if h1_tag else None
if span_tag:
title = span_tag.text
logging.debug(f"Found title: {title}")
return title
@@ -531,7 +534,7 @@ class AniworldLoader(Loader):
redirect_link_tag = link.find('a', class_='watchEpisode')
redirect_link = (
redirect_link_tag['href']
redirect_link_tag.get('href')
if redirect_link_tag else None
)

View File

@@ -0,0 +1,266 @@
"""Edge case tests for the provider system.
Tests malformed HTML, missing data, Unicode, special characters, empty responses,
partial data, and timeout scenarios in AniworldLoader / AniworldProvider.
"""
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from src.core.providers.aniworld_provider import AniworldLoader
def _mock_response(content: str) -> MagicMock:
    """Build a stand-in for an HTTP response carrying *content*.

    Args:
        content: Body text the fake response should expose.

    Returns:
        A ``MagicMock`` whose ``text`` is *content*, whose ``content`` is the
        UTF-8 encoded bytes of *content*, and whose ``status_code`` is 200.
    """
    # Keyword arguments given to MagicMock are set as attributes on the mock.
    return MagicMock(
        content=content.encode("utf-8"),
        text=content,
        status_code=200,
    )
def _loader() -> AniworldLoader:
    """Build an ``AniworldLoader`` without running its ``__init__``.

    The instance is allocated with ``__new__`` and then given the attributes
    the tests rely on: empty HTML caches, the base URL, a request timeout,
    an empty set of invalid path characters, and a ``MagicMock`` in place of
    the HTTP session so no real request is sent through it.

    Returns:
        A freshly configured loader; each call returns a new object.
    """
    instance = AniworldLoader.__new__(AniworldLoader)
    # Insertion order mirrors the order in which the attributes are assigned.
    attributes = {
        "_KeyHTMLDict": {},
        "_EpisodeHTMLDict": {},
        "ANIWORLD_TO": "https://aniworld.to",
        "DEFAULT_REQUEST_TIMEOUT": 10,
        "session": MagicMock(),
        "INVALID_PATH_CHARS": set(),
    }
    for attr_name, attr_value in attributes.items():
        setattr(instance, attr_name, attr_value)
    return instance
# ---------------------------------------------------------------------------
# Malformed HTML responses
# ---------------------------------------------------------------------------
class TestMalformedHtmlResponses:
    """Checks ``get_title`` / ``get_year`` against broken or unexpected markup.

    Every test replaces ``_get_key_html`` on the loader with a canned
    response, so the markup under test is fully controlled by the test.
    """

    def test_get_title_empty_html(self):
        """A page without a series-title div gives an empty title."""
        sut = _loader()
        page = _mock_response("<html></html>")
        with patch.object(sut, "_get_key_html", return_value=page):
            title = sut.get_title("some-anime")
        assert title == ""

    def test_get_title_missing_span(self):
        """An h1 that contains no span gives an empty title."""
        sut = _loader()
        markup = '<div class="series-title"><h1>Just Text</h1></div>'
        page = _mock_response(markup)
        with patch.object(sut, "_get_key_html", return_value=page):
            title = sut.get_title("some-anime")
        assert title == ""

    def test_get_year_no_year_in_page(self):
        """Markup without any year text makes get_year return None."""
        sut = _loader()
        markup = '<div class="series-info"><p>No year here</p></div>'
        page = _mock_response(markup)
        with patch.object(sut, "_get_key_html", return_value=page):
            year = sut.get_year("some-anime")
        assert year is None

    def test_get_year_invalid_format(self):
        """A 'Jahr:' label followed by non-numeric text gives None."""
        sut = _loader()
        markup = '<div class="series-info"><p>Jahr: NotAYear</p></div>'
        page = _mock_response(markup)
        with patch.object(sut, "_get_key_html", return_value=page):
            year = sut.get_year("some-anime")
        assert year is None
# ---------------------------------------------------------------------------
# Missing episode data
# ---------------------------------------------------------------------------
class TestMissingEpisodeData:
    """Checks that ``_get_episode_html`` rejects out-of-range numbers.

    Arguments are passed positionally as (season, episode, key).
    """

    def test_get_episode_html_invalid_season(self):
        """Season 0 must raise a ValueError whose message matches 'season'."""
        sut = _loader()
        with pytest.raises(ValueError, match="season"):
            sut._get_episode_html(0, 1, "some-anime")

    def test_get_episode_html_invalid_episode(self):
        """Episode 0 must raise a ValueError whose message matches 'episode'."""
        sut = _loader()
        with pytest.raises(ValueError, match="episode"):
            sut._get_episode_html(1, 0, "some-anime")

    def test_get_episode_html_season_too_large(self):
        """Season 1000 must raise a ValueError (presumably the cap is 999)."""
        sut = _loader()
        with pytest.raises(ValueError):
            sut._get_episode_html(1000, 1, "some-anime")

    def test_get_episode_html_episode_too_large(self):
        """Episode 10000 must raise a ValueError (presumably the cap is 9999)."""
        sut = _loader()
        with pytest.raises(ValueError):
            sut._get_episode_html(1, 10000, "some-anime")
# ---------------------------------------------------------------------------
# Invalid streaming URLs
# ---------------------------------------------------------------------------
class TestInvalidStreamingUrls:
    """Checks ``_get_provider_from_html`` with broken or absent redirect links.

    ``_get_episode_html`` is patched on the loader so the episode markup is
    supplied directly by each test.
    """

    def test_provider_list_no_links(self):
        """Markup without episodeLink items yields an empty dict or list."""
        sut = _loader()
        page = _mock_response("<html><body><ul></ul></body></html>")
        with patch.object(sut, "_get_episode_html", return_value=page):
            providers = sut._get_provider_from_html(1, 1, "some-anime")
        assert providers == {} or providers == []

    def test_provider_link_missing_href(self):
        """A watchEpisode anchor lacking href must not make the call fail."""
        sut = _loader()
        markup = """
<html><body>
<li class="episodeLink1">
<h4>VOE</h4>
<a class="watchEpisode"></a>
</li>
</body></html>
"""
        page = _mock_response(markup)
        with patch.object(sut, "_get_episode_html", return_value=page):
            providers = sut._get_provider_from_html(1, 1, "some-anime")
        # Only the container type is pinned: the entry may be kept with an
        # empty href or dropped altogether.
        assert isinstance(providers, (list, dict))
# ---------------------------------------------------------------------------
# Unicode and special characters
# ---------------------------------------------------------------------------
class TestUnicodeAndSpecialCharacters:
    """Checks title extraction with non-ASCII text and language-key lookup."""

    def test_get_title_unicode(self):
        """A Japanese title inside the span is returned unchanged."""
        sut = _loader()
        markup = '<div class="series-title"><h1><span>進撃の巨人</span></h1></div>'
        page = _mock_response(markup)
        with patch.object(sut, "_get_key_html", return_value=page):
            title = sut.get_title("shingeki-no-kyojin")
        assert title == "進撃の巨人"

    def test_get_title_with_html_entities(self):
        """Both words around an ``&amp;`` entity survive in the title."""
        sut = _loader()
        markup = '<div class="series-title"><h1><span>Attack &amp; Titan</span></h1></div>'
        page = _mock_response(markup)
        with patch.object(sut, "_get_key_html", return_value=page):
            title = sut.get_title("attack-titan")
        for word in ("Attack", "Titan"):
            assert word in title

    def test_language_key_unknown_returns_zero(self):
        """An unrecognised language name maps to key 0."""
        sut = _loader()
        key = sut._get_language_key("Klingon")
        assert key == 0

    def test_language_key_known_values(self):
        """Each supported language name maps to its numeric key."""
        sut = _loader()
        expected_keys = (
            ("German Dub", 1),
            ("English Sub", 2),
            ("German Sub", 3),
        )
        for language, expected in expected_keys:
            assert sut._get_language_key(language) == expected
# ---------------------------------------------------------------------------
# Empty responses
# ---------------------------------------------------------------------------
class TestEmptyResponses:
    """Checks loader behaviour when the remote side returns nothing useful."""

    def test_fetch_anime_list_empty_json(self):
        """An empty JSON array is returned as an empty list."""
        sut = _loader()
        sut.session.get.return_value = _mock_response("[]")
        entries = sut.fetch_anime_list("https://aniworld.to/api/list")
        assert entries == []

    def test_fetch_anime_list_invalid_json(self):
        """A body that is not JSON makes fetch_anime_list raise ValueError."""
        sut = _loader()
        sut.session.get.return_value = _mock_response("not json")
        with pytest.raises(ValueError):
            sut.fetch_anime_list("https://aniworld.to/api/list")

    def test_get_season_episode_count_empty_html(self):
        """A page without season information still returns a dict or int."""
        sut = _loader()
        page = _mock_response("<html><head></head><body></body></html>")
        target = "src.core.providers.aniworld_provider.requests.get"
        with patch(target, return_value=page):
            counts = sut.get_season_episode_count("some-anime")
        # The exact value (e.g. an empty dict or {1: 0}) is left to the
        # implementation; only the type is pinned here.
        assert isinstance(counts, (dict, int))
# ---------------------------------------------------------------------------
# Partial data from providers
# ---------------------------------------------------------------------------
class TestPartialData:
    """Checks the loader when the markup has some expected elements but not all."""

    def test_provider_entry_missing_h4(self):
        """A provider entry without an h4 name tag must not make the call fail."""
        loader = _loader()
        html_str = """
<html><body>
<li class="episodeLink1">
<a class="watchEpisode" href="/redirect/12345" data-lang-key="1"></a>
</li>
</body></html>
"""
        with patch.object(loader, "_get_episode_html", return_value=_mock_response(html_str)):
            result = loader._get_provider_from_html(1, 1, "some-anime")
        # Only the container type is pinned; whether the nameless entry is
        # kept or skipped is left to the implementation.
        assert isinstance(result, (list, dict))

    def test_get_title_with_whitespace_only(self):
        """A span holding only whitespace gives a title that strips to empty."""
        loader = _loader()
        html_str = '<div class="series-title"><h1><span> </span></h1></div>'
        with patch.object(loader, "_get_key_html", return_value=_mock_response(html_str)):
            result = loader.get_title("some-anime")
        # The title may come back raw or already stripped. One check is
        # enough: the former `or len(result.strip()) == 0` clause was the
        # same condition written a second time.
        assert result.strip() == ""
# ---------------------------------------------------------------------------
# fetch_anime_list JSON edge cases
# ---------------------------------------------------------------------------
class TestFetchAnimeListEdgeCases:
    """Checks ``fetch_anime_list`` with awkward but recoverable JSON bodies.

    The mocked session returns the canned body for any ``get`` call.
    """

    def test_json_with_bom(self):
        """A body starting with a byte-order mark still parses to one entry."""
        sut = _loader()
        payload = '\ufeff[{"title": "Test"}]'
        sut.session.get.return_value = _mock_response(payload)
        entries = sut.fetch_anime_list("https://aniworld.to/api/list")
        assert len(entries) == 1

    def test_json_with_control_characters(self):
        """A raw control character inside a JSON string does not lose the entry."""
        sut = _loader()
        # The non-raw literal turns \t into a real tab character inside the
        # JSON string value.
        payload = '[{"title": "Test\tAnime"}]'
        sut.session.get.return_value = _mock_response(payload)
        entries = sut.fetch_anime_list("https://aniworld.to/api/list")
        assert len(entries) >= 1