diff --git a/src/core/providers/aniworld_provider.py b/src/core/providers/aniworld_provider.py index 65d169e..19f743a 100644 --- a/src/core/providers/aniworld_provider.py +++ b/src/core/providers/aniworld_provider.py @@ -373,9 +373,12 @@ class AniworldLoader(Loader): title_div = soup.find('div', class_='series-title') if title_div: - title = title_div.find('h1').find('span').text - logging.debug(f"Found title: {title}") - return title + h1_tag = title_div.find('h1') + span_tag = h1_tag.find('span') if h1_tag else None + if span_tag: + title = span_tag.text + logging.debug(f"Found title: {title}") + return title logging.warning(f"No title found for key: {key}") return "" @@ -531,7 +534,7 @@ class AniworldLoader(Loader): redirect_link_tag = link.find('a', class_='watchEpisode') redirect_link = ( - redirect_link_tag['href'] + redirect_link_tag.get('href') if redirect_link_tag else None ) diff --git a/tests/unit/test_provider_edge_cases.py b/tests/unit/test_provider_edge_cases.py new file mode 100644 index 0000000..daaa5ec --- /dev/null +++ b/tests/unit/test_provider_edge_cases.py @@ -0,0 +1,266 @@ +"""Edge case tests for the provider system. + +Tests malformed HTML, missing data, Unicode, special characters, empty responses, +partial data, and timeout scenarios in AniworldLoader / AniworldProvider. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from src.core.providers.aniworld_provider import AniworldLoader + + +def _mock_response(content: str) -> MagicMock: + """Create a mock requests.Response with .content and .text attributes.""" + resp = MagicMock() + resp.content = content.encode("utf-8") + resp.text = content + resp.status_code = 200 + return resp + + +def _loader() -> AniworldLoader: + """Create a fresh AniworldLoader with mocked network.""" + loader = AniworldLoader.__new__(AniworldLoader) + loader._KeyHTMLDict = {} + loader._EpisodeHTMLDict = {} + loader.ANIWORLD_TO = "https://aniworld.to" + loader.DEFAULT_REQUEST_TIMEOUT = 10 + loader.session = MagicMock() + loader.INVALID_PATH_CHARS = set() + return loader + + +# --------------------------------------------------------------------------- +# Malformed HTML responses +# --------------------------------------------------------------------------- + + +class TestMalformedHtmlResponses: + """Provider behaviour with broken / unexpected HTML.""" + + def test_get_title_empty_html(self): + """Returns empty string when HTML has no series-title div.""" + loader = _loader() + with patch.object(loader, "_get_key_html", return_value=_mock_response("")): + result = loader.get_title("some-anime") + assert result == "" + + def test_get_title_missing_span(self): + """Returns empty when h1 exists but span is missing.""" + loader = _loader() + html_str = '

Just Text

' + with patch.object(loader, "_get_key_html", return_value=_mock_response(html_str)): + result = loader.get_title("some-anime") + assert result == "" + + def test_get_year_no_year_in_page(self): + """Returns None when no year pattern is found.""" + loader = _loader() + html_str = '

No year here

' + with patch.object(loader, "_get_key_html", return_value=_mock_response(html_str)): + result = loader.get_year("some-anime") + assert result is None + + def test_get_year_invalid_format(self): + """Returns None when year label present but value is not numeric.""" + loader = _loader() + html_str = '

Jahr: NotAYear

' + with patch.object(loader, "_get_key_html", return_value=_mock_response(html_str)): + result = loader.get_year("some-anime") + assert result is None + + +# --------------------------------------------------------------------------- +# Missing episode data +# --------------------------------------------------------------------------- + + +class TestMissingEpisodeData: + """Provider with incomplete episode information.""" + + def test_get_episode_html_invalid_season(self): + """Raises ValueError for season < 1.""" + loader = _loader() + with pytest.raises(ValueError, match="season"): + loader._get_episode_html(0, 1, "some-anime") + + def test_get_episode_html_invalid_episode(self): + """Raises ValueError for episode < 1.""" + loader = _loader() + with pytest.raises(ValueError, match="episode"): + loader._get_episode_html(1, 0, "some-anime") + + def test_get_episode_html_season_too_large(self): + """Raises ValueError for season > 999.""" + loader = _loader() + with pytest.raises(ValueError): + loader._get_episode_html(1000, 1, "some-anime") + + def test_get_episode_html_episode_too_large(self): + """Raises ValueError for episode > 9999.""" + loader = _loader() + with pytest.raises(ValueError): + loader._get_episode_html(1, 10000, "some-anime") + + +# --------------------------------------------------------------------------- +# Invalid streaming URLs +# --------------------------------------------------------------------------- + + +class TestInvalidStreamingUrls: + """Provider data with broken redirect/streaming links.""" + + def test_provider_list_no_links(self): + """Returns empty dict when no episodeLink elements exist.""" + loader = _loader() + html_str = "" + with patch.object(loader, "_get_episode_html", return_value=_mock_response(html_str)): + result = loader._get_provider_from_html(1, 1, "some-anime") + assert result == {} or result == [] + + def test_provider_link_missing_href(self): + """Skips link entries without href attribute.""" + loader = _loader() + html_str = """ + +
  • +

    VOE

    + +
  • + + """ + with patch.object(loader, "_get_episode_html", return_value=_mock_response(html_str)): + result = loader._get_provider_from_html(1, 1, "some-anime") + # Should handle gracefully (empty href or skip) + assert isinstance(result, (list, dict)) + + +# --------------------------------------------------------------------------- +# Unicode and special characters +# --------------------------------------------------------------------------- + + +class TestUnicodeAndSpecialCharacters: + """Unicode anime titles, special filesystem characters.""" + + def test_get_title_unicode(self): + """Correctly extracts Unicode title.""" + loader = _loader() + html_str = '

    進撃の巨人

    ' + with patch.object(loader, "_get_key_html", return_value=_mock_response(html_str)): + result = loader.get_title("shingeki-no-kyojin") + assert result == "進撃の巨人" + + def test_get_title_with_html_entities(self): + """Handles HTML entities in titles.""" + loader = _loader() + html_str = '

    Attack & Titan

    ' + with patch.object(loader, "_get_key_html", return_value=_mock_response(html_str)): + result = loader.get_title("attack-titan") + assert "Attack" in result + assert "Titan" in result + + def test_language_key_unknown_returns_zero(self): + """Unknown language returns 0.""" + loader = _loader() + assert loader._get_language_key("Klingon") == 0 + + def test_language_key_known_values(self): + """Known languages map to correct numeric keys.""" + loader = _loader() + assert loader._get_language_key("German Dub") == 1 + assert loader._get_language_key("English Sub") == 2 + assert loader._get_language_key("German Sub") == 3 + + +# --------------------------------------------------------------------------- +# Empty responses +# --------------------------------------------------------------------------- + + +class TestEmptyResponses: + """Provider returns empty or null data.""" + + def test_fetch_anime_list_empty_json(self): + """Empty JSON array returns empty list.""" + loader = _loader() + loader.session.get.return_value = _mock_response("[]") + result = loader.fetch_anime_list("https://aniworld.to/api/list") + assert result == [] + + def test_fetch_anime_list_invalid_json(self): + """Non-JSON response raises ValueError.""" + loader = _loader() + loader.session.get.return_value = _mock_response("not json") + with pytest.raises(ValueError): + loader.fetch_anime_list("https://aniworld.to/api/list") + + def test_get_season_episode_count_empty_html(self): + """No season meta tag returns empty dict or zero.""" + loader = _loader() + html_str = "" + with patch("src.core.providers.aniworld_provider.requests.get", return_value=_mock_response(html_str)): + result = loader.get_season_episode_count("some-anime") + # Either empty dict or {1: 0} depending on implementation + assert isinstance(result, (dict, int)) + + +# --------------------------------------------------------------------------- +# Partial data from providers +# --------------------------------------------------------------------------- + + +class TestPartialData: + """Provider HTML has some elements but not all.""" + + def test_provider_entry_missing_h4(self): + """Provider entry with missing h4 tag for name.""" + loader = _loader() + html_str = """ + +
  • + +
  • + + """ + with patch.object(loader, "_get_episode_html", return_value=_mock_response(html_str)): + result = loader._get_provider_from_html(1, 1, "some-anime") + assert isinstance(result, (list, dict)) + + def test_get_title_with_whitespace_only(self): + """Title span with only whitespace returns empty string.""" + loader = _loader() + html_str = '

    ' + with patch.object(loader, "_get_key_html", return_value=_mock_response(html_str)): + result = loader.get_title("some-anime") + # Should be empty or whitespace-stripped + assert result.strip() == "" or len(result.strip()) == 0 + + +# --------------------------------------------------------------------------- +# fetch_anime_list JSON edge cases +# --------------------------------------------------------------------------- + + +class TestFetchAnimeListEdgeCases: + """Edge cases for JSON parsing in fetch_anime_list.""" + + def test_json_with_bom(self): + """BOM at start of response is stripped before parsing.""" + loader = _loader() + json_str = '\ufeff[{"title": "Test"}]' + loader.session.get.return_value = _mock_response(json_str) + result = loader.fetch_anime_list("https://aniworld.to/api/list") + assert len(result) == 1 + + def test_json_with_control_characters(self): + """Control characters in response are stripped.""" + loader = _loader() + # Tab and newline within JSON should be handled + json_str = '[{"title": "Test\tAnime"}]' + loader.session.get.return_value = _mock_response(json_str) + result = loader.fetch_anime_list("https://aniworld.to/api/list") + assert len(result) >= 1