"""Integration tests for series parsing edge cases.

This module tests series folder name parsing, year extraction,
special characters, Unicode names, and malformed folder structures.
"""
import os
import tempfile
from pathlib import Path
from unittest.mock import Mock

import pytest

from src.core.entities.series import Serie
from src.core.providers.base_provider import Loader
from src.core.SerieScanner import SerieScanner


@pytest.fixture
def mock_loader():
    """Create a mock loader for testing.

    The mock is spec'd against ``Loader``; ``get_year`` returns 2023 and
    ``get_missing_episodes`` returns an empty dict.
    """
    loader = Mock(spec=Loader)
    loader.get_year = Mock(return_value=2023)
    loader.get_missing_episodes = Mock(return_value={})
    return loader


@pytest.fixture
def temp_anime_dir():
    """Yield a temporary anime directory that is removed after the test."""
    with tempfile.TemporaryDirectory() as tmpdir:
        yield Path(tmpdir)


class TestYearVariations:
    """Test series folder names with various year formats."""

    def test_year_in_parentheses(self, temp_anime_dir, mock_loader):
        """Test year extraction from folder name (YYYY)."""
        # Create folder with year
        folder = temp_anime_dir / "Attack on Titan (2013)"
        folder.mkdir()
        (folder / "key").write_text("attack-on-titan")

        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        # Extract year
        year = scanner._extract_year_from_folder_name("Attack on Titan (2013)")
        assert year == 2013

    def test_year_in_brackets(self, temp_anime_dir, mock_loader):
        """Test year extraction from folder name [YYYY]."""
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        # Brackets format not supported - should return None
        year = scanner._extract_year_from_folder_name("Attack on Titan [2013]")
        assert year is None

    def test_year_at_start(self, temp_anime_dir, mock_loader):
        """Test year at start of folder name."""
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        # Year at start in parentheses
        year = scanner._extract_year_from_folder_name("(2013) Attack on Titan")
        # Should extract year from anywhere in the name
        assert year == 2013

    def test_year_in_middle(self, temp_anime_dir, mock_loader):
        """Test year in middle of folder name."""
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        year = scanner._extract_year_from_folder_name("Attack (2013) on Titan")
        assert year == 2013

    def test_multiple_years(self, temp_anime_dir, mock_loader):
        """Test folder with multiple years - should take first match."""
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        year = scanner._extract_year_from_folder_name(
            "Series (2010) Remake (2020)"
        )
        # Should extract first year found
        assert year == 2010

    def test_no_year(self, temp_anime_dir, mock_loader):
        """Test folder name without year."""
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        year = scanner._extract_year_from_folder_name("Attack on Titan")
        assert year is None

    def test_invalid_year_format(self, temp_anime_dir, mock_loader):
        """Test invalid year formats."""
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        # Too short
        year1 = scanner._extract_year_from_folder_name("Series (202)")
        assert year1 is None

        # Too long
        year2 = scanner._extract_year_from_folder_name("Series (20202)")
        assert year2 is None

        # Non-numeric
        year3 = scanner._extract_year_from_folder_name("Series (ABCD)")
        assert year3 is None

    def test_year_out_of_range(self, temp_anime_dir, mock_loader):
        """Test year outside valid range (1900-2100)."""
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        # Too old
        year1 = scanner._extract_year_from_folder_name("Series (1899)")
        assert year1 is None

        # Too far in future
        year2 = scanner._extract_year_from_folder_name("Series (2101)")
        assert year2 is None

        # Valid edges
        year3 = scanner._extract_year_from_folder_name("Series (1900)")
        assert year3 == 1900

        year4 = scanner._extract_year_from_folder_name("Series (2100)")
        assert year4 == 2100


class TestSpecialCharacters:
    """Test series names with special characters."""

    def test_colon_in_name(self, temp_anime_dir, mock_loader):
        """Test series name with colon."""
        serie = Serie(
            key="re-zero",
            name="Re:Zero - Starting Life in Another World",
            site="aniworld.to",
            folder="Re Zero",
            episodeDict={}
        )

        # Sanitized folder should remove colon
        sanitized = serie.sanitized_folder
        assert ":" not in sanitized
        assert "Re" in sanitized
        assert "Zero" in sanitized

    def test_slash_in_name(self, temp_anime_dir, mock_loader):
        """Test series name with slash."""
        serie = Serie(
            key="fate-stay-night",
            name="Fate/Stay Night: Unlimited Blade Works",
            site="aniworld.to",
            folder="Fate Stay Night",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "/" not in sanitized
        assert "\\" not in sanitized

    def test_question_mark_in_name(self, temp_anime_dir, mock_loader):
        """Test series name with question mark."""
        serie = Serie(
            key="is-it-wrong",
            name="Is It Wrong to Try to Pick Up Girls in a Dungeon?",
            site="aniworld.to",
            folder="Is It Wrong",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "?" not in sanitized

    def test_asterisk_in_name(self, temp_anime_dir, mock_loader):
        """Test series name with asterisk."""
        serie = Serie(
            key="series",
            name="Series * Special",
            site="aniworld.to",
            folder="Series Special",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "*" not in sanitized

    def test_pipe_in_name(self, temp_anime_dir, mock_loader):
        """Test series name with pipe character."""
        serie = Serie(
            key="series",
            name="Series | Part 2",
            site="aniworld.to",
            folder="Series Part 2",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "|" not in sanitized

    def test_quotes_in_name(self, temp_anime_dir, mock_loader):
        """Test series name with quotes."""
        serie = Serie(
            key="series",
            name='Series "Subtitle" Edition',
            site="aniworld.to",
            folder="Series Subtitle Edition",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        # Quotes should be removed or replaced
        assert '"' not in sanitized

    def test_less_greater_than_in_name(self, temp_anime_dir, mock_loader):
        """Test series name with < and >."""
        serie = Serie(
            key="series",
            # The name must actually contain '<' and '>' for the assertions
            # below to exercise anything.
            name="Series <Special> Edition",
            site="aniworld.to",
            folder="Series Special Edition",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "<" not in sanitized
        assert ">" not in sanitized

    def test_multiple_special_chars(self, temp_anime_dir, mock_loader):
        """Test series name with multiple special characters."""
        serie = Serie(
            key="complex",
            # Covers every character listed in invalid_chars below.
            name="Re:Zero / Fate * Special? <Edition>",
            site="aniworld.to",
            folder="Re Zero Fate Special Edition",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        # Should remove all special chars
        invalid_chars = [':', '/', '*', '?', '<', '>']
        for char in invalid_chars:
            assert char not in sanitized


class TestMultipleSpaces:
    """Test series names with multiple spaces."""

    def test_double_spaces(self, temp_anime_dir, mock_loader):
        """Test series name with double spaces."""
        serie = Serie(
            key="series",
            name="Attack  on  Titan",
            site="aniworld.to",
            folder="Attack on Titan",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        # Multiple spaces should be preserved or normalized to single space
        assert "Attack" in sanitized
        assert "Titan" in sanitized

    def test_leading_trailing_spaces(self, temp_anime_dir, mock_loader):
        """Test series name with leading/trailing spaces."""
        serie = Serie(
            key="series",
            name=" Attack on Titan ",
            site="aniworld.to",
            folder="Attack on Titan",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        # Leading/trailing spaces should be stripped
        assert not sanitized.startswith(" ")
        assert not sanitized.endswith(" ")

    def test_tabs_in_name(self, temp_anime_dir, mock_loader):
        """Test series name with tab characters."""
        serie = Serie(
            key="series",
            name="Attack\ton\tTitan",
            site="aniworld.to",
            folder="Attack on Titan",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        # Tabs may be removed, replaced, or kept; in every case the words of
        # the title must survive sanitization.
        # NOTE(review): whether tabs must be stripped is not pinned here -
        # confirm against the sanitizer and tighten if so.
        normalized = sanitized.replace("\t", " ")
        assert "Attack" in normalized
        assert "Titan" in normalized


class TestUnicodeNames:
    """Test series names in different languages (Unicode)."""

    def test_japanese_name(self, temp_anime_dir, mock_loader):
        """Test series name in Japanese."""
        serie = Serie(
            key="shingeki",
            name="進撃の巨人",
            site="aniworld.to",
            folder="進撃の巨人",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        # Unicode should be preserved
        assert "進撃の巨人" in sanitized

    def test_chinese_name(self, temp_anime_dir, mock_loader):
        """Test series name in Chinese."""
        serie = Serie(
            key="series",
            name="进击的巨人",
            site="aniworld.to",
            folder="进击的巨人",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "进击的巨人" in sanitized

    def test_korean_name(self, temp_anime_dir, mock_loader):
        """Test series name in Korean."""
        serie = Serie(
            key="series",
            name="진격의 거인",
            site="aniworld.to",
            folder="진격의 거인",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "진격의" in sanitized

    def test_arabic_name(self, temp_anime_dir, mock_loader):
        """Test series name in Arabic."""
        serie = Serie(
            key="series",
            name="هجوم العمالقة",
            site="aniworld.to",
            folder="هجوم العمالقة",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "هجوم" in sanitized

    def test_cyrillic_name(self, temp_anime_dir, mock_loader):
        """Test series name in Cyrillic."""
        serie = Serie(
            key="series",
            name="Атака Титанов",
            site="aniworld.to",
            folder="Атака Титанов",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "Атака" in sanitized

    def test_mixed_languages(self, temp_anime_dir, mock_loader):
        """Test series name with mixed languages."""
        serie = Serie(
            key="series",
            name="Attack on Titan - 進撃の巨人",
            site="aniworld.to",
            folder="Attack on Titan",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        assert "Attack" in sanitized
        assert "進撃の巨人" in sanitized

    def test_emoji_in_name(self, temp_anime_dir, mock_loader):
        """Test series name with emoji."""
        serie = Serie(
            key="series",
            name="Series ⚔️ Special",
            site="aniworld.to",
            folder="Series Special",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        # Emoji should be handled gracefully
        assert "Series" in sanitized


class TestMalformedFolderStructures:
    """Test handling of malformed folder structures."""

    def test_empty_folder_name(self, temp_anime_dir, mock_loader):
        """Test handling of empty folder name."""
        # Build the scanner outside the raises-block so that only the call
        # under test can satisfy the expected ValueError.
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)
        with pytest.raises(ValueError, match="Series folder cannot be empty"):
            scanner.scan_single_series("test-key", "")

    def test_whitespace_only_folder(self, temp_anime_dir, mock_loader):
        """Test handling of whitespace-only folder name."""
        # Build the scanner outside the raises-block so that only the call
        # under test can satisfy the expected ValueError.
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)
        with pytest.raises(ValueError, match="Series folder cannot be empty"):
            scanner.scan_single_series("test-key", " ")

    def test_folder_with_newlines(self, temp_anime_dir, mock_loader):
        """Test folder name with newline characters."""
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        # Newlines should be handled
        year = scanner._extract_year_from_folder_name("Series\n(2023)")
        # Still should extract year
        assert year == 2023

    def test_very_long_folder_name(self, temp_anime_dir, mock_loader):
        """Test handling of very long folder names."""
        long_name = "A" * 300  # Very long name

        serie = Serie(
            key="long",
            name=long_name,
            site="aniworld.to",
            folder=long_name,
            episodeDict={}
        )

        # Should handle long names without error
        sanitized = serie.sanitized_folder
        assert len(sanitized) > 0

    def test_folder_name_with_dots(self, temp_anime_dir, mock_loader):
        """Test folder name with dots."""
        scanner = SerieScanner(str(temp_anime_dir), mock_loader)

        year = scanner._extract_year_from_folder_name(
            "Series.Name.2023.(2023)"
        )
        assert year == 2023

    def test_folder_name_with_underscores(self, temp_anime_dir, mock_loader):
        """Test folder name with underscores."""
        serie = Serie(
            key="series",
            name="Attack_on_Titan",
            site="aniworld.to",
            folder="Attack_on_Titan",
            episodeDict={}
        )

        sanitized = serie.sanitized_folder
        # Underscores are valid filesystem chars
        assert "Attack" in sanitized


class TestNameWithYearProperty:
    """Test Serie.name_with_year property."""

    def test_name_with_year_adds_year(self):
        """Test that name_with_year adds year in parentheses."""
        serie = Serie(
            key="dororo",
            name="Dororo",
            site="aniworld.to",
            folder="Dororo",
            episodeDict={},
            year=2025
        )

        assert serie.name_with_year == "Dororo (2025)"

    def test_name_with_year_no_year(self):
        """Test name_with_year without year returns just name."""
        serie = Serie(
            key="dororo",
            name="Dororo",
            site="aniworld.to",
            folder="Dororo",
            episodeDict={}
        )

        assert serie.name_with_year == "Dororo"

    def test_name_with_year_used_in_sanitized_folder(self):
        """Test that sanitized_folder uses name_with_year."""
        serie = Serie(
            key="attack",
            name="Attack on Titan",
            site="aniworld.to",
            folder="Attack on Titan",
            episodeDict={},
            year=2013
        )

        sanitized = serie.sanitized_folder
        assert "(2013)" in sanitized
        assert "Attack on Titan" in sanitized


class TestEnsureFolderWithYear:
    """Test Serie.ensure_folder_with_year method."""

    def test_ensure_folder_adds_year_when_missing(self):
        """Test that ensure_folder_with_year adds year to folder."""
        serie = Serie(
            key="attack",
            name="Attack on Titan",
            site="aniworld.to",
            folder="Attack on Titan",
            episodeDict={},
            year=2013
        )

        result = serie.ensure_folder_with_year()
        assert "(2013)" in result
        assert serie.folder == result

    def test_ensure_folder_doesnt_duplicate_year(self):
        """Test that year isn't added if already present."""
        serie = Serie(
            key="attack",
            name="Attack on Titan",
            site="aniworld.to",
            folder="Attack on Titan (2013)",
            episodeDict={},
            year=2013
        )

        result = serie.ensure_folder_with_year()
        # The year suffix must appear exactly once
        assert result.count("(2013)") == 1

    def test_ensure_folder_no_year_unchanged(self):
        """Test that folder unchanged when no year available."""
        serie = Serie(
            key="attack",
            name="Attack on Titan",
            site="aniworld.to",
            folder="Attack on Titan",
            episodeDict={}
        )

        original_folder = serie.folder
        result = serie.ensure_folder_with_year()
        assert result == original_folder


class TestRealWorldScenarios:
    """Test real-world anime title scenarios."""

    def test_real_anime_titles(self):
        """Test with actual anime titles."""
        test_cases = [
            ("fate-stay-night", "Fate/Stay Night: UBW", "Fate Stay Night UBW"),
            ("86", "86: Eighty-Six", "86 Eighty-Six"),
            ("steins-gate", "Steins;Gate", "Steins Gate"),
            (
                "re-zero",
                "Re:Zero - Starting Life in Another World",
                "Re Zero",
            ),
            (
                "demon-slayer",
                "Demon Slayer: Kimetsu no Yaiba",
                "Demon Slayer",
            ),
        ]
        # Characters that must never appear in a sanitized folder name
        invalid_chars = '<>:"/\\|?*'

        for key, name, expected_part in test_cases:
            serie = Serie(
                key=key,
                name=name,
                site="aniworld.to",
                folder="old-folder",
                episodeDict={}
            )

            sanitized = serie.sanitized_folder
            # Check that expected part is in sanitized name
            assert any(word in sanitized for word in expected_part.split())
            # Check invalid chars removed (< > : " / \ | ? *)
            for char in invalid_chars:
                assert char not in sanitized

    def test_series_with_year_variations(self):
        """Test series with different year formats in name."""
        test_cases = [
            "Dororo (2019)",
            "Attack on Titan (2013)",
            "Perfect Blue (1997)",
            "Ghost in the Shell (1995)",
        ]

        # Use the platform temp directory instead of a hard-coded "/tmp" and
        # build the scanner once rather than per folder name.
        scanner = SerieScanner(tempfile.gettempdir(), Mock(spec=Loader))

        for folder_name in test_cases:
            year = scanner._extract_year_from_folder_name(folder_name)
            # Should extract year from all formats
            assert year is not None
            assert 1900 <= year <= 2100