This commit is contained in:
2025-10-22 13:38:46 +02:00
parent 1f39f07c5d
commit 04799633b4
9 changed files with 411 additions and 571 deletions

View File

@@ -10,7 +10,7 @@ import os
import re
import traceback
import uuid
from typing import Callable, Optional
from typing import Callable, Iterable, Iterator, Optional
from src.core.entities.series import Serie
from src.core.exceptions.Exceptions import MatchNotFoundError, NoKeyFoundException
@@ -40,7 +40,7 @@ class SerieScanner:
basePath: str,
loader: Loader,
callback_manager: Optional[CallbackManager] = None
):
) -> None:
"""
Initialize the SerieScanner.
@@ -49,10 +49,12 @@ class SerieScanner:
loader: Loader instance for fetching series information
callback_manager: Optional callback manager for progress updates
"""
self.directory = basePath
self.directory: str = basePath
self.folderDict: dict[str, Serie] = {}
self.loader = loader
self._callback_manager = callback_manager or CallbackManager()
self.loader: Loader = loader
self._callback_manager: CallbackManager = (
callback_manager or CallbackManager()
)
self._current_operation_id: Optional[str] = None
logger.info("Initialized SerieScanner with base path: %s", basePath)
@@ -62,22 +64,22 @@ class SerieScanner:
"""Get the callback manager instance."""
return self._callback_manager
def reinit(self) -> None:
    """Reinitialize the folder dictionary.

    Rebinds ``self.folderDict`` to a fresh, empty dict. The previous
    dict object is left untouched, so any caller still holding a
    reference to it keeps its contents.
    """
    # The attribute's type is declared once in __init__; repeating the
    # annotation here would be a second declaration of the same attribute.
    self.folderDict = {}
def is_null_or_whitespace(self, value: Optional[str]) -> bool:
    """Check if a string is None or whitespace.

    Args:
        value: String value to check

    Returns:
        True if string is None or contains only whitespace
        (the empty string counts as whitespace-only)
    """
    # Guard against None first so .strip() is only called on a real string.
    if value is None:
        return True
    return value.strip() == ""
def get_total_to_scan(self):
def get_total_to_scan(self) -> int:
"""Get the total number of folders to scan.
Returns:
@@ -86,7 +88,10 @@ class SerieScanner:
result = self.__find_mp4_files()
return sum(1 for _ in result)
def scan(self, callback: Optional[Callable[[str, int], None]] = None):
def scan(
self,
callback: Optional[Callable[[str, int], None]] = None
) -> None:
"""
Scan directories for anime series and missing episodes.
@@ -127,10 +132,10 @@ class SerieScanner:
counter += 1
# Calculate progress
percentage = (
(counter / total_to_scan * 100)
if total_to_scan > 0 else 0
)
if total_to_scan > 0:
percentage = (counter / total_to_scan) * 100
else:
percentage = 0.0
# Notify progress
self._callback_manager.notify_progress(
@@ -262,13 +267,13 @@ class SerieScanner:
raise
def __find_mp4_files(self):
def __find_mp4_files(self) -> Iterator[tuple[str, list[str]]]:
"""Find all .mp4 files in the directory structure."""
logger.info("Scanning for .mp4 files")
for anime_name in os.listdir(self.directory):
anime_path = os.path.join(self.directory, anime_name)
if os.path.isdir(anime_path):
mp4_files = []
mp4_files: list[str] = []
has_files = False
for root, _, files in os.walk(anime_path):
for file in files:
@@ -277,7 +282,7 @@ class SerieScanner:
has_files = True
yield anime_name, mp4_files if has_files else []
def __remove_year(self, input_string: str):
def __remove_year(self, input_string: str) -> str:
"""Remove year information from input string."""
cleaned_string = re.sub(r'\(\d{4}\)', '', input_string).strip()
logger.debug(
@@ -287,7 +292,7 @@ class SerieScanner:
)
return cleaned_string
def __read_data_from_file(self, folder_name: str):
def __read_data_from_file(self, folder_name: str) -> Optional[Serie]:
"""Read serie data from file or key file.
Args:
@@ -322,7 +327,7 @@ class SerieScanner:
return None
def __get_episode_and_season(self, filename: str):
def __get_episode_and_season(self, filename: str) -> tuple[int, int]:
"""Extract season and episode numbers from filename.
Args:
@@ -355,7 +360,10 @@ class SerieScanner:
"Season and episode pattern not found in the filename."
)
def __get_episodes_and_seasons(self, mp4_files: list):
def __get_episodes_and_seasons(
self,
mp4_files: Iterable[str]
) -> dict[int, list[int]]:
"""Get episodes grouped by season from mp4 files.
Args:
@@ -364,7 +372,7 @@ class SerieScanner:
Returns:
Dictionary mapping season to list of episode numbers
"""
episodes_dict = {}
episodes_dict: dict[int, list[int]] = {}
for file in mp4_files:
season, episode = self.__get_episode_and_season(file)
@@ -375,7 +383,11 @@ class SerieScanner:
episodes_dict[season] = [episode]
return episodes_dict
def __get_missing_episodes_and_season(self, key: str, mp4_files: list):
def __get_missing_episodes_and_season(
self,
key: str,
mp4_files: Iterable[str]
) -> tuple[dict[int, list[int]], str]:
"""Get missing episodes for a serie.
Args:
@@ -388,7 +400,7 @@ class SerieScanner:
# key season , value count of episodes
expected_dict = self.loader.get_season_episode_count(key)
filedict = self.__get_episodes_and_seasons(mp4_files)
episodes_dict = {}
episodes_dict: dict[int, list[int]] = {}
for season, expected_count in expected_dict.items():
existing_episodes = filedict.get(season, [])
missing_episodes = [

View File

@@ -27,38 +27,74 @@ noKeyFound_logger = logging.getLogger("NoKeyFound")
noKeyFound_handler = logging.FileHandler("../../NoKeyFound.log")
noKeyFound_handler.setLevel(logging.ERROR)
class AniworldLoader(Loader):
def __init__(self):
self.SUPPORTED_PROVIDERS = ["VOE", "Doodstream", "Vidmoly", "Vidoza", "SpeedFiles", "Streamtape", "Luluvdo"]
self.SUPPORTED_PROVIDERS = [
"VOE",
"Doodstream",
"Vidmoly",
"Vidoza",
"SpeedFiles",
"Streamtape",
"Luluvdo",
]
self.AniworldHeaders = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
"accept-encoding": "gzip, deflate, br, zstd",
"accept-language": "de,de-DE;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
"cache-control": "max-age=0",
"priority": "u=0, i",
"sec-ch-ua": '"Chromium";v="136", "Microsoft Edge";v="136", "Not.A/Brand";v="99"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"upgrade-insecure-requests": "1",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
}
self.INVALID_PATH_CHARS = ['<', '>', ':', '"', '/', '\\', '|', '?', '*', '&']
"accept": (
"text/html,application/xhtml+xml,application/xml;q=0.9,"
"image/avif,image/webp,image/apng,*/*;q=0.8"
),
"accept-encoding": "gzip, deflate, br, zstd",
"accept-language": (
"de,de-DE;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6"
),
"cache-control": "max-age=0",
"priority": "u=0, i",
"sec-ch-ua": (
'"Chromium";v="136", "Microsoft Edge";v="136", '
'"Not.A/Brand";v="99"'
),
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"Windows"',
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "none",
"sec-fetch-user": "?1",
"upgrade-insecure-requests": "1",
"user-agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/136.0.0.0 Safari/537.36 Edg/136.0.0.0"
),
}
self.INVALID_PATH_CHARS = [
"<",
">",
":",
'"',
"/",
"\\",
"|",
"?",
"*",
"&",
]
self.RANDOM_USER_AGENT = UserAgent().random
self.LULUVDO_USER_AGENT = "Mozilla/5.0 (Android 15; Mobile; rv:132.0) Gecko/132.0 Firefox/132.0"
self.LULUVDO_USER_AGENT = (
"Mozilla/5.0 (Android 15; Mobile; rv:132.0) "
"Gecko/132.0 Firefox/132.0"
)
self.PROVIDER_HEADERS = {
"Vidmoly": ['Referer: "https://vidmoly.to"'],
"Doodstream": ['Referer: "https://dood.li/"'],
"VOE": [f'User-Agent: {self.RANDOM_USER_AGENT}'],
"Luluvdo": [
f'User-Agent: {self.LULUVDO_USER_AGENT}',
'Accept-Language: de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7',
'Origin: "https://luluvdo.com"',
'Referer: "https://luluvdo.com/"'
]}
"Vidmoly": ['Referer: "https://vidmoly.to"'],
"Doodstream": ['Referer: "https://dood.li/"'],
"VOE": [f"User-Agent: {self.RANDOM_USER_AGENT}"],
"Luluvdo": [
f"User-Agent: {self.LULUVDO_USER_AGENT}",
"Accept-Language: de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7",
'Origin: "https://luluvdo.com"',
'Referer: "https://luluvdo.com/"',
],
}
self.ANIWORLD_TO = "https://aniworld.to"
self.session = requests.Session()
@@ -66,7 +102,7 @@ class AniworldLoader(Loader):
retries = Retry(
total=5, # Number of retries
backoff_factor=1, # Delay multiplier (1s, 2s, 4s, ...)
status_forcelist=[500, 502, 503, 504], # Retry for specific HTTP errors
status_forcelist=[500, 502, 503, 504],
allowed_methods=["GET"]
)
@@ -96,12 +132,13 @@ class AniworldLoader(Loader):
Returns:
List of found series
"""
search_url = f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
search_url = (
f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
)
anime_list = self.fetch_anime_list(search_url)
return anime_list
def fetch_anime_list(self, url: str) -> list:
response = self.session.get(url, timeout=self.DEFAULT_REQUEST_TIMEOUT)
response.raise_for_status()
@@ -297,7 +334,7 @@ class AniworldLoader(Loader):
self._get_episode_html(season, episode, key).content,
'html.parser'
)
providers = {}
providers: dict[str, dict[int, str]] = {}
episode_links = soup.find_all(
'li', class_=lambda x: x and x.startswith('episodeLink')
@@ -390,7 +427,7 @@ class AniworldLoader(Loader):
"VOE"
).get_link(embeded_link, self.DEFAULT_REQUEST_TIMEOUT)
def get_season_episode_count(self, slug : str) -> dict:
def get_season_episode_count(self, slug: str) -> dict:
base_url = f"{self.ANIWORLD_TO}/anime/stream/{slug}/"
response = requests.get(base_url, timeout=self.DEFAULT_REQUEST_TIMEOUT)
soup = BeautifulSoup(response.content, 'html.parser')
@@ -402,7 +439,10 @@ class AniworldLoader(Loader):
for season in range(1, number_of_seasons + 1):
season_url = f"{base_url}staffel-{season}"
response = requests.get(season_url, timeout=self.DEFAULT_REQUEST_TIMEOUT)
response = requests.get(
season_url,
timeout=self.DEFAULT_REQUEST_TIMEOUT,
)
soup = BeautifulSoup(response.content, 'html.parser')
episode_links = soup.find_all('a', href=True)