From 12ce6d4e22d41e97fce3837cb07dab8d263c609e Mon Sep 17 00:00:00 2001 From: Lukas Pupka-Lipinski Date: Mon, 7 Jul 2025 18:34:04 +0200 Subject: [PATCH] fixed: duplication bug added: save to temp and copy to dest folder --- src/FindDublicates.py | 56 +++++++++++++++++++++++++++++++++++ src/Loaders/AniWorldLoader.py | 34 ++++++++++++++++----- src/Main.py | 20 +++++++++++-- 3 files changed, 100 insertions(+), 10 deletions(-) create mode 100644 src/FindDublicates.py diff --git a/src/FindDublicates.py b/src/FindDublicates.py new file mode 100644 index 0000000..ee56707 --- /dev/null +++ b/src/FindDublicates.py @@ -0,0 +1,56 @@ +import os +import hashlib +from collections import defaultdict + + +def compute_hash(filepath, chunk_size=8192): + sha256 = hashlib.sha256() + try: + with open(filepath, 'rb') as f: + for chunk in iter(lambda: f.read(chunk_size), b''): + sha256.update(chunk) + except Exception as e: + print(f"Error reading {filepath}: {e}") + return None + return sha256.hexdigest() + + +def find_duplicates(root_dir): + size_dict = defaultdict(list) + + # Step 1: Group files by size + for dirpath, _, filenames in os.walk(root_dir): + for file in filenames: + if file.lower().endswith('.mp4'): + filepath = os.path.join(dirpath, file) + try: + size = os.path.getsize(filepath) + size_dict[size].append(filepath) + except Exception as e: + print(f"Error accessing {filepath}: {e}") + + # Step 2: Within size groups, group by hash + duplicates = defaultdict(list) + for size, files in size_dict.items(): + if len(files) < 2: + continue + hash_dict = defaultdict(list) + for file in files: + file_hash = compute_hash(file) + if file_hash: + hash_dict[file_hash].append(file) + for h, paths in hash_dict.items(): + if len(paths) > 1: + duplicates[h].extend(paths) + + return duplicates + + +# Example usage +if __name__ == "__main__": + folder_to_scan = "\\\\sshfs.r\\ubuntu@192.168.178.43\\media\\serien\\Serien" + dupes = find_duplicates(folder_to_scan) + for hash_val, files in dupes.items(): + print(f"\nDuplicate group (hash: {hash_val}):") + for f in files: + print(f" {f}") diff --git a/src/Loaders/AniWorldLoader.py b/src/Loaders/AniWorldLoader.py index 41ca6d0..82077a6 100644 --- a/src/Loaders/AniWorldLoader.py +++ b/src/Loaders/AniWorldLoader.py @@ -1,6 +1,5 @@ import os import re -import subprocess import logging import json import requests @@ -16,6 +15,7 @@ from urllib3.util.retry import Retry from src.Loaders.Loader import Loader from src.Loaders.Providers import Providers from yt_dlp import YoutubeDL +import shutil # Read timeout from environment variable, default to 600 seconds (10 minutes) timeout = int(os.getenv("DOWNLOAD_TIMEOUT", 600)) @@ -79,6 +79,13 @@ class AniworldLoader(Loader): self._EpisodeHTMLDict = {} self.Providers = Providers() + def ClearCache(self): + self._KeyHTMLDict = {} + self._EpisodeHTMLDict = {} + + def RemoveFromCache(self): + self._EpisodeHTMLDict = {} + def Search(self, word: str) -> list: search_url = f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}" anime_list = self.fetch_anime_list(search_url) @@ -139,7 +146,6 @@ class AniworldLoader(Loader): return languageCode in languages - def Download(self, baseDirectory: str, serieFolder: str, season: int, episode: int, key: str, language: str = "German Dub") -> bool: sanitized_anime_title = ''.join( char for char in self.GetTitle(key) if char not in self.INVALID_PATH_CHARS @@ -158,17 +164,24 @@ class AniworldLoader(Loader): f"({language}).mp4" ) - output_path = os.path.join(os.path.join(baseDirectory, serieFolder), 
-            output_file)
+        folderPath = os.path.join(os.path.join(baseDirectory, serieFolder), f"Season {season}")
+        output_path = os.path.join(folderPath, output_file)
         os.makedirs(os.path.dirname(output_path), exist_ok=True)
+
+        # Stage the download in a local temp folder, then copy it to the destination
+        temp_dir = "./Temp/"
+        os.makedirs(temp_dir, exist_ok=True)
+        temp_Path = os.path.join(temp_dir, output_file)
+
         for provider in self.SUPPORTED_PROVIDERS:
             link, header = self._get_direct_link_from_provider(season, episode, key, language)
             ydl_opts = {
                 'fragment_retries': float('inf'),
-                'outtmpl': output_path,
+                'outtmpl': temp_Path,
                 'quiet': True,
                 'no_warnings': True,
-                'progress_with_newline': True
+                'progress_with_newline': False,
             }
 
             if header:
@@ -176,7 +189,12 @@
 
             with YoutubeDL(ydl_opts) as ydl:
                 ydl.download([link])
+
+            if os.path.exists(temp_Path):
+                shutil.copy(temp_Path, output_path)
+                os.remove(temp_Path)
             break
+
         self.ClearCache()
 
     def GetSiteKey(self) -> str:
@@ -203,7 +221,7 @@ class AniworldLoader(Loader):
         return self._KeyHTMLDict[key]
     def _GetEpisodeHTML(self, season: int, episode: int, key: str):
-        if key in self._EpisodeHTMLDict:
-            return self._EpisodeHTMLDict[key]
+        if (key, season, episode) in self._EpisodeHTMLDict:
+            return self._EpisodeHTMLDict[(key, season, episode)]
 
 
         link = (
@@ -211,8 +229,8 @@
             f"staffel-{season}/episode-{episode}"
         )
         html = self.session.get(link, timeout=self.DEFAULT_REQUEST_TIMEOUT)
-        self._EpisodeHTMLDict[key] = html
-        return self._EpisodeHTMLDict[key]
+        self._EpisodeHTMLDict[(key, season, episode)] = html
+        return self._EpisodeHTMLDict[(key, season, episode)]
 
     def _get_provider_from_html(self, season: int, episode: int, key: str) -> dict:
         """
diff --git a/src/Main.py b/src/Main.py
index 5cc3c1e..57f97ee 100644
--- a/src/Main.py
+++ b/src/Main.py
@@ -6,6 +6,7 @@ import SerieList
 import SerieScanner
 from src.Loaders.Loaders import Loaders
 from src.Serie import Serie
+import time
 
 # Configure logging
 logging.basicConfig(level=logging.FATAL, format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
@@ -55,7 +56,8 @@ class SeriesApp:
             print(f"{i}. {serie}")
 
     def search(self, words :str) -> list:
-        return AniWorldLoader.search_anime(words)
+        loader = self.Loaders.GetLoader(key="aniworld.to")
+        return loader.Search(words)
 
     def get_user_selection(self):
         """Handle user input for selecting series."""
@@ -91,6 +93,20 @@ class SeriesApp:
         bar = "@" * filled_length + "-" * (length - filled_length)
         return f"[{bar}] {current} / {total}"
 
+    def retry(self, func, max_retries=3, delay=2, *args, **kwargs):
+        for attempt in range(1, max_retries + 1):
+            try:
+                func(*args, **kwargs)
+                return True
+            except Exception as e:
+                print(f"Attempt {attempt} failed: {e}")
+                if attempt == max_retries:
+                    print("All attempts failed.")
+                else:
+                    print(f"Retrying in {delay} seconds...\n")
+                    time.sleep(delay)
+        return False
+
     def download_series(self, series):
         """Simulate the downloading process with a progress bar."""
         total_downloaded = 0
@@ -107,7 +123,7 @@ class SeriesApp:
                 loader = self.Loaders.GetLoader(key="aniworld.to")
                 if loader.IsLanguage(season, episode, serie.key):
                     print(f"\ndownload {serie.folder} {season} {episode}\n")
-                    self.retry(loader.Download, 3, 1, self.directory_to_search, serie.folder, season, episode, serie.key)
+                    self.retry(loader.Download, 3, 1, self.directory_to_search, serie.folder, season, episode, serie.key)
                     downloaded += 1
                     total_downloaded += 1
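
Usage sketch for the new src/FindDublicates.py helper (illustrative only, not part of the patch; it assumes src/ is on the import path and invents a report_redundant_copies helper): find_duplicates() returns a dict mapping a SHA-256 digest to the list of byte-identical .mp4 paths, so a caller can keep one file per group and collect the rest.

    from FindDublicates import find_duplicates  # assumes src/ is on sys.path

    def report_redundant_copies(root_dir):
        """Return every duplicate .mp4 except the first path in each group."""
        redundant = []
        for digest, paths in find_duplicates(root_dir).items():
            keep, *extras = sorted(paths)  # keep the lexicographically first copy
            print(f"{digest[:12]}: keeping {keep}, {len(extras)} redundant file(s)")
            redundant.extend(extras)
        return redundant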
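The Download() change stages each episode in a local ./Temp/ folder and only copies it into the series folder once yt-dlp has finished, so an aborted download no longer leaves a partial .mp4 on the destination share. A minimal sketch of that publish step (not part of the patch; shutil.move is shown as an alternative to copy-plus-remove and falls back to copy-and-delete when the destination is on another filesystem):

    import os
    import shutil

    def publish_download(temp_path, output_path):
        """Move a finished download from the staging folder to its destination."""
        if not os.path.exists(temp_path):
            return False  # the downloader never produced a file
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        shutil.move(temp_path, output_path)  # single call instead of copy + remove
        return True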
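In Main.py, SeriesApp.retry takes max_retries and delay before the wrapped call's own arguments, so the call above passes 3 and 1 positionally. An equivalent, more explicit call from inside download_series (a sketch under the same signatures; keyword arguments are forwarded to loader.Download via **kwargs):

    ok = self.retry(
        loader.Download,
        max_retries=3,
        delay=1,
        baseDirectory=self.directory_to_search,
        serieFolder=serie.folder,
        season=season,
        episode=episode,
        key=serie.key,
    )
    if not ok:
        logging.error("giving up on %s S%sE%s", serie.folder, season, episode)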