Compare commits

...

2 Commits

Author SHA1 Message Date
862de2f9d2 fix: wrong folder in data 2025-07-11 22:14:53 +02:00
12ce6d4e22 fixed: duplication bug
added: save to temp and copy to dest folder
2025-07-07 18:34:04 +02:00
4 changed files with 106 additions and 14 deletions

56
src/FindDublicates.py Normal file
View File

@ -0,0 +1,56 @@
import os
import hashlib
from collections import defaultdict
def compute_hash(filepath, chunk_size=8192):
    """Return the SHA-256 hex digest of *filepath*, or None if it cannot be read.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Bytes read per iteration; streaming keeps memory flat
            even for multi-gigabyte video files.

    Returns:
        The hex digest string, or None when the file is unreadable.
    """
    sha256 = hashlib.sha256()
    try:
        with open(filepath, 'rb') as f:
            # iter() with a b'' sentinel reads until EOF without loading the
            # whole file into memory.
            for chunk in iter(lambda: f.read(chunk_size), b''):
                sha256.update(chunk)
    except OSError as e:
        # Narrowed from `Exception`: only I/O failures (missing file,
        # permissions, dropped network share) are expected here.
        print(f"Error reading {filepath}: {e}")
        return None
    return sha256.hexdigest()
def find_duplicates(root_dir, extensions=('.mp4',)):
    """Find byte-identical files under *root_dir*.

    Files are first grouped by size (cheap), then only same-size groups are
    hashed (expensive), so unique-size files are never read.

    Args:
        root_dir: Directory tree to scan recursively.
        extensions: Case-insensitive filename suffixes to consider.
            Defaults to ('.mp4',) for backward compatibility.

    Returns:
        Mapping of SHA-256 digest -> list of paths sharing that content,
        containing only groups with 2 or more files.
    """
    # Normalize once; str.endswith accepts a tuple of suffixes.
    suffixes = tuple(ext.lower() for ext in extensions)

    # Step 1: Group files by size
    size_dict = defaultdict(list)
    for dirpath, _, filenames in os.walk(root_dir):
        for name in filenames:
            if name.lower().endswith(suffixes):
                filepath = os.path.join(dirpath, name)
                try:
                    size_dict[os.path.getsize(filepath)].append(filepath)
                except OSError as e:
                    # Narrowed from `Exception`: stat can fail on vanished
                    # files or flaky network mounts; skip and keep scanning.
                    print(f"Error accessing {filepath}: {e}")

    # Step 2: Within size groups, group by hash
    duplicates = defaultdict(list)
    for files in size_dict.values():
        if len(files) < 2:
            # A unique size cannot have a duplicate; skip hashing entirely.
            continue
        hash_dict = defaultdict(list)
        for file in files:
            file_hash = compute_hash(file)
            if file_hash:  # None means unreadable; excluded from groups
                hash_dict[file_hash].append(file)
        for h, paths in hash_dict.items():
            if len(paths) > 1:
                duplicates[h].extend(paths)
    return duplicates
# Example usage
if __name__ == "__main__":
    # UNC-style path to the sshfs-mounted media share — assumes SSHFS-Win
    # syntax on the scanning machine; TODO confirm the mount is reachable.
    folder_to_scan = "\\\\sshfs.r\\ubuntu@192.168.178.43\\media\\serien\\Serien"
    for digest, paths in find_duplicates(folder_to_scan).items():
        print(f"\nDuplicate group (hash: {digest}):")
        for path in paths:
            print(f" {path}")

View File

@ -1,6 +1,5 @@
import os
import re
import subprocess
import logging
import json
import requests
@ -16,6 +15,7 @@ from urllib3.util.retry import Retry
from src.Loaders.Loader import Loader
from src.Loaders.Providers import Providers
from yt_dlp import YoutubeDL
import shutil
# Read timeout from environment variable, default to 600 seconds (10 minutes)
timeout = int(os.getenv("DOWNLOAD_TIMEOUT", 600))
@ -79,6 +79,13 @@ class AniworldLoader(Loader):
self._EpisodeHTMLDict = {}
self.Providers = Providers()
def ClearCache(self):
self._KeyHTMLDict = {}
self._EpisodeHTMLDict = {}
def RemoveFromCache(self):
self._EpisodeHTMLDict = {}
def Search(self, word: str) -> list:
search_url = f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
anime_list = self.fetch_anime_list(search_url)
@ -139,7 +146,6 @@ class AniworldLoader(Loader):
return languageCode in languages
def Download(self, baseDirectory: str, serieFolder: str, season: int, episode: int, key: str, language: str = "German Dub") -> bool:
sanitized_anime_title = ''.join(
char for char in self.GetTitle(key) if char not in self.INVALID_PATH_CHARS
@ -158,17 +164,24 @@ class AniworldLoader(Loader):
f"({language}).mp4"
)
output_path = os.path.join(os.path.join(baseDirectory, serieFolder), output_file)
folderPath = os.path.join(os.path.join(baseDirectory, serieFolder), f"Season {season}")
output_path = os.path.join(folderPath, output_file)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Get the system-designated temp directory
temp_dir = "./Temp/"
os.makedirs(os.path.dirname(temp_dir), exist_ok=True)
temp_Path = os.path.join(temp_dir, output_file)
for provider in self.SUPPORTED_PROVIDERS:
link, header = self._get_direct_link_from_provider(season, episode, key, language)
ydl_opts = {
'fragment_retries': float('inf'),
'outtmpl': output_path,
'outtmpl': temp_Path,
'quiet': True,
'no_warnings': True,
'progress_with_newline': True
'progress_with_newline': False,
}
if header:
@ -176,7 +189,12 @@ class AniworldLoader(Loader):
with YoutubeDL(ydl_opts) as ydl:
ydl.download([link])
if (os.path.exists(temp_Path)):
shutil.copy(temp_Path, output_path)
os.remove(temp_Path)
break
self.ClearCache()
def GetSiteKey(self) -> str:
@ -203,7 +221,7 @@ class AniworldLoader(Loader):
return self._KeyHTMLDict[key]
def _GetEpisodeHTML(self, season: int, episode: int, key: str):
if key in self._EpisodeHTMLDict:
return self._EpisodeHTMLDict[key]
return self._EpisodeHTMLDict[(key, season, episode)]
link = (
@ -211,8 +229,8 @@ class AniworldLoader(Loader):
f"staffel-{season}/episode-{episode}"
)
html = self.session.get(link, timeout=self.DEFAULT_REQUEST_TIMEOUT)
self._EpisodeHTMLDict[key] = html
return self._EpisodeHTMLDict[key]
self._EpisodeHTMLDict[(key, season, episode)] = html
return self._EpisodeHTMLDict[(key, season, episode)]
def _get_provider_from_html(self, season: int, episode: int, key: str) -> dict:
"""

View File

@ -6,6 +6,7 @@ import SerieList
import SerieScanner
from src.Loaders.Loaders import Loaders
from src.Serie import Serie
import time
# Configure logging
logging.basicConfig(level=logging.FATAL, format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
@ -55,7 +56,8 @@ class SeriesApp:
print(f"{i}. {serie}")
def search(self, words :str) -> list:
return AniWorldLoader.search_anime(words)
loader = self.Loaders.GetLoader(key="aniworld.to")
return loader.Search(words)
def get_user_selection(self):
"""Handle user input for selecting series."""
@ -91,6 +93,20 @@ class SeriesApp:
bar = "@" * filled_length + "-" * (length - filled_length)
return f"[{bar}] {current} / {total}"
def retry(self, func, max_retries=3, delay=2, *args, **kwargs):
for attempt in range(1, max_retries + 1):
try:
func(*args, **kwargs)
return True
except Exception as e:
print(f"Attempt {attempt} failed: {e}")
if attempt == max_retries:
print("All attempts failed.")
else:
print(f"Retrying in {delay} seconds...\n")
time.sleep(delay)
return False
def download_series(self, series):
"""Simulate the downloading process with a progress bar."""
total_downloaded = 0
@ -107,7 +123,7 @@ class SeriesApp:
loader = self.Loaders.GetLoader(key="aniworld.to")
if loader.IsLanguage(season, episode, serie.key):
print(f"\ndownload {serie.folder} {season} {episode}\n")
loader.Download(self.directory_to_search, serie.folder, season, episode, serie.key)
self.retry(loader.Download, 3, 1, self.directory_to_search, serie.folder, season, episode, serie.key)
downloaded += 1
total_downloaded += 1
@ -142,7 +158,6 @@ class SeriesApp:
index = int(selection) - 1
if 0 <= index < len(results):
chosen_name = results[index]
self.List.add(Serie(chosen_name["link"], chosen_name["name"], "aniworld.to", chosen_name["link"], {}))
return
else:
@ -150,6 +165,7 @@ class SeriesApp:
except ValueError:
print("Invalid input. Try again.")
def run(self):
"""Main function to run the app."""
while True:

View File

@ -31,10 +31,12 @@ class SerieScanner:
if (serie != None and not self.is_null_or_whitespace(serie.key)):
missings, site = self.__GetMissingEpisodesAndSeason(serie.key, mp4_files)
serie.episodeDict = missings
serie.folder = folder
serie.save_to_file(os.path.join(os.path.join(self.directory, folder), 'data'))
if folder not in self.folderDict:
self.folderDict[folder] = []
self.folderDict[folder] = serie
if (serie.key in self.folderDict):
logging.ERROR(f"dublication found: {serie.key}");
pass
self.folderDict[serie.key] = serie
noKeyFound_logger.info(f"Saved Serie: '{str(serie)}'")
except NoKeyFoundException as nkfe:
NoKeyFoundException.error(f"Error processing folder '{folder}': {nkfe}")