fixed: duplication bug
added: save to temp and copy to dest folder
This commit is contained in:
parent
ad61784744
commit
12ce6d4e22
56
src/FindDublicates.py
Normal file
56
src/FindDublicates.py
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import os
|
||||||
|
import hashlib
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|
||||||
|
def compute_hash(filepath, chunk_size=8192):
    """Return the SHA-256 hex digest of the file at *filepath*.

    The file is read in chunks of *chunk_size* bytes so arbitrarily large
    files can be hashed without loading them fully into memory.

    Args:
        filepath: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration (default 8192).

    Returns:
        The SHA-256 hex digest string, or ``None`` (after printing a
        diagnostic) when the file cannot be read.
    """
    sha256 = hashlib.sha256()
    try:
        with open(filepath, 'rb') as f:
            # iter() with a sentinel yields chunks until read() returns b''.
            for chunk in iter(lambda: f.read(chunk_size), b''):
                sha256.update(chunk)
    except OSError as e:
        # Narrowed from a blanket `except Exception`: only I/O failures
        # (vanished file, permission denied, ...) are expected here;
        # programming errors should propagate instead of being swallowed.
        print(f"Error reading {filepath}: {e}")
        return None
    return sha256.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def find_duplicates(root_dir, extensions=('.mp4',)):
    """Find byte-identical files under *root_dir*.

    Files are first grouped by size (cheap); only size groups with more
    than one member are hashed (expensive), so most files are never read.

    Args:
        root_dir: Directory tree to scan recursively.
        extensions: Case-insensitive filename suffixes to consider.
            Defaults to ``('.mp4',)``, preserving the original behaviour.

    Returns:
        Mapping of SHA-256 hex digest -> list of file paths; only groups
        containing at least two identical files are included.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    size_dict = defaultdict(list)

    # Step 1: Group candidate files by size. Files of different sizes can
    # never be duplicates, so this pre-filter avoids hashing most files.
    for dirpath, _, filenames in os.walk(root_dir):
        for name in filenames:  # renamed from `file` (shadowed a builtin)
            if name.lower().endswith(suffixes):
                filepath = os.path.join(dirpath, name)
                try:
                    size_dict[os.path.getsize(filepath)].append(filepath)
                except OSError as e:
                    # File may have vanished or be unreadable; skip it.
                    print(f"Error accessing {filepath}: {e}")

    # Step 2: Within each same-size group, confirm duplicates by hash.
    duplicates = defaultdict(list)
    for files in size_dict.values():  # the size key itself is unused
        if len(files) < 2:
            continue  # unique size -> cannot have a duplicate
        hash_dict = defaultdict(list)
        for path in files:
            file_hash = compute_hash(path)
            if file_hash:  # None means the file could not be read
                hash_dict[file_hash].append(path)
        for file_hash, paths in hash_dict.items():
            if len(paths) > 1:
                duplicates[file_hash].extend(paths)

    return duplicates
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
if __name__ == "__main__":
    # NOTE(review): scans a hard-coded SSHFS network share — presumably the
    # author's media server; confirm/adjust before running elsewhere.
    folder_to_scan = "\\\\sshfs.r\\ubuntu@192.168.178.43\\media\\serien\\Serien"
    duplicate_groups = find_duplicates(folder_to_scan)
    # Print each confirmed duplicate group, one path per line.
    for digest, group in duplicate_groups.items():
        print(f"\nDuplicate group (hash: {digest}):")
        for path in group:
            print(f"  {path}")
|
||||||
@ -1,6 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
|
||||||
import logging
|
import logging
|
||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
@ -16,6 +15,7 @@ from urllib3.util.retry import Retry
|
|||||||
from src.Loaders.Loader import Loader
|
from src.Loaders.Loader import Loader
|
||||||
from src.Loaders.Providers import Providers
|
from src.Loaders.Providers import Providers
|
||||||
from yt_dlp import YoutubeDL
|
from yt_dlp import YoutubeDL
|
||||||
|
import shutil
|
||||||
|
|
||||||
# Read timeout from environment variable, default to 600 seconds (10 minutes)
|
# Read timeout from environment variable, default to 600 seconds (10 minutes)
|
||||||
timeout = int(os.getenv("DOWNLOAD_TIMEOUT", 600))
|
timeout = int(os.getenv("DOWNLOAD_TIMEOUT", 600))
|
||||||
@ -79,6 +79,13 @@ class AniworldLoader(Loader):
|
|||||||
self._EpisodeHTMLDict = {}
|
self._EpisodeHTMLDict = {}
|
||||||
self.Providers = Providers()
|
self.Providers = Providers()
|
||||||
|
|
||||||
|
def ClearCache(self):
|
||||||
|
self._KeyHTMLDict = {}
|
||||||
|
self._EpisodeHTMLDict = {}
|
||||||
|
|
||||||
|
def RemoveFromCache(self):
|
||||||
|
self._EpisodeHTMLDict = {}
|
||||||
|
|
||||||
def Search(self, word: str) -> list:
|
def Search(self, word: str) -> list:
|
||||||
search_url = f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
|
search_url = f"{self.ANIWORLD_TO}/ajax/seriesSearch?keyword={quote(word)}"
|
||||||
anime_list = self.fetch_anime_list(search_url)
|
anime_list = self.fetch_anime_list(search_url)
|
||||||
@ -139,7 +146,6 @@ class AniworldLoader(Loader):
|
|||||||
|
|
||||||
return languageCode in languages
|
return languageCode in languages
|
||||||
|
|
||||||
|
|
||||||
def Download(self, baseDirectory: str, serieFolder: str, season: int, episode: int, key: str, language: str = "German Dub") -> bool:
|
def Download(self, baseDirectory: str, serieFolder: str, season: int, episode: int, key: str, language: str = "German Dub") -> bool:
|
||||||
sanitized_anime_title = ''.join(
|
sanitized_anime_title = ''.join(
|
||||||
char for char in self.GetTitle(key) if char not in self.INVALID_PATH_CHARS
|
char for char in self.GetTitle(key) if char not in self.INVALID_PATH_CHARS
|
||||||
@ -158,17 +164,24 @@ class AniworldLoader(Loader):
|
|||||||
f"({language}).mp4"
|
f"({language}).mp4"
|
||||||
)
|
)
|
||||||
|
|
||||||
output_path = os.path.join(os.path.join(baseDirectory, serieFolder), output_file)
|
folderPath = os.path.join(os.path.join(baseDirectory, serieFolder), f"Season {season}")
|
||||||
|
output_path = os.path.join(folderPath, output_file)
|
||||||
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
|
# Get the system-designated temp directory
|
||||||
|
temp_dir = "./Temp/"
|
||||||
|
os.makedirs(os.path.dirname(temp_dir), exist_ok=True)
|
||||||
|
temp_Path = os.path.join(temp_dir, output_file)
|
||||||
|
|
||||||
for provider in self.SUPPORTED_PROVIDERS:
|
for provider in self.SUPPORTED_PROVIDERS:
|
||||||
link, header = self._get_direct_link_from_provider(season, episode, key, language)
|
link, header = self._get_direct_link_from_provider(season, episode, key, language)
|
||||||
ydl_opts = {
|
ydl_opts = {
|
||||||
'fragment_retries': float('inf'),
|
'fragment_retries': float('inf'),
|
||||||
'outtmpl': output_path,
|
'outtmpl': temp_Path,
|
||||||
'quiet': True,
|
'quiet': True,
|
||||||
'no_warnings': True,
|
'no_warnings': True,
|
||||||
'progress_with_newline': True
|
'progress_with_newline': False,
|
||||||
}
|
}
|
||||||
|
|
||||||
if header:
|
if header:
|
||||||
@ -176,7 +189,12 @@ class AniworldLoader(Loader):
|
|||||||
|
|
||||||
with YoutubeDL(ydl_opts) as ydl:
|
with YoutubeDL(ydl_opts) as ydl:
|
||||||
ydl.download([link])
|
ydl.download([link])
|
||||||
|
|
||||||
|
if (os.path.exists(temp_Path)):
|
||||||
|
shutil.copy(temp_Path, output_path)
|
||||||
|
os.remove(temp_Path)
|
||||||
break
|
break
|
||||||
|
self.ClearCache()
|
||||||
|
|
||||||
|
|
||||||
def GetSiteKey(self) -> str:
|
def GetSiteKey(self) -> str:
|
||||||
@ -203,7 +221,7 @@ class AniworldLoader(Loader):
|
|||||||
return self._KeyHTMLDict[key]
|
return self._KeyHTMLDict[key]
|
||||||
def _GetEpisodeHTML(self, season: int, episode: int, key: str):
|
def _GetEpisodeHTML(self, season: int, episode: int, key: str):
|
||||||
if key in self._EpisodeHTMLDict:
|
if key in self._EpisodeHTMLDict:
|
||||||
return self._EpisodeHTMLDict[key]
|
return self._EpisodeHTMLDict[(key, season, episode)]
|
||||||
|
|
||||||
|
|
||||||
link = (
|
link = (
|
||||||
@ -211,8 +229,8 @@ class AniworldLoader(Loader):
|
|||||||
f"staffel-{season}/episode-{episode}"
|
f"staffel-{season}/episode-{episode}"
|
||||||
)
|
)
|
||||||
html = self.session.get(link, timeout=self.DEFAULT_REQUEST_TIMEOUT)
|
html = self.session.get(link, timeout=self.DEFAULT_REQUEST_TIMEOUT)
|
||||||
self._EpisodeHTMLDict[key] = html
|
self._EpisodeHTMLDict[(key, season, episode)] = html
|
||||||
return self._EpisodeHTMLDict[key]
|
return self._EpisodeHTMLDict[(key, season, episode)]
|
||||||
|
|
||||||
def _get_provider_from_html(self, season: int, episode: int, key: str) -> dict:
|
def _get_provider_from_html(self, season: int, episode: int, key: str) -> dict:
|
||||||
"""
|
"""
|
||||||
|
|||||||
20
src/Main.py
20
src/Main.py
@ -6,6 +6,7 @@ import SerieList
|
|||||||
import SerieScanner
|
import SerieScanner
|
||||||
from src.Loaders.Loaders import Loaders
|
from src.Loaders.Loaders import Loaders
|
||||||
from src.Serie import Serie
|
from src.Serie import Serie
|
||||||
|
import time
|
||||||
|
|
||||||
# Configure logging
|
# Configure logging
|
||||||
logging.basicConfig(level=logging.FATAL, format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
|
logging.basicConfig(level=logging.FATAL, format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s')
|
||||||
@ -55,7 +56,8 @@ class SeriesApp:
|
|||||||
print(f"{i}. {serie}")
|
print(f"{i}. {serie}")
|
||||||
|
|
||||||
def search(self, words :str) -> list:
|
def search(self, words :str) -> list:
|
||||||
return AniWorldLoader.search_anime(words)
|
loader = self.Loaders.GetLoader(key="aniworld.to")
|
||||||
|
return loader.Search(words)
|
||||||
|
|
||||||
def get_user_selection(self):
|
def get_user_selection(self):
|
||||||
"""Handle user input for selecting series."""
|
"""Handle user input for selecting series."""
|
||||||
@ -91,6 +93,20 @@ class SeriesApp:
|
|||||||
bar = "@" * filled_length + "-" * (length - filled_length)
|
bar = "@" * filled_length + "-" * (length - filled_length)
|
||||||
return f"[{bar}] {current} / {total}"
|
return f"[{bar}] {current} / {total}"
|
||||||
|
|
||||||
|
def retry(self, func, max_retries=3, delay=2, *args, **kwargs):
|
||||||
|
for attempt in range(1, max_retries + 1):
|
||||||
|
try:
|
||||||
|
func(*args, **kwargs)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Attempt {attempt} failed: {e}")
|
||||||
|
if attempt == max_retries:
|
||||||
|
print("All attempts failed.")
|
||||||
|
else:
|
||||||
|
print(f"Retrying in {delay} seconds...\n")
|
||||||
|
time.sleep(delay)
|
||||||
|
return False
|
||||||
|
|
||||||
def download_series(self, series):
|
def download_series(self, series):
|
||||||
"""Simulate the downloading process with a progress bar."""
|
"""Simulate the downloading process with a progress bar."""
|
||||||
total_downloaded = 0
|
total_downloaded = 0
|
||||||
@ -107,7 +123,7 @@ class SeriesApp:
|
|||||||
loader = self.Loaders.GetLoader(key="aniworld.to")
|
loader = self.Loaders.GetLoader(key="aniworld.to")
|
||||||
if loader.IsLanguage(season, episode, serie.key):
|
if loader.IsLanguage(season, episode, serie.key):
|
||||||
print(f"\ndownload {serie.folder} {season} {episode}\n")
|
print(f"\ndownload {serie.folder} {season} {episode}\n")
|
||||||
loader.Download(self.directory_to_search, serie.folder, season, episode, serie.key)
|
self.retry(loader.Download, 3, 1, self.directory_to_search, serie.folder, season, episode, serie.key)
|
||||||
|
|
||||||
downloaded += 1
|
downloaded += 1
|
||||||
total_downloaded += 1
|
total_downloaded += 1
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user