From 40ffb99c972da6c94f0d5db023ac5168cca4b707 Mon Sep 17 00:00:00 2001
From: Lukas
Date: Sun, 11 Jan 2026 19:47:47 +0100
Subject: [PATCH] Add year support to anime folder names

- Add year property to Serie entity with name_with_year
- Add year column to AnimeSeries database model
- Add get_year() method to AniworldLoader provider
- Extract year from folder names before fetching from API
- Update SerieScanner to populate year during rescan
- Update add_series endpoint to fetch and store year
- Optimize: check folder name for year before API call
---
 src/core/SerieScanner.py                | 98 ++++++++++++++++++++++++-
 src/core/entities/series.py             | 68 +++++++++++++----
 src/core/providers/aniworld_provider.py | 48 ++++++++++++
 src/server/api/anime.py                 | 34 +++++++--
 src/server/database/models.py           |  4 +
 src/server/database/service.py          |  5 +-
 src/server/services/anime_service.py    | 12 ++-
 7 files changed, 241 insertions(+), 28 deletions(-)

diff --git a/src/core/SerieScanner.py b/src/core/SerieScanner.py
index 082dfef..43b6f69 100644
--- a/src/core/SerieScanner.py
+++ b/src/core/SerieScanner.py
@@ -117,6 +117,44 @@ class SerieScanner:
         if handler in self.events.on_progress:
             self.events.on_progress.remove(handler)
 
+    def _extract_year_from_folder_name(self, folder_name: str) -> int | None:
+        """Extract year from folder name if present.
+
+        Looks for the first "(YYYY)" group anywhere in the folder name.
+
+        Args:
+            folder_name: The folder name to check
+
+        Returns:
+            int or None: Year if found, None otherwise
+
+        Example:
+            >>> _extract_year_from_folder_name("Dororo (2025)")
+            2025
+            >>> _extract_year_from_folder_name("Dororo")
+            None
+        """
+        if not folder_name:
+            return None
+
+        # Look for year in format (YYYY) - typically at end of name
+        match = re.search(r'\((\d{4})\)', folder_name)
+        if match:
+            try:
+                year = int(match.group(1))
+                # Validate year is reasonable (between 1900 and 2100)
+                if 1900 <= year <= 2100:
+                    logger.debug(
+                        "Extracted year from folder name: %s -> %d",
+                        folder_name,
+                        year
+                    )
+                    return year
+            except ValueError:
+                pass
+
+        return None
+
     def subscribe_on_error(self, handler):
         """
         Subscribe a handler to an event.
@@ -235,6 +273,33 @@ class SerieScanner:
                         and serie.key
                         and serie.key.strip()
                     ):
+                        # Try to extract year from folder name first
+                        if not hasattr(serie, 'year') or not serie.year:
+                            year_from_folder = self._extract_year_from_folder_name(folder)
+                            if year_from_folder:
+                                serie.year = year_from_folder
+                                logger.info(
+                                    "Using year from folder name: %s (year=%d)",
+                                    folder,
+                                    year_from_folder
+                                )
+                            else:
+                                # If not in folder name, fetch from provider
+                                try:
+                                    serie.year = self.loader.get_year(serie.key)
+                                    if serie.year:
+                                        logger.info(
+                                            "Fetched year from provider: %s (year=%d)",
+                                            serie.key,
+                                            serie.year
+                                        )
+                                except Exception as e:
+                                    logger.warning(
+                                        "Could not fetch year for %s: %s",
+                                        serie.key,
+                                        str(e)
+                                    )
+
                         # Delegate the provider to compare local files with
                         # remote metadata, yielding missing episodes per
                         # season. Results are saved back to disk so that both
@@ -611,19 +676,46 @@ class SerieScanner:
                         sum(len(eps) for eps in missing_episodes.values())
                     )
                 else:
+                    # Try to extract year from folder name first
+                    year = self._extract_year_from_folder_name(folder)
+                    if year:
+                        logger.info(
+                            "Using year from folder name: %s (year=%d)",
+                            folder,
+                            year
+                        )
+                    else:
+                        # If not in folder name, fetch from provider
+                        try:
+                            year = self.loader.get_year(key)
+                            if year:
+                                logger.info(
+                                    "Fetched year from provider: %s (year=%d)",
+                                    key,
+                                    year
+                                )
+                        except Exception as e:
+                            logger.warning(
+                                "Could not fetch year for %s: %s",
+                                key,
+                                str(e)
+                            )
+
                     # Create new serie entry
                     serie = Serie(
                         key=key,
                         name="",  # Will be populated by caller if needed
                         site=site,
                         folder=folder,
-                        episodeDict=missing_episodes
+                        episodeDict=missing_episodes,
+                        year=year
                     )
                     self.keyDict[key] = serie
                     logger.debug(
-                        "Created new series entry for %s with %d missing episodes",
+                        "Created new series entry for %s with %d missing episodes (year=%s)",
                         key,
-                        sum(len(eps) for eps in missing_episodes.values())
+                        sum(len(eps) for eps in missing_episodes.values()),
+                        year
                     )
 
                 # Notify completion
diff --git a/src/core/entities/series.py b/src/core/entities/series.py
index 1d8ad7c..c5ab7b4 100644
--- a/src/core/entities/series.py
+++ b/src/core/entities/series.py
@@ -22,6 +22,7 @@ class Serie:
             e.g., "Attack on Titan (2013)")
         episodeDict: Dictionary mapping season numbers to
             lists of episode numbers
+        year: Release year of the series (optional)
 
     Raises:
         ValueError: If key is None or empty string
@@ -33,7 +34,8 @@ class Serie:
         name: str,
         site: str,
         folder: str,
-        episodeDict: dict[int, list[int]]
+        episodeDict: dict[int, list[int]],
+        year: int | None = None
     ):
         if not key or not key.strip():
             raise ValueError("Serie key cannot be None or empty")
@@ -43,13 +45,15 @@ class Serie:
         self._site = site
         self._folder = folder
         self._episodeDict = episodeDict
+        self._year = year
 
     def __str__(self):
         """String representation of Serie object"""
+        year_str = f", year={self.year}" if self.year else ""
         return (
             f"Serie(key='{self.key}', name='{self.name}', "
             f"site='{self.site}', folder='{self.folder}', "
-            f"episodeDict={self.episodeDict})"
+            f"episodeDict={self.episodeDict}{year_str})"
         )
 
     @property
@@ -129,29 +133,65 @@ class Serie:
     def episodeDict(self, value: dict[int, list[int]]):
         self._episodeDict = value
 
+    @property
+    def year(self) -> int | None:
+        """
+        Release year of the series.
+
+        Returns:
+            int or None: The year the series was released, or None if unknown
+        """
+        return self._year
+
+    @year.setter
+    def year(self, value: int | None):
+        """Set the release year of the series."""
+        self._year = value
+
+    @property
+    def name_with_year(self) -> str:
+        """
+        Get the series name with year appended if available.
+
+        Returns "Name (Year)" when both name and year are set; otherwise
+        returns just the name, so an empty name stays falsy for fallbacks.
+        Use this for filesystem folders to tell same-named series apart.
+
+        Returns:
+            str: Name with year in format "Name (Year)", or just name if no year
+
+        Example:
+            >>> serie = Serie("dororo", "Dororo", ..., year=2025)
+            >>> serie.name_with_year
+            'Dororo (2025)'
+        """
+        if self._year and self._name:
+            return f"{self._name} ({self._year})"
+        return self._name
+
     @property
     def sanitized_folder(self) -> str:
         """
-        Get a filesystem-safe folder name derived from the display name.
+        Get a filesystem-safe folder name derived from the display name with year.
 
-        This property returns a sanitized version of the series name
-        suitable for use as a filesystem folder name. It removes/replaces
-        characters that are invalid for filesystems while preserving
+        This property returns a sanitized version of the series name with year
+        (if available) suitable for use as a filesystem folder name. It removes/
+        replaces characters that are invalid for filesystems while preserving
         Unicode characters.
 
         Use this property when creating folders for the series on disk.
         The `folder` property stores the actual folder name used.
 
         Returns:
-            str: Filesystem-safe folder name based on display name
+            str: Filesystem-safe folder name based on display name with year
 
         Example:
-            >>> serie = Serie("attack-on-titan", "Attack on Titan: Final", ...)
+            >>> serie = Serie("attack-on-titan", "Attack on Titan: Final", ..., year=2025)
             >>> serie.sanitized_folder
-            'Attack on Titan Final'
+            'Attack on Titan Final (2025)'
         """
-        # Use name if available, fall back to folder, then key
-        name_to_sanitize = self._name or self._folder or self._key
+        # Use name_with_year if available, fall back to folder, then key
+        name_to_sanitize = self.name_with_year or self._folder or self._key
         try:
             return sanitize_folder_name(name_to_sanitize)
         except ValueError:
@@ -167,7 +207,8 @@ class Serie:
             "folder": self.folder,
             "episodeDict": {
                 str(k): list(v) for k, v in self.episodeDict.items()
-            }
+            },
+            "year": self.year
         }
 
     @staticmethod
@@ -182,7 +223,8 @@ class Serie:
             data["name"],
             data["site"],
             data["folder"],
-            episode_dict
+            episode_dict,
+            data.get("year")  # Optional year field for backward compatibility
         )
 
     def save_to_file(self, filename: str):
diff --git a/src/core/providers/aniworld_provider.py b/src/core/providers/aniworld_provider.py
index 2de9bd5..65d169e 100644
--- a/src/core/providers/aniworld_provider.py
+++ b/src/core/providers/aniworld_provider.py
@@ -380,6 +380,54 @@ class AniworldLoader(Loader):
         logging.warning(f"No title found for key: {key}")
         return ""
 
+    def get_year(self, key: str) -> int | None:
+        """Get anime release year from series key.
+
+        Attempts to extract the year from the series page metadata.
+        Returns None if year cannot be determined.
+
+        Args:
+            key: Series identifier
+
+        Returns:
+            int or None: Release year if found, None otherwise
+        """
+        logging.debug(f"Getting year for key: {key}")
+        try:
+            soup = BeautifulSoup(
+                self._get_key_html(key).content,
+                'html.parser'
+            )
+
+            # Try to find year in metadata
+            # Check for "Jahr:" or similar metadata fields
+            for p_tag in soup.find_all('p'):
+                text = p_tag.get_text()
+                if 'Jahr:' in text or 'Year:' in text:
+                    # Extract a plausible 19xx/20xx year from text like "Jahr: 2025"
+                    match = re.search(r'\b(19\d{2}|20\d{2})\b', text)
+                    if match:
+                        year = int(match.group(1))
+                        logging.debug(f"Found year in metadata: {year}")
+                        return year
+
+            # Try alternative: look for year in genre/info section
+            info_div = soup.find('div', class_='series-info')
+            if info_div:
+                text = info_div.get_text()
+                match = re.search(r'\b(19\d{2}|20\d{2})\b', text)
+                if match:
+                    year = int(match.group(1))
+                    logging.debug(f"Found year in info section: {year}")
+                    return year
+
+            logging.debug(f"No year found for key: {key}")
+            return None
+
+        except Exception as e:
+            logging.warning(f"Error extracting year for key {key}: {e}")
+            return None
+
     def _get_key_html(self, key: str):
         """Get cached HTML for series key.
 
diff --git a/src/server/api/anime.py b/src/server/api/anime.py
index 940d222..5f4cd45 100644
--- a/src/server/api/anime.py
+++ b/src/server/api/anime.py
@@ -693,10 +693,26 @@ async def add_series(
                 detail="Could not extract series key from link",
             )
 
-        # Step B: Create sanitized folder name from display name
+        # Step B: Fetch year from provider and create folder name with year
         name = request.name.strip()
+
+        # Fetch year from provider
+        year = None
+        if series_app and hasattr(series_app, 'loader'):
+            try:
+                year = series_app.loader.get_year(key)
+                logger.info(f"Fetched year for {key}: {year}")
+            except Exception as e:
+                logger.warning(f"Could not fetch year for {key}: {e}")
+
+        # Create folder name with year if available
+        if year:
+            folder_name_with_year = f"{name} ({year})"
+        else:
+            folder_name_with_year = name
+
         try:
-            folder = sanitize_folder_name(name)
+            folder = sanitize_folder_name(folder_name_with_year)
         except ValueError as e:
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
@@ -729,14 +745,16 @@ async def add_series(
                 name=name,
                 site="aniworld.to",
                 folder=folder,
+                year=year,
             )
             db_id = anime_series.id
 
             logger.info(
-                "Added series to database: %s (key=%s, db_id=%d)",
+                "Added series to database: %s (key=%s, db_id=%d, year=%s)",
                 name,
                 key,
-                db_id
+                db_id,
+                year
             )
 
         # Step D: Add to SerieList (in-memory only, no folder creation)
@@ -746,17 +764,19 @@ async def add_series(
                 name=name,
                 site="aniworld.to",
                 folder=folder,
-                episodeDict={}
+                episodeDict={},
+                year=year
             )
 
             # Add to in-memory cache without creating folder on disk
             if hasattr(series_app.list, 'keyDict'):
                 series_app.list.keyDict[key] = serie
                 logger.info(
-                    "Added series to in-memory cache: %s (key=%s, folder=%s)",
+                    "Added series to in-memory cache: %s (key=%s, folder=%s, year=%s)",
                     name,
                     key,
-                    folder
+                    folder,
+                    year
                 )
 
         # Step E: Trigger targeted scan for missing episodes
diff --git a/src/server/database/models.py b/src/server/database/models.py
index 6d58ceb..8694759 100644
--- a/src/server/database/models.py
+++ b/src/server/database/models.py
@@ -73,6 +73,10 @@ class AnimeSeries(Base, TimestampMixin):
         String(1000), nullable=False,
         doc="Filesystem folder name - METADATA ONLY, not for lookups"
     )
+    year: Mapped[Optional[int]] = mapped_column(
+        Integer, nullable=True,
+        doc="Release year of the series"
+    )
 
     # Relationships
     episodes: Mapped[List["Episode"]] = relationship(
diff --git a/src/server/database/service.py b/src/server/database/service.py
index 5b13f9c..1f5813b 100644
--- a/src/server/database/service.py
+++ b/src/server/database/service.py
@@ -64,6 +64,7 @@ class AnimeSeriesService:
         name: str,
         site: str,
         folder: str,
+        year: int | None = None,
     ) -> AnimeSeries:
         """Create a new anime series.
 
@@ -73,6 +74,7 @@ class AnimeSeriesService:
             name: Series name
             site: Provider site URL
             folder: Local filesystem path
+            year: Release year (optional)
 
         Returns:
             Created AnimeSeries instance
@@ -85,11 +87,12 @@ class AnimeSeriesService:
             name=name,
             site=site,
             folder=folder,
+            year=year,
         )
         db.add(series)
         await db.flush()
         await db.refresh(series)
-        logger.info(f"Created anime series: {series.name} (key={series.key})")
+        logger.info(f"Created anime series: {series.name} (key={series.key}, year={year})")
         return series
 
     @staticmethod
diff --git a/src/server/services/anime_service.py b/src/server/services/anime_service.py
index 51d1cc5..557ed5c 100644
--- a/src/server/services/anime_service.py
+++ b/src/server/services/anime_service.py
@@ -594,6 +594,7 @@ class AnimeService:
             name=serie.name,
             site=serie.site,
             folder=serie.folder,
+            year=getattr(serie, 'year', None),
         )
 
         # Create Episode records
@@ -608,9 +609,10 @@ class AnimeService:
                 )
 
         logger.debug(
-            "Created series in database: %s (key=%s)",
+            "Created series in database: %s (key=%s, year=%s)",
             serie.name,
-            serie.key
+            serie.key,
+            getattr(serie, 'year', None)
         )
 
     async def _update_series_in_db(self, serie, existing, db) -> None:
@@ -768,6 +770,7 @@ class AnimeService:
             name=serie.name,
             site=serie.site,
             folder=serie.folder,
+            year=getattr(serie, 'year', None),
         )
 
         # Create Episode records for each episode in episodeDict
@@ -782,9 +785,10 @@ class AnimeService:
                 )
 
         logger.info(
-            "Added series to database: %s (key=%s)",
+            "Added series to database: %s (key=%s, year=%s)",
             serie.name,
-            serie.key
+            serie.key,
+            getattr(serie, 'year', None)
         )
 
         return anime_series