feat: add NFO scan after rescan and year caching
- Add nfo_scan_after_rescan config option (default: true)
- Implement year caching in AniworldLoader and EnhancedAniWorldLoader
- Make get_year an abstract method in base provider
- Run NFO validation/creation after scheduled rescan completes
- Add _YearDict cache to avoid re-extracting year from HTML
This commit is contained in:
@@ -110,6 +110,7 @@ class EnhancedAniWorldLoader(Loader):
|
||||
# Cache dictionaries
|
||||
self._KeyHTMLDict = {}
|
||||
self._EpisodeHTMLDict = {}
|
||||
self._YearDict = {}
|
||||
|
||||
# Provider manager
|
||||
self.Providers = Providers()
|
||||
@@ -666,6 +667,10 @@ class EnhancedAniWorldLoader(Loader):
|
||||
if title_span:
|
||||
span = title_span.find('span')
|
||||
if span:
|
||||
# Extract and cache year from soup if available
|
||||
year = self._ExtractYearFromSoup(soup)
|
||||
if year is not None:
|
||||
self._YearDict[key] = year
|
||||
return span.text.strip()
|
||||
|
||||
self.logger.warning("Could not extract title for key: %s", key)
|
||||
@@ -674,7 +679,62 @@ class EnhancedAniWorldLoader(Loader):
|
||||
except Exception as e:
|
||||
self.logger.error("Failed to get title for key %s: %s", key, e)
|
||||
raise RetryableError(f"Title extraction failed: {e}") from e
|
||||
|
||||
|
||||
def _ExtractYearFromSoup(self, soup: BeautifulSoup) -> int | None:
    """Extract the release year from a parsed series page.

    Two strategies are tried in order:

    1. Scan every ``<p>`` tag for a ``Jahr:`` / ``Year:`` label. The four
       digits directly following the label are preferred; if the label is
       not immediately followed by a year, the first four-digit run in
       that paragraph is used instead.
    2. Fall back to the first standalone 19xx/20xx number found inside a
       ``<div class="series-info">`` element.

    Args:
        soup: Parsed BeautifulSoup object

    Returns:
        Year as int or None if not found
    """
    for p_tag in soup.find_all('p'):
        text = p_tag.get_text()
        if 'Jahr:' in text or 'Year:' in text:
            # Anchor on the label first so that an unrelated number that
            # appears earlier in the same paragraph (episode count, id, ...)
            # is not mistaken for the year. The unanchored search is kept
            # as a fallback for texts like "Year: circa 1998".
            match = (
                re.search(r'(?:Jahr|Year):\s*(\d{4})', text)
                or re.search(r'(\d{4})', text)
            )
            if match:
                return int(match.group(1))

    info_div = soup.find('div', class_='series-info')
    if info_div:
        text = info_div.get_text()
        # Without a label to anchor on, only accept plausible years.
        match = re.search(r'\b(19\d{2}|20\d{2})\b', text)
        if match:
            return int(match.group(1))

    return None
|
||||
|
||||
def GetYear(self, key: str) -> int | None:
    """Return the release year for the series identified by ``key``.

    A year already stored in ``self._YearDict`` is returned directly.
    Otherwise the series page is fetched via ``self._GetKeyHTML``, parsed,
    and the year is extracted; a successfully extracted year is stored in
    the cache before being returned.

    Args:
        key: Series identifier

    Returns:
        Release year, or None if it could not be determined (including
        when fetching or parsing raised an exception, which is logged as
        a warning rather than propagated)
    """
    # Sentinel distinguishes "not cached" from any stored value.
    not_cached = object()
    cached_year = self._YearDict.get(key, not_cached)
    if cached_year is not not_cached:
        return cached_year

    # Cache miss: pull the year out of the series page.
    try:
        page = self._GetKeyHTML(key)
        parsed = BeautifulSoup(page.content, 'html.parser')
        found = self._ExtractYearFromSoup(parsed)
        if found is None:
            return None
        self._YearDict[key] = found
        return found
    except Exception as exc:
        # Best-effort lookup: a failure here must not break the caller.
        self.logger.warning("Error extracting year for key %s: %s", key, exc)
        return None
|
||||
|
||||
def GetSiteKey(self) -> str:
    """Return the identifier of the site this loader targets.

    Returns:
        The constant string ``"aniworld.to"``
    """
    site_key = "aniworld.to"
    return site_key
|
||||
|
||||
Reference in New Issue
Block a user