feat: add NFO scan after rescan and year caching

- Add nfo_scan_after_rescan config option (default: true)
- Implement year caching in AniworldLoader and EnhancedAniWorldLoader
- Make get_year abstract method in base provider
- Run NFO validation/creation after scheduled rescan completes
- Add _YearDict cache to avoid re-extracting year from HTML
This commit is contained in:
2026-06-05 18:15:41 +02:00
parent 8b21f1243f
commit e74b04c1ee
10 changed files with 839 additions and 35 deletions

View File

@@ -110,6 +110,7 @@ class EnhancedAniWorldLoader(Loader):
# Cache dictionaries
self._KeyHTMLDict = {}
self._EpisodeHTMLDict = {}
self._YearDict = {}
# Provider manager
self.Providers = Providers()
@@ -666,6 +667,10 @@ class EnhancedAniWorldLoader(Loader):
if title_span:
span = title_span.find('span')
if span:
# Extract and cache year from soup if available
year = self._ExtractYearFromSoup(soup)
if year is not None:
self._YearDict[key] = year
return span.text.strip()
self.logger.warning("Could not extract title for key: %s", key)
@@ -674,7 +679,62 @@ class EnhancedAniWorldLoader(Loader):
except Exception as e:
self.logger.error("Failed to get title for key %s: %s", key, e)
raise RetryableError(f"Title extraction failed: {e}") from e
def _ExtractYearFromSoup(self, soup: BeautifulSoup) -> int | None:
    """Extract the release year from a parsed series page.

    Two strategies are tried in order:

    1. Every ``<p>`` tag is checked for a ``Jahr:`` / ``Year:`` label that
       is followed (after optional whitespace) by a four-digit year.
    2. The first ``<div class="series-info">`` is searched for any
       standalone four-digit year.

    Both strategies only accept standalone values of the form 19xx or 20xx,
    so unrelated digit runs in the same text (for example ``1080p`` before
    the label, or the prefix of a longer number) are never mistaken for a
    year.

    Args:
        soup: Parsed BeautifulSoup object

    Returns:
        Year as int or None if not found
    """
    for p_tag in soup.find_all('p'):
        # Anchor the year to its label instead of taking the first four
        # digits found anywhere in the paragraph.
        match = re.search(
            r'(?:Jahr|Year):\s*((?:19|20)\d{2})\b',
            p_tag.get_text(),
        )
        if match:
            return int(match.group(1))

    # Fallback: no labelled paragraph matched, so look for a bare year in
    # the series-info block.
    info_div = soup.find('div', class_='series-info')
    if info_div is not None:
        match = re.search(r'\b(19\d{2}|20\d{2})\b', info_div.get_text())
        if match:
            return int(match.group(1))

    return None
def GetYear(self, key: str) -> int | None:
"""Get anime release year from series key.
Uses cached year from GetTitle if available,
otherwise extracts and caches it.
Args:
key: Series identifier
Returns:
Release year or None if not found
"""
# Check cache first
if key in self._YearDict:
return self._YearDict[key]
# Not cached - extract from HTML
try:
soup = BeautifulSoup(self._GetKeyHTML(key).content, 'html.parser')
year = self._ExtractYearFromSoup(soup)
if year is not None:
self._YearDict[key] = year
return year
except Exception as e:
self.logger.warning("Error extracting year for key %s: %s", key, e)
return None
def GetSiteKey(self) -> str:
    """Return the site identifier string for this loader."""
    site_key = "aniworld.to"
    return site_key