feat: add NFO scan after rescan and year caching

- Add nfo_scan_after_rescan config option (default: true)
- Implement year caching in AniworldLoader and EnhancedAniWorldLoader
- Make get_year abstract method in base provider
- Run NFO validation/creation after scheduled rescan completes
- Add _YearDict cache to avoid re-extracting year from HTML
This commit is contained in:
2026-06-05 18:15:41 +02:00
parent 8b21f1243f
commit e74b04c1ee
10 changed files with 839 additions and 35 deletions

View File

@@ -158,6 +158,7 @@ class AniworldLoader(Loader):
self._KeyHTMLDict = {}
self._EpisodeHTMLDict = {}
self._YearDict = {}
self.Providers = Providers()
# Events: download_progress is triggered with progress dict
@@ -774,55 +775,81 @@ class AniworldLoader(Loader):
if span_tag:
title = span_tag.text
logger.debug("Found title: %s", title)
# Also try to extract year from sibling p tag "Jahr: {year}"
# Year is typically right after title in the HTML structure
year = self._extract_year_from_soup(soup)
if year is not None:
self._YearDict[key] = year
logger.debug("Cached year %d for key: %s", year, key)
return title
logger.warning("No title found for key: %s", key)
return ""
def _extract_year_from_soup(self, soup: BeautifulSoup) -> int | None:
"""Extract year from BeautifulSoup object.
Looks for 'Jahr: {year}' pattern in p tags adjacent to series-title.
Args:
soup: Parsed BeautifulSoup object
Returns:
Year as int or None if not found
"""
# Try to find year in metadata
for p_tag in soup.find_all('p'):
text = p_tag.get_text()
if 'Jahr:' in text or 'Year:' in text:
match = re.search(r'(\d{4})', text)
if match:
return int(match.group(1))
# Fallback: look in series-info div
info_div = soup.find('div', class_='series-info')
if info_div:
text = info_div.get_text()
match = re.search(r'\b(19\d{2}|20\d{2})\b', text)
if match:
return int(match.group(1))
return None
def get_year(self, key: str) -> int | None:
"""Get anime release year from series key.
Attempts to extract the year from the series page metadata.
Returns None if year cannot be determined.
Uses cached year from get_title if available,
otherwise extracts and caches it.
Args:
key: Series identifier
Returns:
int or None: Release year if found, None otherwise
Release year or None if not found
"""
logger.debug("Getting year for key: %s", key)
# Check cache first
if key in self._YearDict:
logger.debug("Using cached year %d for key: %s", self._YearDict[key], key)
return self._YearDict[key]
# Not cached - extract from HTML
try:
soup = BeautifulSoup(
_decode_html_content(self._get_key_html(key).content),
'html.parser'
)
# Try to find year in metadata
# Check for "Jahr:" or similar metadata fields
for p_tag in soup.find_all('p'):
text = p_tag.get_text()
if 'Jahr:' in text or 'Year:' in text:
# Extract year from text like "Jahr: 2025"
match = re.search(r'(\d{4})', text)
if match:
year = int(match.group(1))
logger.debug("Found year in metadata: %s", year)
return year
# Try alternative: look for year in genre/info section
info_div = soup.find('div', class_='series-info')
if info_div:
text = info_div.get_text()
match = re.search(r'\b(19\d{2}|20\d{2})\b', text)
if match:
year = int(match.group(1))
logger.debug("Found year in info section: %s", year)
return year
logger.debug("No year found for key: %s", key)
return None
year = self._extract_year_from_soup(soup)
if year is not None:
self._YearDict[key] = year
logger.debug("Found and cached year %d for key: %s", year, key)
return year
except Exception as e:
logger.warning("Error extracting year for key %s: %s", key, e)
return None