feat: implement NFO ID storage and media scan tracking

Task 3 (NFO data):
- Add parse_nfo_ids() method to NFOService
- Extract TMDB/TVDB IDs from NFO files during scan
- Update database with extracted IDs
- Add comprehensive unit and integration tests

Task 4 (Media scan):
- Track initial media scan with SystemSettings flag
- Run background loading only on first startup
- Skip media scan on subsequent runs
This commit is contained in:
2026-01-21 19:36:54 +01:00
parent 050db40af3
commit 125892abe5
6 changed files with 572 additions and 43 deletions

View File

@@ -287,6 +287,87 @@ class NFOService:
return nfo_path
def parse_nfo_ids(self, nfo_path: Path) -> Dict[str, Optional[int]]:
    """Parse TMDB ID and TVDB ID from an existing NFO file.

    Looks first at Kodi-style ``<uniqueid type="...">`` elements, then
    falls back to legacy ``<tmdbid>``/``<tvdbid>`` elements for any ID
    still missing. Parse failures are logged, never raised.

    Args:
        nfo_path: Path to tvshow.nfo file

    Returns:
        Dictionary with 'tmdb_id' and 'tvdb_id' keys.
        Values are integers if found, None otherwise.

    Example:
        >>> ids = nfo_service.parse_nfo_ids(Path("/anime/series/tvshow.nfo"))
        >>> print(ids)
        {'tmdb_id': 1429, 'tvdb_id': 79168}
    """

    def _coerce_id(raw: str, name: str, where: str) -> Optional[int]:
        # Single place for the str->int conversion that was previously
        # duplicated four times; returns None (with a warning) on bad input.
        try:
            return int(raw)
        except ValueError:
            logger.warning(f"Invalid {name} ID format in {where}: {raw}")
            return None

    result: Dict[str, Optional[int]] = {"tmdb_id": None, "tvdb_id": None}

    if not nfo_path.exists():
        logger.debug(f"NFO file not found: {nfo_path}")
        return result

    try:
        tree = etree.parse(str(nfo_path))
        root = tree.getroot()

        # Preferred source: <uniqueid type="tmdb|tvdb"> elements. A later
        # valid element of the same type overwrites an earlier one,
        # matching the original behavior; invalid text only logs a warning.
        for uniqueid in root.findall(".//uniqueid"):
            uid_type = uniqueid.get("type")
            uid_text = uniqueid.text
            if not uid_text:
                continue
            if uid_type == "tmdb":
                parsed = _coerce_id(uid_text, "TMDB", "NFO")
                if parsed is not None:
                    result["tmdb_id"] = parsed
            elif uid_type == "tvdb":
                parsed = _coerce_id(uid_text, "TVDB", "NFO")
                if parsed is not None:
                    result["tvdb_id"] = parsed

        # Fallback: legacy dedicated <tmdbid>/<tvdbid> elements, consulted
        # only for an ID not already found above.
        if result["tmdb_id"] is None:
            tmdbid_elem = root.find(".//tmdbid")
            if tmdbid_elem is not None and tmdbid_elem.text:
                result["tmdb_id"] = _coerce_id(
                    tmdbid_elem.text, "TMDB", "tmdbid element"
                )
        if result["tvdb_id"] is None:
            tvdbid_elem = root.find(".//tvdbid")
            if tvdbid_elem is not None and tvdbid_elem.text:
                result["tvdb_id"] = _coerce_id(
                    tvdbid_elem.text, "TVDB", "tvdbid element"
                )

        logger.debug(
            f"Parsed IDs from NFO: {nfo_path.name} - "
            f"TMDB: {result['tmdb_id']}, TVDB: {result['tvdb_id']}"
        )
    except etree.XMLSyntaxError as e:
        logger.error(f"Invalid XML in NFO file {nfo_path}: {e}")
    except Exception as e:  # pylint: disable=broad-except
        logger.error(f"Error parsing NFO file {nfo_path}: {e}")

    return result
def _find_best_match(
self,
results: List[Dict[str, Any]],

View File

@@ -102,21 +102,84 @@ class SeriesManagerService:
image_size=settings.nfo_image_size
)
async def process_nfo_for_series(self, serie_folder: str, serie_name: str, year: Optional[int] = None):
async def process_nfo_for_series(
self,
serie_folder: str,
serie_name: str,
serie_key: str,
year: Optional[int] = None,
db=None
):
"""Process NFO file for a series (create or update).
Args:
serie_folder: Series folder name
serie_name: Series display name
serie_key: Series unique identifier for database updates
year: Release year (helps with TMDB matching)
db: Optional database session for updating IDs
"""
if not self.nfo_service:
return
try:
folder_path = Path(self.anime_directory) / serie_folder
nfo_path = folder_path / "tvshow.nfo"
nfo_exists = await self.nfo_service.check_nfo_exists(serie_folder)
# If NFO exists, parse IDs and update database
if nfo_exists and db:
logger.debug(f"Parsing IDs from existing NFO for '{serie_name}'")
ids = self.nfo_service.parse_nfo_ids(nfo_path)
if ids["tmdb_id"] or ids["tvdb_id"]:
# Update database with extracted IDs
from datetime import datetime, timezone
from sqlalchemy import select
from src.server.database.models import AnimeSeries
result = await db.execute(
select(AnimeSeries).filter(AnimeSeries.key == serie_key)
)
series = result.scalars().first()
if series:
now = datetime.now(timezone.utc)
series.has_nfo = True
if series.nfo_created_at is None:
series.nfo_created_at = now
series.nfo_updated_at = now
if ids["tmdb_id"] is not None:
series.tmdb_id = ids["tmdb_id"]
logger.debug(
f"Updated TMDB ID for '{serie_name}': "
f"{ids['tmdb_id']}"
)
if ids["tvdb_id"] is not None:
series.tvdb_id = ids["tvdb_id"]
logger.debug(
f"Updated TVDB ID for '{serie_name}': "
f"{ids['tvdb_id']}"
)
await db.commit()
logger.info(
f"Updated database with IDs from NFO for "
f"'{serie_name}' - TMDB: {ids['tmdb_id']}, "
f"TVDB: {ids['tvdb_id']}"
)
else:
logger.warning(
f"Series not found in database for NFO ID update: "
f"{serie_key}"
)
# Create or update NFO file if configured
if not nfo_exists and self.auto_create_nfo:
logger.info(f"Creating NFO for '{serie_name}' ({serie_folder})")
await self.nfo_service.create_tvshow_nfo(
@@ -156,9 +219,10 @@ class SeriesManagerService:
This method:
1. Uses SerieList to scan series folders
2. For each series without NFO (if auto_create=True), creates one
3. For each series with NFO (if update_on_scan=True), updates it
4. Runs operations concurrently for better performance
2. For each series with existing NFO, reads TMDB/TVDB IDs and updates database
3. For each series without NFO (if auto_create=True), creates one
4. For each series with NFO (if update_on_scan=True), updates it
5. Runs operations concurrently for better performance
"""
if not self.nfo_service:
logger.info("NFO service not enabled, skipping NFO processing")
@@ -173,30 +237,37 @@ class SeriesManagerService:
logger.info(f"Processing NFO for {len(all_series)} series...")
# Create tasks for concurrent processing
tasks = []
for serie in all_series:
# Extract year from first air date if available
year = None
if hasattr(serie, 'year') and serie.year:
year = serie.year
# Import database session
from src.server.database.connection import get_db_session
# Create database session for ID updates
async with get_db_session() as db:
# Create tasks for concurrent processing
tasks = []
for serie in all_series:
# Extract year from first air date if available
year = None
if hasattr(serie, 'year') and serie.year:
year = serie.year
task = self.process_nfo_for_series(
serie_folder=serie.folder,
serie_name=serie.name,
serie_key=serie.key,
year=year,
db=db
)
tasks.append(task)
task = self.process_nfo_for_series(
serie_folder=serie.folder,
serie_name=serie.name,
year=year
)
tasks.append(task)
# Process in batches to avoid overwhelming TMDB API
batch_size = 5
for i in range(0, len(tasks), batch_size):
batch = tasks[i:i + batch_size]
await asyncio.gather(*batch, return_exceptions=True)
# Small delay between batches to respect rate limits
if i + batch_size < len(tasks):
await asyncio.sleep(2)
# Process in batches to avoid overwhelming TMDB API
batch_size = 5
for i in range(0, len(tasks), batch_size):
batch = tasks[i:i + batch_size]
await asyncio.gather(*batch, return_exceptions=True)
# Small delay between batches to respect rate limits
if i + batch_size < len(tasks):
await asyncio.sleep(2)
logger.info("NFO processing complete")

View File

@@ -341,8 +341,52 @@ async def lifespan(_application: FastAPI):
await background_loader.start()
logger.info("Background loader service started")
# Check for incomplete series and queue background loading
await _check_incomplete_series_on_startup(background_loader)
# Check if initial media scan has been completed
is_media_scan_done = False
try:
async with get_db_session() as db:
is_media_scan_done = (
await SystemSettingsService
.is_initial_media_scan_completed(db)
)
except Exception as e:
logger.warning(
"Failed to check media scan status: %s, assuming not done",
e
)
is_media_scan_done = False
# Run media scan only on first run
if not is_media_scan_done:
logger.info("Performing initial media scan...")
try:
# Check for incomplete series and queue background loading
await _check_incomplete_series_on_startup(background_loader)
logger.info("Initial media scan completed")
# Mark media scan as completed
try:
async with get_db_session() as db:
await (
SystemSettingsService
.mark_initial_media_scan_completed(db)
)
logger.info("Marked media scan as completed")
except Exception as e:
logger.warning(
"Failed to mark media scan as completed: %s",
e
)
except Exception as e:
logger.error(
"Failed to complete media scan: %s",
e,
exc_info=True
)
else:
logger.info(
"Skipping media scan - already completed on previous run"
)
else:
logger.info(
"Download service initialization skipped - "