feat: implement NFO ID storage and media scan tracking

Task 3 (NFO data):
- Add parse_nfo_ids() method to NFOService
- Extract TMDB/TVDB IDs from NFO files during scan
- Update database with extracted IDs
- Add comprehensive unit and integration tests

Task 4 (Media scan):
- Track initial media scan with SystemSettings flag
- Run background loading only on first startup
- Skip media scan on subsequent runs
This commit is contained in:
2026-01-21 19:36:54 +01:00
parent 050db40af3
commit 125892abe5
6 changed files with 572 additions and 43 deletions

View File

@@ -287,6 +287,87 @@ class NFOService:
return nfo_path
def parse_nfo_ids(self, nfo_path: Path) -> Dict[str, Optional[int]]:
    """Parse TMDB ID and TVDB ID from an existing NFO file.

    Looks first at Kodi-style ``<uniqueid type="...">`` elements, then
    falls back to legacy ``<tmdbid>``/``<tvdbid>`` elements for any ID
    still missing. Parse failures are logged, never raised.

    Args:
        nfo_path: Path to tvshow.nfo file

    Returns:
        Dictionary with 'tmdb_id' and 'tvdb_id' keys.
        Values are integers if found, None otherwise.

    Example:
        >>> ids = nfo_service.parse_nfo_ids(Path("/anime/series/tvshow.nfo"))
        >>> print(ids)
        {'tmdb_id': 1429, 'tvdb_id': 79168}
    """

    def _coerce_id(raw: str, name: str, where: str) -> Optional[int]:
        # Single place for the str->int conversion that was previously
        # duplicated four times; returns None (with a warning) on bad input.
        try:
            return int(raw)
        except ValueError:
            logger.warning(f"Invalid {name} ID format in {where}: {raw}")
            return None

    result: Dict[str, Optional[int]] = {"tmdb_id": None, "tvdb_id": None}

    if not nfo_path.exists():
        logger.debug(f"NFO file not found: {nfo_path}")
        return result

    try:
        tree = etree.parse(str(nfo_path))
        root = tree.getroot()

        # Preferred source: <uniqueid type="tmdb|tvdb"> elements. A later
        # valid element of the same type overwrites an earlier one,
        # matching the original behavior; invalid text only logs a warning.
        for uniqueid in root.findall(".//uniqueid"):
            uid_type = uniqueid.get("type")
            uid_text = uniqueid.text
            if not uid_text:
                continue
            if uid_type == "tmdb":
                parsed = _coerce_id(uid_text, "TMDB", "NFO")
                if parsed is not None:
                    result["tmdb_id"] = parsed
            elif uid_type == "tvdb":
                parsed = _coerce_id(uid_text, "TVDB", "NFO")
                if parsed is not None:
                    result["tvdb_id"] = parsed

        # Fallback: legacy dedicated <tmdbid>/<tvdbid> elements, consulted
        # only for an ID not already found above.
        if result["tmdb_id"] is None:
            tmdbid_elem = root.find(".//tmdbid")
            if tmdbid_elem is not None and tmdbid_elem.text:
                result["tmdb_id"] = _coerce_id(
                    tmdbid_elem.text, "TMDB", "tmdbid element"
                )
        if result["tvdb_id"] is None:
            tvdbid_elem = root.find(".//tvdbid")
            if tvdbid_elem is not None and tvdbid_elem.text:
                result["tvdb_id"] = _coerce_id(
                    tvdbid_elem.text, "TVDB", "tvdbid element"
                )

        logger.debug(
            f"Parsed IDs from NFO: {nfo_path.name} - "
            f"TMDB: {result['tmdb_id']}, TVDB: {result['tvdb_id']}"
        )
    except etree.XMLSyntaxError as e:
        logger.error(f"Invalid XML in NFO file {nfo_path}: {e}")
    except Exception as e:  # pylint: disable=broad-except
        logger.error(f"Error parsing NFO file {nfo_path}: {e}")

    return result
def _find_best_match(
self,
results: List[Dict[str, Any]],

View File

@@ -102,21 +102,84 @@ class SeriesManagerService:
image_size=settings.nfo_image_size
)
async def process_nfo_for_series(self, serie_folder: str, serie_name: str, year: Optional[int] = None):
async def process_nfo_for_series(
self,
serie_folder: str,
serie_name: str,
serie_key: str,
year: Optional[int] = None,
db=None
):
"""Process NFO file for a series (create or update).
Args:
serie_folder: Series folder name
serie_name: Series display name
serie_key: Series unique identifier for database updates
year: Release year (helps with TMDB matching)
db: Optional database session for updating IDs
"""
if not self.nfo_service:
return
try:
folder_path = Path(self.anime_directory) / serie_folder
nfo_path = folder_path / "tvshow.nfo"
nfo_exists = await self.nfo_service.check_nfo_exists(serie_folder)
# If NFO exists, parse IDs and update database
if nfo_exists and db:
logger.debug(f"Parsing IDs from existing NFO for '{serie_name}'")
ids = self.nfo_service.parse_nfo_ids(nfo_path)
if ids["tmdb_id"] or ids["tvdb_id"]:
# Update database with extracted IDs
from datetime import datetime, timezone
from sqlalchemy import select
from src.server.database.models import AnimeSeries
result = await db.execute(
select(AnimeSeries).filter(AnimeSeries.key == serie_key)
)
series = result.scalars().first()
if series:
now = datetime.now(timezone.utc)
series.has_nfo = True
if series.nfo_created_at is None:
series.nfo_created_at = now
series.nfo_updated_at = now
if ids["tmdb_id"] is not None:
series.tmdb_id = ids["tmdb_id"]
logger.debug(
f"Updated TMDB ID for '{serie_name}': "
f"{ids['tmdb_id']}"
)
if ids["tvdb_id"] is not None:
series.tvdb_id = ids["tvdb_id"]
logger.debug(
f"Updated TVDB ID for '{serie_name}': "
f"{ids['tvdb_id']}"
)
await db.commit()
logger.info(
f"Updated database with IDs from NFO for "
f"'{serie_name}' - TMDB: {ids['tmdb_id']}, "
f"TVDB: {ids['tvdb_id']}"
)
else:
logger.warning(
f"Series not found in database for NFO ID update: "
f"{serie_key}"
)
# Create or update NFO file if configured
if not nfo_exists and self.auto_create_nfo:
logger.info(f"Creating NFO for '{serie_name}' ({serie_folder})")
await self.nfo_service.create_tvshow_nfo(
@@ -156,9 +219,10 @@ class SeriesManagerService:
This method:
1. Uses SerieList to scan series folders
2. For each series without NFO (if auto_create=True), creates one
3. For each series with NFO (if update_on_scan=True), updates it
4. Runs operations concurrently for better performance
2. For each series with existing NFO, reads TMDB/TVDB IDs and updates database
3. For each series without NFO (if auto_create=True), creates one
4. For each series with NFO (if update_on_scan=True), updates it
5. Runs operations concurrently for better performance
"""
if not self.nfo_service:
logger.info("NFO service not enabled, skipping NFO processing")
@@ -173,30 +237,37 @@ class SeriesManagerService:
logger.info(f"Processing NFO for {len(all_series)} series...")
# Create tasks for concurrent processing
tasks = []
for serie in all_series:
# Extract year from first air date if available
year = None
if hasattr(serie, 'year') and serie.year:
year = serie.year
# Import database session
from src.server.database.connection import get_db_session
# Create database session for ID updates
async with get_db_session() as db:
# Create tasks for concurrent processing
tasks = []
for serie in all_series:
# Extract year from first air date if available
year = None
if hasattr(serie, 'year') and serie.year:
year = serie.year
task = self.process_nfo_for_series(
serie_folder=serie.folder,
serie_name=serie.name,
serie_key=serie.key,
year=year,
db=db
)
tasks.append(task)
task = self.process_nfo_for_series(
serie_folder=serie.folder,
serie_name=serie.name,
year=year
)
tasks.append(task)
# Process in batches to avoid overwhelming TMDB API
batch_size = 5
for i in range(0, len(tasks), batch_size):
batch = tasks[i:i + batch_size]
await asyncio.gather(*batch, return_exceptions=True)
# Small delay between batches to respect rate limits
if i + batch_size < len(tasks):
await asyncio.sleep(2)
# Process in batches to avoid overwhelming TMDB API
batch_size = 5
for i in range(0, len(tasks), batch_size):
batch = tasks[i:i + batch_size]
await asyncio.gather(*batch, return_exceptions=True)
# Small delay between batches to respect rate limits
if i + batch_size < len(tasks):
await asyncio.sleep(2)
logger.info("NFO processing complete")

View File

@@ -341,8 +341,52 @@ async def lifespan(_application: FastAPI):
await background_loader.start()
logger.info("Background loader service started")
# Check for incomplete series and queue background loading
await _check_incomplete_series_on_startup(background_loader)
# Check if initial media scan has been completed
is_media_scan_done = False
try:
async with get_db_session() as db:
is_media_scan_done = (
await SystemSettingsService
.is_initial_media_scan_completed(db)
)
except Exception as e:
logger.warning(
"Failed to check media scan status: %s, assuming not done",
e
)
is_media_scan_done = False
# Run media scan only on first run
if not is_media_scan_done:
logger.info("Performing initial media scan...")
try:
# Check for incomplete series and queue background loading
await _check_incomplete_series_on_startup(background_loader)
logger.info("Initial media scan completed")
# Mark media scan as completed
try:
async with get_db_session() as db:
await (
SystemSettingsService
.mark_initial_media_scan_completed(db)
)
logger.info("Marked media scan as completed")
except Exception as e:
logger.warning(
"Failed to mark media scan as completed: %s",
e
)
except Exception as e:
logger.error(
"Failed to complete media scan: %s",
e,
exc_info=True
)
else:
logger.info(
"Skipping media scan - already completed on previous run"
)
else:
logger.info(
"Download service initialization skipped - "