"""Performance tests for large library scanning operations.

This module tests the performance characteristics of library scanning with
large numbers of series to ensure the application scales appropriately.
"""

import asyncio
import time
from unittest.mock import AsyncMock, Mock, patch

import pytest

from src.core.entities.series import Serie
from src.core.SerieScanner import SerieScanner


def _mock_serie_class(folder, **kwargs):
    """Build a lightweight Serie mock for *folder*.

    Used as the ``side_effect`` when patching ``SerieScanner._SerieClass`` so
    that scans skip real file parsing and run at benchmark speed.
    """
    serie = Mock(spec=Serie)
    serie.key = f"key_{folder}"
    serie.name = f"Series {folder}"
    serie.folder = folder
    serie.episodeDict = {}
    return serie


def _make_mock_loader():
    """Return a loader mock whose ``GetKey`` always yields a fixed key."""
    mock_loader = Mock()
    mock_loader.GetKey.return_value = "test_key"
    return mock_loader


class TestLargeLibraryScanning:
    """Test performance of library scanning with large numbers of series."""

    @pytest.mark.asyncio
    async def test_scan_1000_series_completes_under_time_limit(self, tmp_path):
        """Test that scanning 1000 series completes within acceptable time."""
        # Target: < 5 minutes for 1000 series
        max_scan_time_seconds = 300

        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()

        # Create 1000 mock series folders, each with a minimal data file.
        num_series = 1000
        for i in range(num_series):
            series_folder = anime_dir / f"Series_{i:04d}"
            series_folder.mkdir()
            (series_folder / "data.json").write_text("{}")

        scanner = SerieScanner(str(anime_dir), _make_mock_loader())

        with patch.object(scanner, '_SerieClass', side_effect=_mock_serie_class):
            # perf_counter is monotonic and high-resolution, unlike time.time.
            start_time = time.perf_counter()
            scanner.scan()
            elapsed_time = time.perf_counter() - start_time

        # Verify results
        assert elapsed_time < max_scan_time_seconds, \
            f"Scan took {elapsed_time:.2f}s, exceeds limit of {max_scan_time_seconds}s"
        assert len(scanner.keyDict) == num_series

        # Guard against a zero-duration scan on very fast machines.
        series_per_second = num_series / max(elapsed_time, 1e-9)
        print(f"\nPerformance: {series_per_second:.2f} series/second")
        print(f"Total time: {elapsed_time:.2f}s for {num_series} series")

    @pytest.mark.asyncio
    async def test_scan_100_series_baseline_performance(self, tmp_path):
        """Establish baseline performance for scanning 100 series."""
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()

        num_series = 100
        for i in range(num_series):
            series_folder = anime_dir / f"Series_{i:03d}"
            series_folder.mkdir()
            (series_folder / "data.json").write_text("{}")

        scanner = SerieScanner(str(anime_dir), _make_mock_loader())

        with patch.object(scanner, '_SerieClass', side_effect=_mock_serie_class):
            start_time = time.perf_counter()
            scanner.scan()
            elapsed_time = time.perf_counter() - start_time

        assert len(scanner.keyDict) == num_series
        # Should be very fast for 100 series
        assert elapsed_time < 30, f"Scan took {elapsed_time:.2f}s, too slow"

        print(f"\nBaseline: {elapsed_time:.2f}s for {num_series} series")
        print(f"Rate: {num_series / max(elapsed_time, 1e-9):.2f} series/second")

    @pytest.mark.asyncio
    async def test_scan_progress_callbacks_with_large_library(self, tmp_path):
        """Test that progress callbacks work efficiently with large library."""
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()

        num_series = 500
        for i in range(num_series):
            (anime_dir / f"Series_{i:03d}").mkdir()

        scanner = SerieScanner(str(anime_dir), _make_mock_loader())

        # Track progress callback invocations
        progress_calls = []

        def progress_callback(data):
            progress_calls.append(data)

        scanner.subscribe_on_progress(progress_callback)

        with patch.object(scanner, '_SerieClass', side_effect=_mock_serie_class):
            start_time = time.perf_counter()
            scanner.scan()
            elapsed_time = time.perf_counter() - start_time

        # Verify progress callbacks were called, with at most one update
        # per scanned series (reasonable update frequency).
        assert len(progress_calls) > 0
        assert len(progress_calls) <= num_series

        # Progress callbacks shouldn't significantly impact performance
        assert elapsed_time < 60, \
            f"Scan with callbacks took {elapsed_time:.2f}s, too slow"

        print(f"\nWith callbacks: {len(progress_calls)} progress updates")
        print(f"Scan time: {elapsed_time:.2f}s")


class TestDatabaseQueryPerformance:
    """Test database query performance during scans."""

    @pytest.mark.asyncio
    async def test_database_query_performance_1000_series(self):
        """Test database query performance with 1000 series."""
        from src.server.database.service import AnimeSeriesService

        # Create mock series data
        num_series = 1000
        mock_series = []
        for i in range(num_series):
            mock_serie = Mock()
            mock_serie.id = i
            mock_serie.key = f"series_key_{i:04d}"
            mock_serie.name = f"Test Series {i}"
            mock_serie.folder = f"Series_{i:04d}"
            mock_series.append(mock_serie)

        # Mock database session; no real connection is opened.  The previous
        # version entered a live get_db_session() context even though the
        # service call itself was patched.
        mock_db = AsyncMock()

        # new_callable=AsyncMock guarantees the patched method is awaitable.
        with patch('src.server.database.service.AnimeSeriesService.get_all',
                   new_callable=AsyncMock, return_value=mock_series):
            start_time = time.perf_counter()
            result = await AnimeSeriesService.get_all(mock_db, with_episodes=False)
            elapsed_time = time.perf_counter() - start_time

        # Database query should be fast
        assert elapsed_time < 5.0, \
            f"Query took {elapsed_time:.2f}s, exceeds 5s limit"
        assert len(result) == num_series

        print(f"\nDB Query: {elapsed_time:.2f}s for {num_series} series")

    @pytest.mark.asyncio
    async def test_batch_database_writes_performance(self):
        """Test performance of batch database writes."""
        num_series = 500

        # Mock database operations.  The create mock is called directly; the
        # previous version also patched AnimeSeriesService.create but never
        # invoked it through the patch, so that patch was dead code.
        mock_db = AsyncMock()
        create_mock = AsyncMock()

        start_time = time.perf_counter()

        # Simulate batch creation
        for i in range(num_series):
            await create_mock(
                mock_db,
                key=f"key_{i}",
                name=f"Series {i}",
                folder=f"Folder_{i}"
            )

        elapsed_time = time.perf_counter() - start_time

        # Batch writes should be reasonably fast
        assert elapsed_time < 10.0, \
            f"Batch writes took {elapsed_time:.2f}s, too slow"

        writes_per_second = num_series / max(elapsed_time, 1e-9)
        print(f"\nDB Writes: {writes_per_second:.2f} writes/second")
        print(f"Total: {elapsed_time:.2f}s for {num_series} series")

    @pytest.mark.asyncio
    async def test_concurrent_database_access_performance(self):
        """Test database performance with concurrent access."""
        num_concurrent = 50
        queries_per_task = 10

        async def query_task(task_id: int):
            """Simulate concurrent database queries with a small delay each."""
            for _ in range(queries_per_task):
                await asyncio.sleep(0.01)

        start_time = time.perf_counter()

        # Run concurrent tasks
        tasks = [query_task(i) for i in range(num_concurrent)]
        await asyncio.gather(*tasks)

        elapsed_time = time.perf_counter() - start_time

        total_queries = num_concurrent * queries_per_task
        queries_per_second = total_queries / max(elapsed_time, 1e-9)

        # Should handle concurrent access efficiently
        assert elapsed_time < 30.0, \
            f"Concurrent access took {elapsed_time:.2f}s, too slow"

        print(f"\nConcurrent DB: {queries_per_second:.2f} queries/second")
        print(f"Total: {total_queries} queries in {elapsed_time:.2f}s")


class TestMemoryUsageDuringScans:
    """Test memory usage characteristics during large scans."""

    @pytest.mark.asyncio
    async def test_memory_usage_stays_under_limit(self, tmp_path):
        """Test that memory usage stays below 500MB during large scan."""
        import psutil

        process = psutil.Process()

        # Get baseline memory (RSS in MB)
        baseline_memory_mb = process.memory_info().rss / 1024 / 1024

        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()

        num_series = 1000
        for i in range(num_series):
            (anime_dir / f"Series_{i:04d}").mkdir()

        scanner = SerieScanner(str(anime_dir), _make_mock_loader())

        with patch.object(scanner, '_SerieClass', side_effect=_mock_serie_class):
            scanner.scan()

        # Check memory after scan
        current_memory_mb = process.memory_info().rss / 1024 / 1024
        memory_increase_mb = current_memory_mb - baseline_memory_mb

        # Memory increase should be under 500MB
        assert memory_increase_mb < 500, \
            f"Memory increased by {memory_increase_mb:.2f}MB, exceeds 500MB limit"

        print(f"\nMemory: Baseline {baseline_memory_mb:.2f}MB")
        print(f"After scan: {current_memory_mb:.2f}MB")
        print(f"Increase: {memory_increase_mb:.2f}MB for {num_series} series")

    @pytest.mark.asyncio
    async def test_memory_efficient_series_storage(self):
        """Test that series are stored efficiently in memory."""
        import sys

        # Create mock series objects
        num_series = 1000
        series_dict = {}

        for i in range(num_series):
            serie = Mock(spec=Serie)
            serie.key = f"series_key_{i:04d}"
            serie.name = f"Test Series {i}"
            serie.folder = f"Series_{i:04d}"
            serie.episodeDict = {}
            series_dict[serie.key] = serie

        # Calculate approximate size.  NOTE: sys.getsizeof is shallow — it
        # measures the dict structure only, not the contained mocks.
        dict_size = sys.getsizeof(series_dict)
        avg_size_per_series = dict_size / num_series

        # Each series should be reasonably small in memory
        assert avg_size_per_series < 10000, \
            f"Average size per series {avg_size_per_series}bytes is too large"

        print(f"\nSeries Storage: {dict_size} bytes for {num_series} series")
        print(f"Average: {avg_size_per_series:.2f} bytes/series")


class TestConcurrentScanOperations:
    """Test handling of concurrent scan operations."""

    @pytest.mark.asyncio
    async def test_concurrent_scan_prevention(self):
        """Test that only one scan can run at a time."""
        from src.server.services.anime_service import get_anime_service

        # Get service
        service = get_anime_service()

        # Mock the scan lock
        service._scan_lock = asyncio.Lock()

        async def long_running_scan():
            """Simulate a long-running scan holding the scan lock."""
            async with service._scan_lock:
                await asyncio.sleep(0.5)

        # Start first scan
        task1 = asyncio.create_task(long_running_scan())

        # Wait a bit to ensure first scan has lock
        await asyncio.sleep(0.1)

        # Try to start second scan - should be blocked on the lock
        task2 = asyncio.create_task(long_running_scan())

        # First task should finish
        await task1

        # Second task should complete after first
        await task2

        # Both should complete without error
        assert task1.done()
        assert task2.done()

    @pytest.mark.asyncio
    async def test_scan_handles_concurrent_database_access(self):
        """Test that scans handle concurrent database access properly."""
        num_concurrent_operations = 20

        async def database_operation(operation_id: int):
            """Simulate a concurrent database operation with query latency."""
            await asyncio.sleep(0.05)
            return f"op_{operation_id}"

        start_time = time.perf_counter()

        # Run operations concurrently
        results = await asyncio.gather(
            *[database_operation(i) for i in range(num_concurrent_operations)]
        )

        elapsed_time = time.perf_counter() - start_time

        # All operations should complete
        assert len(results) == num_concurrent_operations

        # Should complete reasonably fast with concurrency
        assert elapsed_time < 5.0, \
            f"Concurrent operations took {elapsed_time:.2f}s, too slow"

        print(f"\nConcurrent ops: {len(results)} operations in {elapsed_time:.2f}s")


class TestLargeScanScalability:
    """Test scalability characteristics with increasing library sizes."""

    @pytest.mark.asyncio
    async def test_scan_time_scales_linearly(self, tmp_path):
        """Test that scan time scales approximately linearly with library size."""
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()

        mock_loader = _make_mock_loader()

        scan_times = []
        library_sizes = [100, 200, 400, 800]

        for size in library_sizes:
            # Create series folders for this round only
            for i in range(size):
                (anime_dir / f"Size{size}_Series_{i:04d}").mkdir()

            scanner = SerieScanner(str(anime_dir), mock_loader)

            with patch.object(scanner, '_SerieClass',
                              side_effect=_mock_serie_class):
                start_time = time.perf_counter()
                scanner.scan()
                elapsed_time = time.perf_counter() - start_time

            scan_times.append(elapsed_time)

            # Clean up for next iteration
            for folder in anime_dir.iterdir():
                if folder.name.startswith(f"Size{size}_"):
                    folder.rmdir()

        # Calculate scaling factor: time should roughly double when size
        # doubles.  Guard the ratio against zero-duration mocked scans.
        for i in range(len(scan_times) - 1):
            ratio = scan_times[i + 1] / scan_times[i] if scan_times[i] > 0 else 1
            size_ratio = library_sizes[i + 1] / library_sizes[i]
            # Allow for some variance (up to 3x the size ratio)
            assert ratio < size_ratio * 3, \
                f"Scaling is worse than linear: {ratio:.2f}x time for {size_ratio}x size"

        print("\nScalability test:")
        for size, time_taken in zip(library_sizes, scan_times):
            rate = size / max(time_taken, 1e-9)
            print(f"  {size} series: {time_taken:.2f}s ({rate:.2f} series/sec)")

    @pytest.mark.asyncio
    async def test_memory_scales_acceptably_with_size(self, tmp_path):
        """Test that memory usage scales acceptably with library size."""
        import psutil

        process = psutil.Process()

        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()

        mock_loader = _make_mock_loader()

        library_sizes = [100, 500, 1000]
        memory_usage = []

        for size in library_sizes:
            # Create folders for this round only
            for i in range(size):
                (anime_dir / f"Size{size}_S{i:04d}").mkdir()

            baseline = process.memory_info().rss / 1024 / 1024

            scanner = SerieScanner(str(anime_dir), mock_loader)

            with patch.object(scanner, '_SerieClass',
                              side_effect=_mock_serie_class):
                scanner.scan()

            current = process.memory_info().rss / 1024 / 1024
            memory_increase = current - baseline
            memory_usage.append(memory_increase)

            # Cleanup
            for folder in anime_dir.iterdir():
                if folder.name.startswith(f"Size{size}_"):
                    folder.rmdir()

        # Memory should scale reasonably (not exponentially)
        for i in range(len(memory_usage) - 1):
            ratio = memory_usage[i + 1] / memory_usage[i] if memory_usage[i] > 0 else 1
            size_ratio = library_sizes[i + 1] / library_sizes[i]
            # Memory growth should be proportional or less
            assert ratio <= size_ratio * 2, \
                f"Memory scaling is too aggressive: {ratio:.2f}x for {size_ratio}x size"

        print("\nMemory scaling:")
        for size, mem in zip(library_sizes, memory_usage):
            per_series = (mem / size) * 1024 if size > 0 else 0  # Convert to KB
            print(f"  {size} series: {mem:.2f}MB ({per_series:.2f}KB/series)")