"""Performance tests for large library scanning operations.

This module tests the performance characteristics of library scanning with
large numbers of series to ensure the application scales appropriately.

Timing notes:
    * Durations are measured with ``time.perf_counter()``, which is monotonic
      and high-resolution, so system clock adjustments cannot distort them.
    * Throughput figures divide by a floored duration (see ``_rate``) so a
      very fast, fully mocked run can never raise ``ZeroDivisionError``.
"""
import asyncio
import time
from contextlib import contextmanager
from pathlib import Path
from typing import List
from unittest.mock import AsyncMock, MagicMock, Mock, patch

import pytest

from src.core.entities.series import Serie
from src.core.SeriesApp import SeriesApp
from src.core.SerieScanner import SerieScanner

# Smallest duration used as a divisor when computing rates and ratios.
_MIN_ELAPSED_SECONDS = 0.001


def _mock_read_data(folder_name: str) -> Mock:
    """Create a mock Serie from a folder name for scanner patching.

    Args:
        folder_name: Name of the series folder being "read".

    Returns:
        A ``Mock`` constrained to the ``Serie`` interface whose ``key``,
        ``name`` and ``folder`` are derived from ``folder_name``.
    """
    serie = Mock(spec=Serie)
    serie.key = f"key_{folder_name}"
    serie.name = f"Series {folder_name}"
    serie.folder = folder_name
    serie.year = 2024
    serie.episodeDict = {}
    return serie


@contextmanager
def _scanner_patches(scanner):
    """Patch two private scanner methods for the duration of a ``with`` block.

    The name-mangled ``__read_data_from_file`` attribute is replaced with
    ``_mock_read_data`` and ``__get_missing_episodes_and_season`` with a stub
    returning ``({}, "aniworld.to")``.

    Args:
        scanner: The ``SerieScanner`` instance to patch.
    """
    with patch.object(
        scanner,
        '_SerieScanner__read_data_from_file',
        side_effect=_mock_read_data,
    ), patch.object(
        scanner,
        '_SerieScanner__get_missing_episodes_and_season',
        return_value=({}, "aniworld.to"),
    ):
        yield


def _timed_scan(scanner) -> float:
    """Run ``scanner.scan()`` under ``_scanner_patches`` and time it.

    Args:
        scanner: The ``SerieScanner`` instance to scan with.

    Returns:
        Elapsed time of the ``scan()`` call in seconds.
    """
    with _scanner_patches(scanner):
        started = time.perf_counter()
        scanner.scan()
        return time.perf_counter() - started


def _rate(count: int, elapsed_seconds: float) -> float:
    """Return ``count`` per second, flooring the divisor to avoid zero."""
    return count / max(elapsed_seconds, _MIN_ELAPSED_SECONDS)


def _rss_mb(process) -> float:
    """Return the resident set size of a ``psutil.Process`` in megabytes."""
    return process.memory_info().rss / 1024 / 1024


class TestLargeLibraryScanning:
    """Test performance of library scanning with large numbers of series."""

    @pytest.mark.asyncio
    async def test_scan_1000_series_completes_under_time_limit(self, tmp_path):
        """Test that scanning 1000 series completes within acceptable time."""
        max_scan_time_seconds = 300
        num_series = 1000

        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        for i in range(num_series):
            (anime_dir / f"Series_{i:04d}").mkdir()

        scanner = SerieScanner(str(anime_dir), Mock())
        elapsed_time = _timed_scan(scanner)

        assert elapsed_time < max_scan_time_seconds, (
            f"Scan took {elapsed_time:.2f}s, exceeds limit of {max_scan_time_seconds}s"
        )
        assert len(scanner.keyDict) == num_series

        series_per_second = _rate(num_series, elapsed_time)
        print(f"\nPerformance: {series_per_second:.2f} series/second")
        print(f"Total time: {elapsed_time:.2f}s for {num_series} series")

    @pytest.mark.asyncio
    async def test_scan_100_series_baseline_performance(self, tmp_path):
        """Establish baseline performance for scanning 100 series."""
        num_series = 100

        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        for i in range(num_series):
            (anime_dir / f"Series_{i:03d}").mkdir()

        scanner = SerieScanner(str(anime_dir), Mock())
        elapsed_time = _timed_scan(scanner)

        assert len(scanner.keyDict) == num_series
        assert elapsed_time < 30, f"Scan took {elapsed_time:.2f}s, too slow"

        print(f"\nBaseline: {elapsed_time:.2f}s for {num_series} series")
        print(f"Rate: {_rate(num_series, elapsed_time):.2f} series/second")

    @pytest.mark.asyncio
    async def test_scan_progress_callbacks_with_large_library(self, tmp_path):
        """Test that progress callbacks work efficiently with large library."""
        num_series = 500

        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        for i in range(num_series):
            (anime_dir / f"Series_{i:03d}").mkdir()

        scanner = SerieScanner(str(anime_dir), Mock())

        progress_calls = []
        # ``list.append`` is the cheapest possible subscriber, so the timing
        # below reflects the scanner's dispatch cost rather than the callback.
        scanner.subscribe_on_progress(progress_calls.append)

        elapsed_time = _timed_scan(scanner)

        assert len(progress_calls) > 0
        # Allow for start/complete events
        assert len(progress_calls) <= num_series + 10
        assert elapsed_time < 60, (
            f"Scan with callbacks took {elapsed_time:.2f}s, too slow"
        )

        print(f"\nWith callbacks: {len(progress_calls)} progress updates")
        print(f"Scan time: {elapsed_time:.2f}s")


class TestDatabaseQueryPerformance:
    """Test database query performance during scans."""

    @pytest.mark.asyncio
    async def test_database_query_performance_1000_series(self):
        """Test database query performance with 1000 series.

        ``AnimeSeriesService.get_all`` is replaced by an ``AsyncMock``, so
        this measures call overhead only, not a real database round trip.
        """
        from src.server.database.service import AnimeSeriesService

        num_series = 1000

        mock_series = []
        for i in range(num_series):
            mock_serie = Mock()
            mock_serie.id = i
            mock_serie.key = f"series_key_{i:04d}"
            mock_serie.name = f"Test Series {i}"
            mock_serie.folder = f"Series_{i:04d}"
            mock_series.append(mock_serie)

        mock_db = AsyncMock()

        with patch(
            'src.server.database.connection.get_db_session'
        ) as mock_get_db, patch.object(
            AnimeSeriesService,
            'get_all',
            new_callable=AsyncMock,
            return_value=mock_series,
        ):
            mock_get_db.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_get_db.return_value.__aexit__ = AsyncMock(return_value=None)

            started = time.perf_counter()
            result = await AnimeSeriesService.get_all(mock_db, with_episodes=False)
            elapsed_time = time.perf_counter() - started

        assert elapsed_time < 5.0, (
            f"Query took {elapsed_time:.2f}s, exceeds 5s limit"
        )
        assert len(result) == num_series

        print(f"\nDB Query: {elapsed_time:.2f}s for {num_series} series")

    @pytest.mark.asyncio
    async def test_batch_database_writes_performance(self):
        """Test performance of batch database writes."""
        num_series = 500
        mock_db = AsyncMock()
        create_mock = AsyncMock()

        # NOTE(review): the loop awaits ``create_mock`` directly, so the
        # patched ``AnimeSeriesService.create`` attribute is never invoked.
        # Kept as-is to preserve what the test measures; confirm whether the
        # call should go through the service instead.
        with patch(
            'src.server.database.service.AnimeSeriesService.create',
            side_effect=create_mock,
        ):
            started = time.perf_counter()
            for i in range(num_series):
                await create_mock(
                    mock_db,
                    key=f"key_{i}",
                    name=f"Series {i}",
                    folder=f"Folder_{i}",
                )
            elapsed_time = time.perf_counter() - started

        # Verify the writes actually happened before judging their speed.
        assert create_mock.await_count == num_series
        assert elapsed_time < 10.0, (
            f"Batch writes took {elapsed_time:.2f}s, too slow"
        )

        writes_per_second = _rate(num_series, elapsed_time)
        print(f"\nDB Writes: {writes_per_second:.2f} writes/second")
        print(f"Total: {elapsed_time:.2f}s for {num_series} series")

    @pytest.mark.asyncio
    async def test_concurrent_database_access_performance(self):
        """Test database performance with concurrent access.

        Each task simulates its queries with ``asyncio.sleep``; no database
        object is touched.
        """
        num_concurrent = 50
        queries_per_task = 10

        async def query_task(task_id: int):
            for _ in range(queries_per_task):
                await asyncio.sleep(0.01)
            return f"op_{task_id}"

        started = time.perf_counter()
        results = await asyncio.gather(
            *[query_task(i) for i in range(num_concurrent)]
        )
        elapsed_time = time.perf_counter() - started

        total_queries = num_concurrent * queries_per_task
        queries_per_second = _rate(total_queries, elapsed_time)

        assert len(results) == num_concurrent
        assert elapsed_time < 30.0, (
            f"Concurrent access took {elapsed_time:.2f}s, too slow"
        )

        print(f"\nConcurrent DB: {queries_per_second:.2f} queries/second")
        print(f"Total: {total_queries} queries in {elapsed_time:.2f}s")


class TestMemoryUsageDuringScans:
    """Test memory usage characteristics during large scans."""

    @pytest.mark.asyncio
    async def test_memory_usage_stays_under_limit(self, tmp_path):
        """Test that memory usage stays below 500MB during large scan."""
        import psutil

        process = psutil.Process()
        baseline_memory_mb = _rss_mb(process)

        num_series = 1000
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        for i in range(num_series):
            (anime_dir / f"Series_{i:04d}").mkdir()

        scanner = SerieScanner(str(anime_dir), Mock())
        with _scanner_patches(scanner):
            scanner.scan()

        current_memory_mb = _rss_mb(process)
        memory_increase_mb = current_memory_mb - baseline_memory_mb

        assert memory_increase_mb < 500, (
            f"Memory increased by {memory_increase_mb:.2f}MB, exceeds 500MB limit"
        )

        print(f"\nMemory: Baseline {baseline_memory_mb:.2f}MB")
        print(f"After scan: {current_memory_mb:.2f}MB")
        print(f"Increase: {memory_increase_mb:.2f}MB for {num_series} series")

    @pytest.mark.asyncio
    async def test_memory_efficient_series_storage(self):
        """Test that series are stored efficiently in memory.

        NOTE: ``sys.getsizeof`` is shallow. The figure below is the size of
        the dict's own table only and does not include the stored ``Serie``
        mocks or their key strings.
        """
        import sys

        num_series = 1000
        series_dict = {}
        for i in range(num_series):
            serie = Mock(spec=Serie)
            serie.key = f"series_key_{i:04d}"
            serie.name = f"Test Series {i}"
            serie.folder = f"Series_{i:04d}"
            serie.episodeDict = {}
            series_dict[serie.key] = serie

        dict_size = sys.getsizeof(series_dict)
        avg_size_per_series = dict_size / num_series

        assert avg_size_per_series < 10000, (
            f"Average size per series {avg_size_per_series}bytes is too large"
        )

        print(f"\nSeries Storage: {dict_size} bytes for {num_series} series")
        print(f"Average: {avg_size_per_series:.2f} bytes/series")


class TestConcurrentScanOperations:
    """Test handling of concurrent scan operations."""

    @pytest.mark.asyncio
    async def test_concurrent_scan_prevention(self):
        """Test that only one scan can run at a time."""
        # Use a simple mock service with a scan lock instead of requiring
        # the full AnimeService dependency chain.
        service = MagicMock()
        service._scan_lock = asyncio.Lock()

        # Track how many "scans" hold the lock at once; merely checking that
        # both tasks finished would pass even without any locking.
        active_scans = 0
        max_active_scans = 0

        async def long_running_scan():
            nonlocal active_scans, max_active_scans
            async with service._scan_lock:
                active_scans += 1
                max_active_scans = max(max_active_scans, active_scans)
                try:
                    await asyncio.sleep(0.5)
                finally:
                    active_scans -= 1

        task1 = asyncio.create_task(long_running_scan())
        # Give the first task time to acquire the lock before starting the
        # second one, so the two genuinely contend for it.
        await asyncio.sleep(0.1)
        task2 = asyncio.create_task(long_running_scan())

        await task1
        await task2

        assert task1.done()
        assert task2.done()
        assert max_active_scans == 1, (
            f"{max_active_scans} scans held the lock simultaneously"
        )

    @pytest.mark.asyncio
    async def test_scan_handles_concurrent_database_access(self):
        """Test that scans handle concurrent database access properly.

        Operations are simulated with ``asyncio.sleep``; no database object
        is touched.
        """
        num_concurrent_operations = 20

        async def database_operation(operation_id: int):
            await asyncio.sleep(0.05)
            return f"op_{operation_id}"

        started = time.perf_counter()
        results = await asyncio.gather(
            *[database_operation(i) for i in range(num_concurrent_operations)]
        )
        elapsed_time = time.perf_counter() - started

        assert len(results) == num_concurrent_operations
        assert elapsed_time < 5.0, (
            f"Concurrent operations took {elapsed_time:.2f}s, too slow"
        )

        print(f"\nConcurrent ops: {len(results)} operations in {elapsed_time:.2f}s")


class TestLargeScanScalability:
    """Test scalability characteristics with increasing library sizes."""

    @pytest.mark.asyncio
    async def test_scan_time_scales_linearly(self, tmp_path):
        """Test that scan time scales approximately linearly with library size."""
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        mock_loader = Mock()

        library_sizes = [100, 200, 400, 800]
        scan_times = []

        for size in library_sizes:
            for i in range(size):
                (anime_dir / f"Size{size}_Series_{i:04d}").mkdir()

            scanner = SerieScanner(str(anime_dir), mock_loader)
            scan_times.append(_timed_scan(scanner))

            # Snapshot the listing first so removing entries cannot disturb
            # the directory iteration.
            for folder in list(anime_dir.iterdir()):
                if folder.name.startswith(f"Size{size}_"):
                    folder.rmdir()

        samples = list(zip(library_sizes, scan_times))
        for (prev_size, prev_time), (next_size, next_time) in zip(
            samples, samples[1:]
        ):
            ratio = next_time / max(prev_time, _MIN_ELAPSED_SECONDS)
            size_ratio = next_size / prev_size
            # Allow 3x slack over perfectly linear growth to absorb noise.
            assert ratio < size_ratio * 3, (
                f"Scaling is worse than linear: {ratio:.2f}x time for {size_ratio}x size"
            )

        print("\nScalability test:")
        for size, time_taken in samples:
            rate = _rate(size, time_taken)
            print(f" {size} series: {time_taken:.2f}s ({rate:.2f} series/sec)")

    @pytest.mark.asyncio
    async def test_memory_scales_acceptably_with_size(self, tmp_path):
        """Test that memory usage scales acceptably with library size."""
        import psutil

        process = psutil.Process()

        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        mock_loader = Mock()

        library_sizes = [100, 500, 1000]
        memory_usage = []

        for size in library_sizes:
            for i in range(size):
                (anime_dir / f"Size{size}_S{i:04d}").mkdir()

            baseline = _rss_mb(process)

            scanner = SerieScanner(str(anime_dir), mock_loader)
            with _scanner_patches(scanner):
                scanner.scan()

            memory_usage.append(_rss_mb(process) - baseline)

            # Snapshot the listing first so removing entries cannot disturb
            # the directory iteration.
            for folder in list(anime_dir.iterdir()):
                if folder.name.startswith(f"Size{size}_"):
                    folder.rmdir()

        samples = list(zip(library_sizes, memory_usage))
        for (prev_size, prev_mem), (next_size, next_mem) in zip(
            samples, samples[1:]
        ):
            # Use a floor of 1MB to avoid near-zero division
            ratio = max(next_mem, 1.0) / max(prev_mem, 1.0)
            size_ratio = next_size / prev_size
            assert ratio <= size_ratio * 5, (
                f"Memory scaling is too aggressive: {ratio:.2f}x for {size_ratio}x size"
            )

        print("\nMemory scaling:")
        for size, mem in samples:
            per_series = (mem / size) * 1024 if size > 0 else 0
            print(f" {size} series: {mem:.2f}MB ({per_series:.2f}KB/series)")