Files
Aniworld/tests/performance/test_large_library.py
Lukas 5526ab884a refactor: restructure core→server, split large entity files into database module
- Move src/core/ → src/server/
- Split SerieList.py (531 lines) and series.py (414 lines) into src/server/database/
- Add database/models.py for SQLAlchemy models
- Update all test imports to reflect new structure
- Remove deprecated test files (test_serie_class.py, test_serie_folder_with_year.py)
2026-06-04 21:11:53 +02:00

426 lines
15 KiB
Python

"""Performance tests for large library scanning operations.
This module tests the performance characteristics of library scanning
with large numbers of series to ensure the application scales appropriately.
"""
import asyncio
import time
from pathlib import Path
from typing import List
from unittest.mock import AsyncMock, MagicMock, Mock, patch
import pytest
from src.server.database.models import AnimeSeries
from src.server.SeriesApp import SeriesApp
from src.server.SerieScanner import SerieScanner
def _mock_read_data(folder_name):
    """Build a stand-in ``AnimeSeries`` derived from a folder name.

    Intended as the ``side_effect`` for the scanner's patched file reader:
    the returned mock carries a key and display name built from
    ``folder_name``, the folder itself, a fixed year and an empty
    episode dictionary.
    """
    fake_series = Mock(spec=AnimeSeries)
    attributes = {
        "key": f"key_{folder_name}",
        "name": f"Series {folder_name}",
        "folder": folder_name,
        "year": 2024,
        "episodeDict": {},
    }
    # Plain setattr mirrors direct attribute assignment on the spec'd mock.
    for attr_name, attr_value in attributes.items():
        setattr(fake_series, attr_name, attr_value)
    return fake_series
def _scanner_patches(scanner):
    """Return a context manager that stubs out the scanner's private helpers.

    While the context is active, the name-mangled
    ``__read_data_from_file`` attribute of ``scanner`` is replaced by a mock
    that delegates to ``_mock_read_data``, and
    ``__get_missing_episodes_and_season`` is replaced by a mock that returns
    ``({}, "aniworld.to")``.
    """
    from contextlib import contextmanager

    @contextmanager
    def _patched_internals():
        # Patchers are inert until entered, so building them up front does
        # not touch the scanner.
        reader_patch = patch.object(
            scanner, '_SerieScanner__read_data_from_file',
            side_effect=_mock_read_data,
        )
        missing_patch = patch.object(
            scanner, '_SerieScanner__get_missing_episodes_and_season',
            return_value=({}, "aniworld.to"),
        )
        with reader_patch:
            with missing_patch:
                yield

    return _patched_internals()
class TestLargeLibraryScanning:
    """Test performance of library scanning with large numbers of series.

    Every test creates empty series folders under ``tmp_path``, runs
    ``SerieScanner.scan()`` with the scanner's private helpers patched via
    ``_scanner_patches`` and checks the elapsed time against a fixed budget.
    Intervals are measured with ``time.perf_counter()`` because it is
    monotonic and therefore unaffected by wall-clock adjustments.
    """

    @pytest.mark.asyncio
    async def test_scan_1000_series_completes_under_time_limit(self, tmp_path):
        """Test that scanning 1000 series completes within acceptable time."""
        max_scan_time_seconds = 300
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        num_series = 1000
        for i in range(num_series):
            (anime_dir / f"Series_{i:04d}").mkdir()

        mock_loader = Mock()
        scanner = SerieScanner(str(anime_dir), mock_loader)

        with _scanner_patches(scanner):
            start_time = time.perf_counter()
            scanner.scan()
            elapsed_time = time.perf_counter() - start_time

        assert elapsed_time < max_scan_time_seconds, \
            f"Scan took {elapsed_time:.2f}s, exceeds limit of {max_scan_time_seconds}s"
        assert len(scanner.keyDict) == num_series

        # Floor the divisor so a near-instant scan cannot divide by zero.
        series_per_second = num_series / max(elapsed_time, 0.001)
        print(f"\nPerformance: {series_per_second:.2f} series/second")
        print(f"Total time: {elapsed_time:.2f}s for {num_series} series")

    @pytest.mark.asyncio
    async def test_scan_100_series_baseline_performance(self, tmp_path):
        """Establish baseline performance for scanning 100 series."""
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        num_series = 100
        for i in range(num_series):
            (anime_dir / f"Series_{i:03d}").mkdir()

        mock_loader = Mock()
        scanner = SerieScanner(str(anime_dir), mock_loader)

        with _scanner_patches(scanner):
            start_time = time.perf_counter()
            scanner.scan()
            elapsed_time = time.perf_counter() - start_time

        assert len(scanner.keyDict) == num_series
        assert elapsed_time < 30, f"Scan took {elapsed_time:.2f}s, too slow"

        print(f"\nBaseline: {elapsed_time:.2f}s for {num_series} series")
        # Floor the divisor so a near-instant scan cannot divide by zero.
        print(f"Rate: {num_series / max(elapsed_time, 0.001):.2f} series/second")

    @pytest.mark.asyncio
    async def test_scan_progress_callbacks_with_large_library(self, tmp_path):
        """Test that progress callbacks work efficiently with large library."""
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        num_series = 500
        for i in range(num_series):
            (anime_dir / f"Series_{i:03d}").mkdir()

        mock_loader = Mock()
        scanner = SerieScanner(str(anime_dir), mock_loader)

        # Collect every payload handed to the progress subscriber.
        progress_calls = []

        def progress_callback(data):
            progress_calls.append(data)

        scanner.subscribe_on_progress(progress_callback)

        with _scanner_patches(scanner):
            start_time = time.perf_counter()
            scanner.scan()
            elapsed_time = time.perf_counter() - start_time

        assert len(progress_calls) > 0
        assert len(progress_calls) <= num_series + 10  # Allow for start/complete events
        assert elapsed_time < 60, \
            f"Scan with callbacks took {elapsed_time:.2f}s, too slow"

        print(f"\nWith callbacks: {len(progress_calls)} progress updates")
        print(f"Scan time: {elapsed_time:.2f}s")
class TestDatabaseQueryPerformance:
    """Test database query performance during scans.

    All database interaction in this class is mocked, so the measured times
    reflect mock and event-loop overhead only. Intervals are measured with
    ``time.perf_counter()`` because it is monotonic.
    """

    @pytest.mark.asyncio
    async def test_database_query_performance_1000_series(self):
        """Test database query performance with 1000 series."""
        from src.server.database.service import AnimeSeriesService

        num_series = 1000
        mock_series = []
        for i in range(num_series):
            mock_serie = Mock()
            mock_serie.id = i
            mock_serie.key = f"series_key_{i:04d}"
            mock_serie.name = f"Test Series {i}"
            mock_serie.folder = f"Series_{i:04d}"
            mock_series.append(mock_serie)

        mock_db = AsyncMock()

        # NOTE(review): get_all is replaced by an AsyncMock returning the
        # prepared list, so this times the mock rather than a real query.
        with patch('src.server.database.connection.get_db_session') as mock_get_db, \
                patch.object(AnimeSeriesService, 'get_all',
                             new_callable=AsyncMock, return_value=mock_series):
            mock_get_db.return_value.__aenter__ = AsyncMock(return_value=mock_db)
            mock_get_db.return_value.__aexit__ = AsyncMock(return_value=None)

            start_time = time.perf_counter()
            result = await AnimeSeriesService.get_all(mock_db, with_episodes=False)
            elapsed_time = time.perf_counter() - start_time

        assert elapsed_time < 5.0, \
            f"Query took {elapsed_time:.2f}s, exceeds 5s limit"
        assert len(result) == num_series

        print(f"\nDB Query: {elapsed_time:.2f}s for {num_series} series")

    @pytest.mark.asyncio
    async def test_batch_database_writes_performance(self):
        """Test performance of batch database writes."""
        num_series = 500
        mock_db = AsyncMock()
        create_mock = AsyncMock()

        # NOTE(review): the loop awaits create_mock directly, so the patched
        # AnimeSeriesService.create attribute itself is never called here.
        with patch('src.server.database.service.AnimeSeriesService.create',
                   side_effect=create_mock):
            start_time = time.perf_counter()
            for i in range(num_series):
                await create_mock(
                    mock_db,
                    key=f"key_{i}",
                    name=f"Series {i}",
                    folder=f"Folder_{i}"
                )
            elapsed_time = time.perf_counter() - start_time

        assert elapsed_time < 10.0, \
            f"Batch writes took {elapsed_time:.2f}s, too slow"

        # Floor the divisor so a near-instant run cannot divide by zero.
        writes_per_second = num_series / max(elapsed_time, 0.001)
        print(f"\nDB Writes: {writes_per_second:.2f} writes/second")
        print(f"Total: {elapsed_time:.2f}s for {num_series} series")

    @pytest.mark.asyncio
    async def test_concurrent_database_access_performance(self):
        """Test database performance with concurrent access."""
        num_concurrent = 50
        queries_per_task = 10

        async def query_task(task_id: int):
            # Each simulated query is a 10 ms sleep; no database is touched.
            for _ in range(queries_per_task):
                await asyncio.sleep(0.01)
            return f"op_{task_id}"

        start_time = time.perf_counter()
        results = await asyncio.gather(
            *[query_task(i) for i in range(num_concurrent)]
        )
        elapsed_time = time.perf_counter() - start_time

        total_queries = num_concurrent * queries_per_task
        # Floor the divisor so the rate calculation can never divide by zero.
        queries_per_second = total_queries / max(elapsed_time, 0.001)

        assert len(results) == num_concurrent
        assert elapsed_time < 30.0, \
            f"Concurrent access took {elapsed_time:.2f}s, too slow"

        print(f"\nConcurrent DB: {queries_per_second:.2f} queries/second")
        print(f"Total: {total_queries} queries in {elapsed_time:.2f}s")
class TestMemoryUsageDuringScans:
    """Test memory usage characteristics during large scans."""

    @pytest.mark.asyncio
    async def test_memory_usage_stays_under_limit(self, tmp_path):
        """Test that memory usage stays below 500MB during large scan."""
        import psutil

        process = psutil.Process()

        def resident_mb():
            # Resident set size of the current process, in megabytes.
            return process.memory_info().rss / 1024 / 1024

        baseline_memory_mb = resident_mb()

        library_root = tmp_path / "anime"
        library_root.mkdir()
        num_series = 1000
        for index in range(num_series):
            (library_root / f"Series_{index:04d}").mkdir()

        scanner = SerieScanner(str(library_root), Mock())
        with _scanner_patches(scanner):
            scanner.scan()

        current_memory_mb = resident_mb()
        memory_increase_mb = current_memory_mb - baseline_memory_mb

        assert memory_increase_mb < 500, \
            f"Memory increased by {memory_increase_mb:.2f}MB, exceeds 500MB limit"

        print(f"\nMemory: Baseline {baseline_memory_mb:.2f}MB")
        print(f"After scan: {current_memory_mb:.2f}MB")
        print(f"Increase: {memory_increase_mb:.2f}MB for {num_series} series")

    @pytest.mark.asyncio
    async def test_memory_efficient_series_storage(self):
        """Test that series are stored efficiently in memory."""
        import sys

        num_series = 1000

        def build_entry(index):
            # One spec'd mock series with identifiers derived from the index.
            entry = Mock(spec=AnimeSeries)
            entry.key = f"series_key_{index:04d}"
            entry.name = f"Test Series {index}"
            entry.folder = f"Series_{index:04d}"
            entry.episodeDict = {}
            return entry

        series_dict = {}
        for index in range(num_series):
            entry = build_entry(index)
            series_dict[entry.key] = entry

        # NOTE(review): sys.getsizeof is shallow, so this is the size of the
        # dict container only and excludes the stored series objects.
        dict_size = sys.getsizeof(series_dict)
        avg_size_per_series = dict_size / num_series

        assert avg_size_per_series < 10000, \
            f"Average size per series {avg_size_per_series}bytes is too large"

        print(f"\nSeries Storage: {dict_size} bytes for {num_series} series")
        print(f"Average: {avg_size_per_series:.2f} bytes/series")
class TestConcurrentScanOperations:
    """Test handling of concurrent scan operations."""

    @pytest.mark.asyncio
    async def test_concurrent_scan_prevention(self):
        """Test that only one scan can run at a time.

        Two scans compete for the same ``asyncio.Lock``. The test records how
        many scans are inside the locked section at once and asserts that the
        maximum is exactly one.
        """
        # Use a simple mock service with a scan lock instead of requiring
        # the full AnimeService dependency chain.
        service = MagicMock()
        service._scan_lock = asyncio.Lock()

        active_scans = 0
        max_active_scans = 0

        async def long_running_scan():
            nonlocal active_scans, max_active_scans
            async with service._scan_lock:
                active_scans += 1
                max_active_scans = max(max_active_scans, active_scans)
                try:
                    await asyncio.sleep(0.5)
                finally:
                    active_scans -= 1

        task1 = asyncio.create_task(long_running_scan())
        # Give the first scan time to start and take the lock.
        await asyncio.sleep(0.1)
        assert service._scan_lock.locked()

        task2 = asyncio.create_task(long_running_scan())
        await task1
        await task2

        assert task1.done()
        assert task2.done()
        # The lock must have serialized the two scans.
        assert max_active_scans == 1
        assert active_scans == 0

    @pytest.mark.asyncio
    async def test_scan_handles_concurrent_database_access(self):
        """Test that scans handle concurrent database access properly."""
        num_concurrent_operations = 20

        async def database_operation(operation_id: int):
            # Each simulated operation is a 50 ms sleep; no database is used.
            await asyncio.sleep(0.05)
            return f"op_{operation_id}"

        # perf_counter is monotonic, so the interval cannot be skewed by
        # wall-clock adjustments.
        start_time = time.perf_counter()
        results = await asyncio.gather(
            *[database_operation(i) for i in range(num_concurrent_operations)]
        )
        elapsed_time = time.perf_counter() - start_time

        assert len(results) == num_concurrent_operations
        assert elapsed_time < 5.0, \
            f"Concurrent operations took {elapsed_time:.2f}s, too slow"

        print(f"\nConcurrent ops: {len(results)} operations in {elapsed_time:.2f}s")
class TestLargeScanScalability:
    """Test scalability characteristics with increasing library sizes.

    Each test reuses a single library directory: for every size it creates
    that many prefixed folders, runs a patched scan, records a measurement
    and removes the folders again before the next size.
    """

    @pytest.mark.asyncio
    async def test_scan_time_scales_linearly(self, tmp_path):
        """Test that scan time scales approximately linearly with library size."""
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        mock_loader = Mock()

        scan_times = []
        library_sizes = [100, 200, 400, 800]

        for size in library_sizes:
            for i in range(size):
                (anime_dir / f"Size{size}_Series_{i:04d}").mkdir()

            scanner = SerieScanner(str(anime_dir), mock_loader)
            with _scanner_patches(scanner):
                # perf_counter is monotonic, unlike the wall clock.
                start_time = time.perf_counter()
                scanner.scan()
                elapsed_time = time.perf_counter() - start_time
            scan_times.append(elapsed_time)

            # Snapshot the listing first so entries are not removed while
            # the directory is still being iterated.
            for folder in list(anime_dir.iterdir()):
                if folder.name.startswith(f"Size{size}_"):
                    folder.rmdir()

        # Compare each measurement with the next larger library.
        for prev_time, next_time, prev_size, next_size in zip(
            scan_times, scan_times[1:], library_sizes, library_sizes[1:]
        ):
            # Floor the divisor at 1 ms to avoid dividing by (near) zero.
            ratio = next_time / max(prev_time, 0.001)
            size_ratio = next_size / prev_size
            assert ratio < size_ratio * 3, \
                f"Scaling is worse than linear: {ratio:.2f}x time for {size_ratio}x size"

        print("\nScalability test:")
        for size, time_taken in zip(library_sizes, scan_times):
            rate = size / max(time_taken, 0.001)
            print(f" {size} series: {time_taken:.2f}s ({rate:.2f} series/sec)")

    @pytest.mark.asyncio
    async def test_memory_scales_acceptably_with_size(self, tmp_path):
        """Test that memory usage scales acceptably with library size."""
        import psutil

        process = psutil.Process()
        anime_dir = tmp_path / "anime"
        anime_dir.mkdir()
        mock_loader = Mock()

        library_sizes = [100, 500, 1000]
        memory_usage = []

        for size in library_sizes:
            for i in range(size):
                (anime_dir / f"Size{size}_S{i:04d}").mkdir()

            # Resident set size before and after the scan, in megabytes.
            baseline = process.memory_info().rss / 1024 / 1024
            scanner = SerieScanner(str(anime_dir), mock_loader)
            with _scanner_patches(scanner):
                scanner.scan()
            current = process.memory_info().rss / 1024 / 1024

            memory_increase = current - baseline
            memory_usage.append(memory_increase)

            # Snapshot the listing first so entries are not removed while
            # the directory is still being iterated.
            for folder in list(anime_dir.iterdir()):
                if folder.name.startswith(f"Size{size}_"):
                    folder.rmdir()

        # Compare each measurement with the next larger library.
        for prev_mem, next_mem, prev_size, next_size in zip(
            memory_usage, memory_usage[1:], library_sizes, library_sizes[1:]
        ):
            # Use a floor of 1MB to avoid near-zero division
            ratio = max(next_mem, 1.0) / max(prev_mem, 1.0)
            size_ratio = next_size / prev_size
            assert ratio <= size_ratio * 5, \
                f"Memory scaling is too aggressive: {ratio:.2f}x for {size_ratio}x size"

        print("\nMemory scaling:")
        for size, mem in zip(library_sizes, memory_usage):
            per_series = (mem / size) * 1024 if size > 0 else 0
            print(f" {size} series: {mem:.2f}MB ({per_series:.2f}KB/series)")