Aniworld/tests/integration/test_data_migration.py
Lukas 17754a86f0 Add database migration from legacy data files
- Create DataMigrationService for migrating data files to SQLite
- Add sync database methods to AnimeSeriesService
- Update SerieScanner to save to database with file fallback
- Update anime API endpoints to use database with fallback
- Add delete endpoint for anime series
- Add automatic migration on startup in fastapi_app.py lifespan
- Add 28 unit tests for migration service
- Add 14 integration tests for migration flow
- Update infrastructure.md and database README docs

Migration runs automatically on startup, legacy data files preserved.
2025-12-01 17:42:09 +01:00

472 lines
15 KiB
Python

"""Integration tests for data migration from file-based to database storage.
This module tests the complete migration flow including:
- Migration of legacy data files to database
- API endpoints working with database backend
- Data integrity during migration
"""
import json
import os
import pytest
from httpx import ASGITransport, AsyncClient
from src.server.database.service import AnimeSeriesService
from src.server.fastapi_app import app
from src.server.services.auth_service import auth_service
from src.server.services.data_migration_service import (
DataMigrationService,
MigrationResult,
)
@pytest.fixture
def temp_anime_dir(tmp_path):
    """Build a temporary anime library populated with legacy data files.

    Three series folders are created below ``tmp_path / "anime"``; each one
    receives a JSON-encoded file named ``data`` describing that series.

    Returns:
        The path of the populated anime root directory.
    """
    library_root = tmp_path / "anime"
    library_root.mkdir()

    # One row per series: (key, name, folder, episodeDict).
    series_specs = (
        (
            "test-anime-1",
            "Test Anime 1",
            "Test Anime 1 (2020)",
            {"1": [1, 2, 3]},
        ),
        (
            "test-anime-2",
            "Test Anime 2",
            "Test Anime 2 (2021)",
            {"1": [1], "2": [1, 2]},
        ),
        (
            "test-anime-3",
            "Test Anime 3",
            "Test Anime 3 (2022)",
            {},
        ),
    )

    for key, name, folder, episodes in series_specs:
        # Key order is kept stable so the serialized file content is the
        # same as a hand-written literal would produce.
        payload = {
            "key": key,
            "name": name,
            "site": "aniworld.to",
            "folder": folder,
            "episodeDict": episodes,
        }
        target_dir = library_root / folder
        target_dir.mkdir()
        (target_dir / "data").write_text(json.dumps(payload))

    return library_root
@pytest.fixture
def temp_db_path(tmp_path):
    """Return the location of a throwaway SQLite file inside ``tmp_path``.

    The file itself is not created here; only its path is computed.
    """
    db_file = tmp_path.joinpath("test_aniworld.db")
    return db_file
@pytest.fixture
async def test_db_session(temp_db_path):
    """Yield an async database session bound to a temporary SQLite file.

    An async engine is created for ``temp_db_path``, every table registered
    on ``Base.metadata`` is created, and a session built with
    ``expire_on_commit=False`` is yielded to the test.

    The engine is disposed in a ``finally`` clause so that it is released
    even when schema creation or session teardown raises.
    """
    from sqlalchemy.ext.asyncio import (
        async_sessionmaker,
        create_async_engine,
    )

    from src.server.database.base import Base

    # Create test database
    test_db_url = f"sqlite+aiosqlite:///{temp_db_path}"
    engine = create_async_engine(test_db_url, echo=False)
    try:
        async with engine.begin() as conn:
            await conn.run_sync(Base.metadata.create_all)

        async_session = async_sessionmaker(engine, expire_on_commit=False)
        async with async_session() as session:
            yield session
    finally:
        # Always release the engine, including on failure paths.
        await engine.dispose()
class TestDataMigrationIntegration:
    """Integration tests for the complete data migration flow.

    Each test drives ``DataMigrationService`` against the temporary anime
    directory and the temporary database session provided by fixtures.
    """

    @pytest.mark.asyncio
    async def test_full_migration_flow(
        self, temp_anime_dir, test_db_session
    ):
        """Test complete migration from data files to database."""
        # Setup: Verify data files exist
        data_files = list(temp_anime_dir.glob("*/data"))
        assert len(data_files) == 3, "Should have 3 data files"

        # Create migration service
        migration_service = DataMigrationService()

        # Check for legacy data files
        files = await migration_service.check_for_legacy_data_files(
            str(temp_anime_dir)
        )
        assert len(files) == 3, "Should find 3 legacy data files"

        # Run full migration
        result = await migration_service.migrate_all_legacy_data(
            str(temp_anime_dir), test_db_session
        )

        # Verify results
        assert result.total_found == 3
        assert result.migrated == 3
        assert result.failed == 0
        assert result.skipped == 0
        assert len(result.errors) == 0

        # Verify all entries in database
        all_series = await AnimeSeriesService.get_all(test_db_session)
        assert len(all_series) == 3, "Should have 3 series in database"

        # Verify series keys
        keys_in_db = {s.key for s in all_series}
        expected_keys = {"test-anime-1", "test-anime-2", "test-anime-3"}
        assert keys_in_db == expected_keys, \
            "All series keys should be migrated"

    @pytest.mark.asyncio
    async def test_migration_preserves_data(
        self, temp_anime_dir, test_db_session
    ):
        """Test that migration preserves all series data."""
        migration_service = DataMigrationService()

        # Run migration
        await migration_service.migrate_all_legacy_data(
            str(temp_anime_dir), test_db_session
        )

        # Verify specific series data
        series = await AnimeSeriesService.get_by_key(
            test_db_session, "test-anime-1"
        )
        assert series is not None
        assert series.name == "Test Anime 1"
        assert series.site == "aniworld.to"
        assert series.folder == "Test Anime 1 (2020)"
        assert series.episode_dict == {"1": [1, 2, 3]}

        # Verify series with multiple seasons
        series2 = await AnimeSeriesService.get_by_key(
            test_db_session, "test-anime-2"
        )
        assert series2 is not None
        assert series2.episode_dict == {"1": [1], "2": [1, 2]}

    @pytest.mark.asyncio
    async def test_migration_idempotent(
        self, temp_anime_dir, test_db_session
    ):
        """Test that re-running migration doesn't create duplicates."""
        migration_service = DataMigrationService()

        # Run migration twice
        result1 = await migration_service.migrate_all_legacy_data(
            str(temp_anime_dir), test_db_session
        )
        result2 = await migration_service.migrate_all_legacy_data(
            str(temp_anime_dir), test_db_session
        )

        # First run should migrate all
        assert result1.migrated == 3
        assert result1.skipped == 0

        # Second run should skip all (already in DB)
        assert result2.migrated == 0
        assert result2.skipped == 3

        # Database should only have 3 entries (not 6)
        all_series = await AnimeSeriesService.get_all(test_db_session)
        assert len(all_series) == 3

    @pytest.mark.asyncio
    async def test_single_file_migration(
        self, temp_anime_dir, test_db_session
    ):
        """Test migration of a single data file."""
        migration_service = DataMigrationService()

        # Get one data file path
        data_file = str(temp_anime_dir / "Test Anime 1 (2020)" / "data")

        # Migrate single file
        result = await migration_service.migrate_data_file_to_db(
            data_file, test_db_session
        )
        assert result is True

        # Verify in database
        series = await AnimeSeriesService.get_by_key(
            test_db_session, "test-anime-1"
        )
        assert series is not None
        assert series.name == "Test Anime 1"

    @pytest.mark.asyncio
    async def test_migration_with_corrupted_file(
        self, temp_anime_dir, test_db_session
    ):
        """Test migration handles corrupted files gracefully."""
        # Create a corrupted data file next to the three valid ones
        corrupted_dir = temp_anime_dir / "Corrupted Anime"
        corrupted_dir.mkdir()
        corrupted_file = corrupted_dir / "data"
        corrupted_file.write_text("not valid json {{{")

        migration_service = DataMigrationService()

        # Run migration
        result = await migration_service.migrate_all_legacy_data(
            str(temp_anime_dir), test_db_session
        )

        # Should have 3 migrated, 1 failed
        assert result.total_found == 4
        assert result.migrated == 3
        assert result.failed == 1
        assert len(result.errors) == 1

    @pytest.mark.asyncio
    async def test_migration_with_empty_directory(
        self, tmp_path, test_db_session
    ):
        """Test migration with directory containing no data files."""
        empty_dir = tmp_path / "empty_anime"
        empty_dir.mkdir()

        migration_service = DataMigrationService()

        # Check for files
        files = await migration_service.check_for_legacy_data_files(
            str(empty_dir)
        )
        assert len(files) == 0

        # Run migration on empty directory
        result = await migration_service.migrate_all_legacy_data(
            str(empty_dir), test_db_session
        )
        assert result.total_found == 0
        assert result.migrated == 0
        assert result.failed == 0

    @pytest.mark.asyncio
    async def test_migration_with_invalid_directory(
        self, tmp_path, test_db_session
    ):
        """Test migration with non-existent directory."""
        migration_service = DataMigrationService()

        # Derive the missing path from tmp_path: it is guaranteed not to
        # exist on any platform, unlike a hard-coded absolute path.
        missing_dir = str(tmp_path / "non_existent_anime_dir")
        assert not os.path.exists(missing_dir)

        # Try non-existent directory
        files = await migration_service.check_for_legacy_data_files(
            missing_dir
        )
        assert len(files) == 0

        result = await migration_service.migrate_all_legacy_data(
            missing_dir, test_db_session
        )
        assert result.total_found == 0

    @pytest.mark.asyncio
    async def test_cleanup_migrated_files(
        self, temp_anime_dir, test_db_session
    ):
        """Test cleanup of migrated data files with backup."""
        migration_service = DataMigrationService()

        # Get data file paths before migration
        files = await migration_service.check_for_legacy_data_files(
            str(temp_anime_dir)
        )
        assert len(files) == 3

        # Run cleanup (with backup=True)
        await migration_service.cleanup_migrated_files(files, backup=True)

        for original_path in files:
            # Original data files should be removed
            assert not os.path.exists(original_path), \
                f"Original file should not exist: {original_path}"

            # Backup files have timestamp suffix:
            # data.backup.YYYYMMDD_HHMMSS
            parent_dir = os.path.dirname(original_path)
            backup_files = [
                f for f in os.listdir(parent_dir)
                if f.startswith("data.backup.")
            ]
            assert len(backup_files) == 1, \
                f"Backup file should exist in {parent_dir}"

    @pytest.mark.asyncio
    async def test_cleanup_without_backup(
        self, temp_anime_dir, test_db_session
    ):
        """Test cleanup of migrated data files without backup."""
        migration_service = DataMigrationService()

        # Get data file paths; guard against a vacuous loop below
        files = await migration_service.check_for_legacy_data_files(
            str(temp_anime_dir)
        )
        assert len(files) == 3

        # Run cleanup without backup
        await migration_service.cleanup_migrated_files(files, backup=False)

        # Files should be deleted, no backups
        for original_path in files:
            assert not os.path.exists(original_path)
            assert not os.path.exists(original_path + ".migrated")

            # Backups are named data.backup.<timestamp> (see the backup
            # test in this class), so look for that prefix as well.
            parent_dir = os.path.dirname(original_path)
            leftover_backups = [
                f for f in os.listdir(parent_dir)
                if f.startswith("data.backup.")
            ] if os.path.isdir(parent_dir) else []
            assert not leftover_backups, \
                f"No backup file should exist in {parent_dir}"
class TestAPIWithDatabaseIntegration:
    """Exercise the database layer that backs the API endpoints.

    Note: only the database integration layer is covered here.
    Full API tests are in tests/api/test_anime_endpoints.py.
    """

    @pytest.fixture
    def mock_auth(self):
        """Provide the claims used to mint a token for API tests."""
        claims = {"user_id": "test_user", "role": "admin"}
        return claims

    @pytest.fixture
    async def authenticated_client(self, mock_auth):
        """Yield an HTTP client that sends a bearer token on each request."""
        bearer = auth_service.create_access_token(mock_auth)
        async with AsyncClient(
            transport=ASGITransport(app=app),
            base_url="http://test",
        ) as http_client:
            http_client.headers["Authorization"] = f"Bearer {bearer}"
            yield http_client

    @pytest.mark.asyncio
    async def test_anime_service_uses_database(
        self, test_db_session
    ):
        """A series created through the service can be read back by key."""
        new_series_fields = {
            "key": "api-test-anime",
            "name": "API Test Anime",
            "site": "aniworld.to",
            "folder": "API Test Anime (2024)",
            "episode_dict": {"1": [1, 2, 3]},
        }
        await AnimeSeriesService.create(test_db_session, **new_series_fields)
        await test_db_session.commit()

        # Read it back through the same session.
        stored = await AnimeSeriesService.get_by_key(
            test_db_session, "api-test-anime"
        )
        assert stored is not None
        assert stored.name == "API Test Anime"
        assert stored.folder == "API Test Anime (2024)"

    @pytest.mark.asyncio
    async def test_database_update_series(
        self, test_db_session
    ):
        """Updating a series returns the new name and episode mapping."""
        created = await AnimeSeriesService.create(
            test_db_session,
            key="update-test-anime",
            name="Original Name",
            site="aniworld.to",
            folder="Original Folder",
            episode_dict={},
        )
        await test_db_session.commit()

        changes = {"name": "Updated Name", "episode_dict": {"1": [1, 2]}}
        changed = await AnimeSeriesService.update(
            test_db_session, created.id, **changes
        )
        await test_db_session.commit()

        # The object returned by update() must carry the new values.
        assert changed.name == "Updated Name"
        assert changed.episode_dict == {"1": [1, 2]}

    @pytest.mark.asyncio
    async def test_database_delete_series(
        self, test_db_session
    ):
        """A deleted series is no longer retrievable by its key."""
        doomed = await AnimeSeriesService.create(
            test_db_session,
            key="delete-test-anime",
            name="To Delete",
            site="aniworld.to",
            folder="Delete Folder",
            episode_dict={},
        )
        await test_db_session.commit()
        doomed_id = doomed.id

        was_deleted = await AnimeSeriesService.delete(
            test_db_session, doomed_id
        )
        await test_db_session.commit()
        assert was_deleted is True

        # Lookup by key must now come back empty.
        lookup = await AnimeSeriesService.get_by_key(
            test_db_session, "delete-test-anime"
        )
        assert lookup is None
class TestMigrationResult:
    """Checks for the ``MigrationResult`` value object."""

    def test_migration_result_defaults(self):
        """A result built without arguments has zero counters, no errors."""
        fresh = MigrationResult()
        for counter in ("total_found", "migrated", "failed", "skipped"):
            assert getattr(fresh, counter) == 0
        assert fresh.errors == []

    def test_migration_result_str(self):
        """``str()`` of a result summarises the four counters."""
        fields = {
            "total_found": 10,
            "migrated": 7,
            "failed": 1,
            "skipped": 2,
            "errors": ["Error 1"],
        }
        populated = MigrationResult(**fields)
        expected_text = (
            "Migration Result: 7 migrated, 2 skipped, 1 failed (total: 10)"
        )
        assert str(populated) == expected_text