Files
BanGUI/backend/tests/test_startup_integration.py
Lukas c4ede71fa6 Fix: Enforce single-worker deployment for session cache cluster safety
Addresses: Backend session cache not cluster-safe (multi-worker issue)

Problem:
- Session cache is process-local (InMemorySessionCache)
- Multi-worker deployments (uvicorn --workers N) create separate processes
- Each process has its own independent session cache
- Sessions cached in Worker A are invisible to Workers B, C, D
- Users randomly logged out when requests land on different workers
- Also affects RuntimeState, rate limiter, and background jobs

Solution (Option A - Strict single-worker enforcement):
- Enhance startup validation with clearer error messages
- Update error messages to explain the problem and how to fix it
- Document single-worker requirement prominently in Docker configs
- Update module docstrings to clarify constraints

Changes:
1. app/startup.py:
   - Enhanced _check_single_worker_mode() error message with troubleshooting
   - Enhanced _stage_check_worker_mode_and_acquire_lock() error message
   - Removed unused import

2. app/utils/session_cache.py:
   - Updated module docstring to explain constraints more clearly
   - Added references to deployment documentation
   - Clarified multi-worker solution for future implementation

3. app/utils/runtime_state.py:
   - Updated module docstring with deployment constraint references
   - Aligned messaging with session_cache.py

4. Docker/Dockerfile.backend:
   - Added comprehensive comments about single-worker requirement
   - Explained impact in multi-worker deployments
   - Referenced deployment constraints documentation

5. Docker/docker-compose.yml, compose.prod.yml, compose.debug.yml:
   - Added documentation comments about BANGUI_WORKERS constraint
   - Explained why single-worker is required

6. backend/tests/test_startup_integration.py:
   - Fixed test unpacking to match function return signature (3 values, not 2)

This ensures multi-worker deployments fail loudly at startup with clear
guidance on what went wrong and how to fix it. The database-backed scheduler
lock provides defense-in-depth for container orchestration scenarios.

For future multi-worker support, implement:
- Redis or database-backed session cache
- Shared RuntimeState coordination
- Distributed APScheduler backend

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-30 20:54:24 +02:00

190 lines
7.1 KiB
Python

"""Integration tests for the complete startup flow with StartupDAG."""
import tempfile
from contextlib import ExitStack
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from fastapi import FastAPI

from app.config import Settings
from app.startup import startup_shared_resources
def _create_test_settings(tmpdir: str) -> Settings:
    """Build the minimal ``Settings`` instance shared by the startup tests.

    Args:
        tmpdir: Directory in which the ``bangui.db`` database path is placed.

    Returns:
        A ``Settings`` object whose database path points inside *tmpdir*;
        every other field is a fixed test value.
    """
    db_file = Path(tmpdir, "bangui.db")
    overrides = {
        "database_path": str(db_file),
        "fail2ban_socket": "/var/run/fail2ban/fail2ban.sock",
        "session_secret": "test-secret-12345678901234567890",
        "fail2ban_config_dir": "/etc/fail2ban",
        "geoip_db_path": "/usr/share/GeoIP/GeoLite2-Country.mmdb",
        "geoip_allow_http_fallback": False,
        "log_level": "info",
    }
    return Settings(**overrides)
@pytest.mark.asyncio
async def test_startup_shared_resources_complete_flow() -> None:
    """Test that startup_shared_resources successfully initializes all resources via DAG.

    The database, setup-state, GeoCache, jail-config and task-registration
    hooks on ``app.startup`` are patched out. The test then checks that the
    three returned resources are present, the scheduler reports running,
    ``app.state.geo_cache`` is the mocked GeoCache instance, and every
    ``register`` hook was called exactly once.
    """
    # Task-registration hooks; each one must be invoked exactly once.
    register_targets = (
        "app.startup.health_check.register",
        "app.startup.blocklist_import.register",
        "app.startup.geo_cache_cleanup.register",
        "app.startup.geo_cache_flush.register",
        "app.startup.geo_re_resolve.register",
        "app.startup.history_sync.register",
        "app.startup.session_cleanup.register",
    )

    # Create a test app
    app = FastAPI()
    app.state = MagicMock()

    with tempfile.TemporaryDirectory() as tmpdir, ExitStack() as patches:
        # Create minimal settings for testing
        settings = _create_test_settings(tmpdir)

        # Mock external dependencies that would require actual fail2ban/MaxMind
        mock_open_db = patches.enter_context(patch("app.startup.open_db"))
        mock_init_db = patches.enter_context(patch("app.startup.init_db"))
        mock_is_setup_complete = patches.enter_context(
            patch("app.startup.setup_service.is_setup_complete")
        )
        mock_set_setup_complete = patches.enter_context(
            patch("app.startup.set_setup_complete_cache")
        )
        mock_geo_cache_class = patches.enter_context(patch("app.startup.GeoCache"))
        # Patched only to neutralise it; the test never inspects this mock.
        patches.enter_context(patch("app.startup.ensure_jail_configs"))
        register_mocks = {
            target: patches.enter_context(patch(target)) for target in register_targets
        }

        # Setup mock database
        mock_db = AsyncMock()
        mock_db.close = AsyncMock()
        mock_open_db.return_value = mock_db

        # Setup mock services
        mock_init_db.return_value = None
        mock_is_setup_complete.return_value = False
        mock_set_setup_complete.return_value = None

        # Setup mock GeoCache
        mock_geo_cache = MagicMock()
        mock_geo_cache.load_cache_from_db = AsyncMock()
        mock_geo_cache.count_unresolved = AsyncMock(return_value=0)
        mock_geo_cache.init_geoip = MagicMock()
        mock_geo_cache_class.return_value = mock_geo_cache

        # Setup mock blocklist import
        register_mocks["app.startup.blocklist_import.register"].return_value = None

        # Call startup_shared_resources
        http_session, scheduler, startup_db = await startup_shared_resources(app, settings)

        try:
            # Verify all stages completed successfully
            assert http_session is not None
            assert scheduler is not None
            assert startup_db is not None
            assert scheduler.running

            # Verify resources were initialized
            assert app.state.geo_cache is mock_geo_cache

            # Verify all task registration functions were called
            for register_mock in register_mocks.values():
                register_mock.assert_called_once()
        finally:
            # Cleanup must run even when an assertion above fails; otherwise
            # the session and scheduler would outlive this test. The guards
            # keep a failed "is not None"/"running" assertion from being
            # masked by a secondary error raised during cleanup.
            try:
                if http_session is not None:
                    await http_session.close()
            finally:
                if scheduler is not None and scheduler.running:
                    scheduler.shutdown(wait=False)
@pytest.mark.asyncio
async def test_startup_shared_resources_rollback_on_database_failure() -> None:
    """Test that startup_shared_resources rolls back all resources if database init fails.

    ``init_db`` is patched to raise ``RuntimeError``; the test expects that
    error to propagate out of ``startup_shared_resources`` and the mocked
    database handle's ``close`` to have been called.
    """
    failure_text = "Database initialization failed"

    # Stand-in database handle whose close() call can be inspected afterwards
    fake_db = AsyncMock()
    fake_db.close = AsyncMock()

    application = FastAPI()
    application.state = MagicMock()

    with tempfile.TemporaryDirectory() as workdir:
        test_settings = _create_test_settings(workdir)

        open_db_patch = patch("app.startup.open_db", return_value=fake_db)
        # Setup mock database initialisation to fail
        init_db_patch = patch("app.startup.init_db", side_effect=RuntimeError(failure_text))

        with open_db_patch, init_db_patch:
            # startup_shared_resources should raise the database error
            with pytest.raises(RuntimeError, match=failure_text):
                await startup_shared_resources(application, test_settings)

            # Verify cleanup was attempted
            fake_db.close.assert_called()
@pytest.mark.asyncio
async def test_startup_shared_resources_scheduler_starts() -> None:
    """Test that the scheduler is started during startup.

    All ``app.startup`` collaborators are patched out, then
    ``startup_shared_resources`` is run and the returned scheduler is
    checked for ``running``.
    """
    # Collaborators that only need to be neutralised; the test never
    # configures or inspects their mocks.
    passive_targets = (
        "app.startup.init_db",
        "app.startup.set_setup_complete_cache",
        "app.startup.ensure_jail_configs",
        "app.startup.health_check.register",
        "app.startup.blocklist_import.register",
        "app.startup.geo_cache_cleanup.register",
        "app.startup.geo_cache_flush.register",
        "app.startup.geo_re_resolve.register",
        "app.startup.history_sync.register",
        "app.startup.session_cleanup.register",
    )

    app = FastAPI()
    app.state = MagicMock()

    with tempfile.TemporaryDirectory() as tmpdir, ExitStack() as patches:
        settings = _create_test_settings(tmpdir)

        mock_open_db = patches.enter_context(patch("app.startup.open_db"))
        mock_is_setup = patches.enter_context(
            patch("app.startup.setup_service.is_setup_complete")
        )
        mock_geo_cache_class = patches.enter_context(patch("app.startup.GeoCache"))
        for target in passive_targets:
            patches.enter_context(patch(target))

        # Setup mock database
        mock_db = AsyncMock()
        mock_db.close = AsyncMock()
        mock_open_db.return_value = mock_db

        mock_is_setup.return_value = False

        # Setup mock GeoCache
        mock_geo_cache = MagicMock()
        mock_geo_cache.load_cache_from_db = AsyncMock()
        mock_geo_cache.count_unresolved = AsyncMock(return_value=0)
        mock_geo_cache.init_geoip = MagicMock()
        mock_geo_cache_class.return_value = mock_geo_cache

        # The third element (startup database handle) is not inspected here.
        http_session, scheduler, _startup_db = await startup_shared_resources(app, settings)

        try:
            # Verify scheduler is running
            assert scheduler.running
        finally:
            # Cleanup must run even when the assertion fails; otherwise the
            # session and scheduler would outlive this test. The guards keep
            # the original assertion failure from being masked by a
            # secondary error raised during cleanup.
            try:
                if http_session is not None:
                    await http_session.close()
            finally:
                if scheduler is not None and scheduler.running:
                    scheduler.shutdown(wait=False)