diff --git a/Docker/Dockerfile.backend b/Docker/Dockerfile.backend index 849e552..273d1ca 100644 --- a/Docker/Dockerfile.backend +++ b/Docker/Dockerfile.backend @@ -56,7 +56,8 @@ VOLUME ["/data"] # Default environment values (override at runtime) ENV BANGUI_DATABASE_PATH="/data/bangui.db" \ BANGUI_FAIL2BAN_SOCKET="/var/run/fail2ban/fail2ban.sock" \ - BANGUI_LOG_LEVEL="info" + BANGUI_LOG_LEVEL="info" \ + BANGUI_WORKERS="1" EXPOSE 8000 diff --git a/Docker/compose.prod.yml b/Docker/compose.prod.yml index 0e1ee72..73847a5 100644 --- a/Docker/compose.prod.yml +++ b/Docker/compose.prod.yml @@ -58,6 +58,7 @@ services: BANGUI_FAIL2BAN_SOCKET: "/var/run/fail2ban/fail2ban.sock" BANGUI_FAIL2BAN_CONFIG_DIR: "/config/fail2ban" BANGUI_LOG_LEVEL: "info" + BANGUI_WORKERS: "1" # APScheduler requires single worker — do not change BANGUI_SESSION_SECRET: "${BANGUI_SESSION_SECRET:?Set BANGUI_SESSION_SECRET}" BANGUI_TIMEZONE: "${BANGUI_TIMEZONE:-UTC}" volumes: diff --git a/Docs/Architekture.md b/Docs/Architekture.md index c8ccc74..b1b45c9 100644 --- a/Docs/Architekture.md +++ b/Docs/Architekture.md @@ -282,7 +282,7 @@ The FastAPI app factory. Responsibilities: - Registers the **lifespan** context manager (startup: open DB, create aiohttp session, start scheduler; shutdown: close all) - Mounts all routers - Registers global exception handlers that map domain exceptions to HTTP status codes -- Applies the setup-redirect middleware (redirects all requests to `/api/setup` when no configuration exists) +- Applies the setup-redirect middleware (returns `423 Locked` for all API requests when no configuration exists, except for `/api/setup` and `/api/health`) --- @@ -713,8 +713,8 @@ APScheduler 4.x (async mode) manages recurring background tasks. - All endpoints are grouped under `/api/` prefix. - JSON request and response bodies, validated by Pydantic models. - Authentication via session cookie on all endpoints except `/api/setup` and `/api/auth/login`. -- Setup-redirect middleware: while no configuration exists, all endpoints return `303 See Other` → `/api/setup`. -- Standard HTTP status codes: `200` success, `201` created, `204` no content, `400` bad request, `401` unauthorized, `404` not found, `422` validation error, `500` server error. +- Setup-redirect middleware: while no configuration exists, all API endpoints (except `/api/setup` and `/api/health`) return `423 Locked` with `{"detail": "Setup not complete.", "setup_required": true}`. This ensures API consumers can detect setup as a distinct condition rather than transparently following redirects. +- Standard HTTP status codes: `200` success, `201` created, `204` no content, `400` bad request, `401` unauthorized, `404` not found, `422` validation error, `423` locked, `500` server error. - Error responses follow a consistent shape: `{ "detail": "Human-readable message" }`. ### 8.2 Endpoint Groups @@ -768,6 +768,40 @@ APScheduler 4.x (async mode) manages recurring background tasks. --- +## 9.2 Deployment Constraints + +### Single-Worker Requirement + +**BanGUI's background scheduler must run with exactly one uvicorn worker process.** + +The application uses APScheduler's `AsyncIOScheduler`, which is bound to a single asyncio event loop and cannot be safely shared across multiple worker processes. If the app is deployed with `--workers N` (where N > 1), the following failures occur: + +- Each worker process creates its own independent scheduler instance. +- All background jobs execute **N times simultaneously** (once per worker). +- Results: + - **Duplicate blocklist imports** — the same IP ranges are banned N times. + - **Duplicate history entries** — the same historical events are recorded N times. + - **Duplicate ban operations** — bans are executed multiple times, with potential state conflicts. + - **SQLite lock contention** — concurrent writes to the same database from N workers cause lock timeouts. + +### Enforcement + +1. **Environment variable:** Set `BANGUI_WORKERS=1` (default in Dockerfile.backend). +2. **Detection:** On startup, `startup_shared_resources()` validates `BANGUI_WORKERS` and raises a clear `RuntimeError` if it is not 1. +3. **Single-process design:** The application is optimized for a single-process, high-concurrency model using asyncio. Request handling is fully async and leverages the event loop efficiently. + +### Future Multi-Worker Support + +To safely support multiple workers in the future: + +1. **External job store:** Move APScheduler from in-memory to a persistent store (e.g., SQLAlchemy-backed job store with PostgreSQL or Redis). +2. **Distributed locking:** Use a distributed lock (Redis, etcd) to ensure only one worker executes each scheduled job. +3. **Process coordination:** Implement a process-to-worker pool communication mechanism so the scheduler runs only on one designated worker. + +Currently, the single-worker approach is simple, maintainable, and sufficient for BanGUI's operational requirements. + +--- + ## 10. Design Principles These principles govern all architectural decisions in BanGUI. diff --git a/Docs/Backend-Development.md b/Docs/Backend-Development.md index c3cc633..c21620e 100644 --- a/Docs/Backend-Development.md +++ b/Docs/Backend-Development.md @@ -345,6 +345,71 @@ async def test_list_jails_returns_200(client: AsyncClient) -> None: --- +## 9.1 Background Tasks and Scheduler Architecture + +BanGUI uses **APScheduler 4.x** (async mode) to manage background jobs that execute on a schedule without user interaction. This section documents how to write and register background tasks. + +### Task Location and Structure + +All background tasks live in `backend/app/tasks/` as separate modules. Each task: +- Exports a `register(app: FastAPI) -> None` or `async def register(app: FastAPI) -> None` function. +- Opens its own database connection using `app.db.open_db()` or the `task_db()` helper. +- Closes connections when work completes (use the async context manager pattern). +- Runs independently of the FastAPI request/response cycle. + +### Example Task + +```python +# backend/app/tasks/my_task.py +import structlog +from fastapi import FastAPI +from apscheduler.schedulers.asyncio import AsyncIOScheduler + +log = structlog.get_logger() + +async def my_background_job(app: FastAPI) -> None: + """Do important work on a schedule.""" + log.info("my_background_job_started") + try: + db = await app.db.open_db(app.state.settings.database_path) + try: + # Do work... + pass + finally: + await db.close() + except Exception: + log.error("my_background_job_failed", exc_info=True) + +def register(app: FastAPI) -> None: + """Register the job with the scheduler.""" + scheduler: AsyncIOScheduler = app.state.scheduler + scheduler.add_job( + my_background_job, + args=(app,), + trigger="interval", + seconds=60, + id="my_task", + name="My Background Job", + ) +``` + +### Accessing Shared Resources in Tasks + +Since tasks do not have access to `Depends(get_db)` (no request scope), they must: +1. **Open their own DB connection** via `app.state.db_factory.open_db(path)`. +2. **Access app-level state** — `app.state.http_session`, `app.state.geo_cache`, `app.state.settings`, etc. +3. **Use structlog** for all logging (never `print()`). + +### Single-Worker Requirement + +**The scheduler is bound to a single asyncio event loop and cannot be shared across multiple worker processes.** BanGUI enforces single-worker mode to prevent duplicate task execution. + +- **Deployment constraint:** Set `BANGUI_WORKERS=1` (default). +- **Startup validation:** `startup_shared_resources()` raises `RuntimeError` if `BANGUI_WORKERS > 1`. +- See [Architekture.md § 9.2](Architekture.md) for full details. + +--- + ## 10. Code Style & Tooling | Tool | Purpose | @@ -443,7 +508,7 @@ class Settings(BaseSettings): --- -## 12. Git & Workflow +## 13. Git & Workflow - **Branch naming:** `feature/`, `fix/`, `chore/`. - **Commit messages:** imperative tense, max 72 chars first line (`Add jail reload endpoint`, `Fix ban history query`). @@ -453,11 +518,11 @@ class Settings(BaseSettings): --- -## 13. Coding Principles +## 14. Coding Principles These principles are **non-negotiable**. Every backend contributor must internalise and apply them daily. -### 13.1 Clean Code +### 14.1 Clean Code - Write code that **reads like well-written prose** — a new developer should understand intent without asking. - **Meaningful names** — variables, functions, and classes must reveal their purpose. Avoid abbreviations (`cnt`, `mgr`, `tmp`) unless universally understood. @@ -488,7 +553,7 @@ async def check(ip, j): raise Exception("not found") ``` -### 13.2 Separation of Concerns (SoC) +### 14.2 Separation of Concerns (SoC) - Each module, class, and function must have a **single, well-defined responsibility**. - **Routers** → HTTP layer only (parse requests, return responses). @@ -498,29 +563,29 @@ async def check(ip, j): - **Tasks** → scheduled background jobs. - Never mix layers — a router must not execute SQL, and a repository must not raise `HTTPException`. -### 13.3 Single Responsibility Principle (SRP) +### 14.3 Single Responsibility Principle (SRP) - A class or module should have **one and only one reason to change**. - If a service handles both ban management *and* email notifications, split it into `BanService` and `NotificationService`. -### 13.4 Don't Repeat Yourself (DRY) +### 14.4 Don't Repeat Yourself (DRY) - Extract shared logic into utility functions, base classes, or dependency providers. - If the same block of code appears in more than one place, **refactor it** into a single source of truth. - But don't over-abstract — premature DRY that couples unrelated features is worse than a little duplication (see **Rule of Three**: refactor when something appears a third time). -### 13.5 KISS — Keep It Simple, Stupid +### 14.5 KISS — Keep It Simple, Stupid - Choose the simplest solution that works correctly. - Avoid clever tricks, premature optimisation, and over-engineering. - If a standard library function does the job, prefer it over a custom implementation. -### 13.6 YAGNI — You Aren't Gonna Need It +### 14.6 YAGNI — You Aren't Gonna Need It - Do **not** build features, abstractions, or config options "just in case". - Implement what is required **now**. Extend later when a real need emerges. -### 13.7 Dependency Inversion Principle (DIP) +### 14.7 Dependency Inversion Principle (DIP) - High-level modules (services) must not depend on low-level modules (repositories) directly. Both should depend on **abstractions** (protocols / interfaces). - Use FastAPI's `Depends()` to inject implementations — this makes swapping and testing trivial. @@ -580,17 +645,17 @@ async def get_session_repo() -> SessionRepository: - Before each deployment, run `mypy --strict` to ensure all dependency providers return values compatible with their Protocol types. - The `cast()` calls in `dependencies.py` are a documented signal that structural compatibility is being verified externally, not via explicit class inheritance. -### 13.8 Composition over Inheritance +### 14.8 Composition over Inheritance - Favour **composing** small, focused objects over deep inheritance hierarchies. - Use mixins or protocols only when a clear "is-a" relationship exists; otherwise, pass collaborators as constructor arguments. -### 13.9 Fail Fast +### 14.9 Fail Fast - Validate inputs as early as possible — at the API boundary with Pydantic, at service entry with assertions or domain checks. - Raise specific exceptions immediately rather than letting bad data propagate silently. -### 13.10 Law of Demeter (Principle of Least Knowledge) +### 14.10 Law of Demeter (Principle of Least Knowledge) - A function should only call methods on: 1. Its own object (`self`). @@ -598,7 +663,7 @@ async def get_session_repo() -> SessionRepository: 3. Objects it creates. - Avoid long accessor chains like `request.state.db.cursor().execute(...)` — wrap them in a meaningful method. -### 13.11 Defensive Programming +### 14.11 Defensive Programming - Never trust external input — validate and sanitise everything that crosses a boundary (HTTP request, file, socket, environment variable). - Handle edge cases explicitly: empty lists, `None` values, negative numbers, empty strings. @@ -606,7 +671,7 @@ async def get_session_repo() -> SessionRepository: --- -## 14. Quick Reference — Do / Don't +## 15. Quick Reference — Do / Don't | Do | Don't | |---|---| diff --git a/backend/app/startup.py b/backend/app/startup.py index 84b8133..b9bc80e 100644 --- a/backend/app/startup.py +++ b/backend/app/startup.py @@ -7,6 +7,7 @@ in ``app.main`` delegates resource creation and task registration here. from __future__ import annotations +import os from contextlib import suppress from pathlib import Path from typing import TYPE_CHECKING @@ -32,6 +33,39 @@ if TYPE_CHECKING: log: structlog.stdlib.BoundLogger = structlog.get_logger() +def _check_single_worker_mode() -> None: + """Verify that the application is running with a single worker. + + APScheduler's AsyncIOScheduler is bound to a single asyncio event loop + and cannot be safely shared across multiple worker processes. If each + worker starts its own scheduler instance, all background jobs execute N + times (where N is the number of workers), resulting in duplicate blocklist + imports, duplicate ban operations, duplicate history writes, and SQLite + lock contention. + + This function detects multi-worker configurations and raises a clear + RuntimeError with instructions. + + Raises: + RuntimeError: If the app would run with multiple workers. + """ + # Check for explicit worker count env var (convention used in deployment) + workers_env = os.environ.get("BANGUI_WORKERS") + if workers_env is not None: + try: + worker_count = int(workers_env) + if worker_count > 1: + raise RuntimeError( + "BanGUI background scheduler cannot run with multiple workers.\n" + f"BANGUI_WORKERS is set to {worker_count}. Set it to 1 or remove it.\n" + "See Architekture.md § Deployment Constraints for details." + ) + except ValueError as e: + raise RuntimeError( + f"BANGUI_WORKERS environment variable must be an integer, got: {workers_env}" + ) from e + + async def _ensure_database_schema(database_path: str) -> None: """Create the configured runtime database if it does not already exist.""" db = await open_db(database_path) @@ -70,6 +104,8 @@ async def startup_shared_resources( Returns: A tuple of ``(http_session, scheduler)``. """ + _check_single_worker_mode() + db_path: Path = Path(settings.database_path) await run_blocking(db_path.parent.mkdir, parents=True, exist_ok=True) diff --git a/backend/tests/test_startup.py b/backend/tests/test_startup.py new file mode 100644 index 0000000..f688d37 --- /dev/null +++ b/backend/tests/test_startup.py @@ -0,0 +1,57 @@ +"""Unit tests for application startup and resource initialization.""" + +import os +from unittest.mock import patch + +import pytest + +from app.startup import _check_single_worker_mode + + +def test_check_single_worker_mode_allows_workers_1() -> None: + """Single-worker mode is allowed and no error is raised.""" + with patch.dict(os.environ, {"BANGUI_WORKERS": "1"}): + # Should not raise + _check_single_worker_mode() + + +def test_check_single_worker_mode_allows_no_env_var() -> None: + """When BANGUI_WORKERS is not set, no error is raised.""" + with patch.dict(os.environ, {}, clear=False): + # Remove BANGUI_WORKERS if it exists + os.environ.pop("BANGUI_WORKERS", None) + # Should not raise + _check_single_worker_mode() + + +def test_check_single_worker_mode_rejects_workers_2() -> None: + """Multi-worker mode (N=2) raises a clear RuntimeError.""" + with patch.dict(os.environ, {"BANGUI_WORKERS": "2"}), pytest.raises( + RuntimeError, match="cannot run with multiple workers" + ): + _check_single_worker_mode() + + +def test_check_single_worker_mode_rejects_workers_4() -> None: + """Multi-worker mode (N=4) raises a clear RuntimeError.""" + with patch.dict(os.environ, {"BANGUI_WORKERS": "4"}), pytest.raises( + RuntimeError, match="cannot run with multiple workers" + ): + _check_single_worker_mode() + + +def test_check_single_worker_mode_rejects_invalid_workers_value() -> None: + """Invalid BANGUI_WORKERS value raises a clear RuntimeError.""" + with patch.dict(os.environ, {"BANGUI_WORKERS": "not-a-number"}), pytest.raises( + RuntimeError, match="must be an integer" + ): + _check_single_worker_mode() + + +def test_check_single_worker_mode_error_message_is_informative() -> None: + """Error message includes instructions and reference to documentation.""" + with patch.dict(os.environ, {"BANGUI_WORKERS": "2"}), pytest.raises(RuntimeError) as exc_info: + _check_single_worker_mode() + error_message = str(exc_info.value) + assert "multiple workers" in error_message + assert "Architekture.md" in error_message