From 4ceb11a4e3fb93439bace250a6aca95b279c0235 Mon Sep 17 00:00:00 2001 From: Lukas Date: Sun, 26 Apr 2026 14:11:18 +0200 Subject: [PATCH] TASK-018: Make config file writes atomic using write-to-temp + rename - Replace Path.write_text() with tempfile.NamedTemporaryFile + os.replace() in _write_conf_file() and _create_conf_file() - Ensures atomic operations on same filesystem (temp file in target.parent) - Prevents config corruption if process is killed mid-write - Follows existing pattern in jail_config_service.py - Add proper cleanup of temp files on error with contextlib.suppress() - Document atomic file write conventions in Backend-Development.md This prevents fail2ban config files (especially jail.d/*.conf) from being left in a truncated or corrupt state, which could disable active protection. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- Docs/Backend-Development.md | 91 ++++++++++++++++++--- Docs/Tasks.md | 30 ------- backend/app/services/config_file_helpers.py | 33 +++++++- 3 files changed, 110 insertions(+), 44 deletions(-) diff --git a/Docs/Backend-Development.md b/Docs/Backend-Development.md index 6652fb6..5453524 100644 --- a/Docs/Backend-Development.md +++ b/Docs/Backend-Development.md @@ -736,6 +736,73 @@ The login endpoint (`POST /api/auth/login`) is protected against brute-force att --- +## 13. File I/O Conventions + +All file write operations to critical configuration files must be **atomic** to prevent corruption if the process is killed mid-write. + +### Atomic File Writes + +Configuration files (e.g., fail2ban jail configs in `jail.d/`) are essential for system operation. A truncated or corrupt config file can break fail2ban's ability to reload and may disable active protection. + +**Rule: Always use write-to-temp + atomic rename** + +Never use `Path.write_text()` or `file.write()` directly for critical files. Instead: + +1. Create a temporary file in the **same directory** as the target (crucial for atomic `os.replace()`). +2. Write content to the temp file. +3. Atomically rename the temp file to replace the target. +4. Clean up the temp file if an error occurs. + +**Implementation Pattern:** + +```python +import os +import tempfile +from pathlib import Path + +target = Path("/path/to/config/file.conf") + +tmp_name: str | None = None +try: + # Create temp file in target's directory (same filesystem = atomic) + with tempfile.NamedTemporaryFile( + mode="w", + encoding="utf-8", + dir=target.parent, + delete=False, + suffix=".tmp", + ) as tmp: + tmp.write(content) + tmp_name = tmp.name + # Atomic rename (single syscall on POSIX systems) + os.replace(tmp_name, target) +except OSError as exc: + # Clean up temp file on error + with contextlib.suppress(OSError): + if tmp_name is not None: + os.unlink(tmp_name) + raise ConfigWriteError(f"Cannot write config: {exc}") from exc +``` + +**Why this matters:** + +- `Path.write_text()` overwrites in place. If the process dies mid-write, the file is left truncated or partially written. +- `os.replace()` is atomic on POSIX systems (single rename syscall) **only if source and target are on the same filesystem**. +- Creating the temp file in `target.parent` ensures atomicity. +- On Linux containers, this prevents config corruption and service degradation. + +**Files requiring atomic writes:** + +- All config files under `jail.d/` (created/modified by `_write_conf_file` and `_create_conf_file`) +- Any critical state files that fail2ban relies on + +**Examples in the codebase:** + +- `app/services/config_file_helpers.py`: `_write_conf_file`, `_create_conf_file` +- `app/services/jail_config_service.py`: `_write_local_file_sync`, `_restore_local_file_sync` + +--- + ## 14. Git & Workflow - **Branch naming:** `feature/`, `fix/`, `chore/`. @@ -750,7 +817,7 @@ The login endpoint (`POST /api/auth/login`) is protected against brute-force att These principles are **non-negotiable**. Every backend contributor must internalise and apply them daily. -### 14.1 Clean Code +### 15.1 Clean Code - Write code that **reads like well-written prose** — a new developer should understand intent without asking. - **Meaningful names** — variables, functions, and classes must reveal their purpose. Avoid abbreviations (`cnt`, `mgr`, `tmp`) unless universally understood. @@ -781,7 +848,7 @@ async def check(ip, j): raise Exception("not found") ``` -### 14.2 Separation of Concerns (SoC) +### 15.2 Separation of Concerns (SoC) - Each module, class, and function must have a **single, well-defined responsibility**. - **Routers** → HTTP layer only (parse requests, return responses). @@ -791,29 +858,29 @@ async def check(ip, j): - **Tasks** → scheduled background jobs. - Never mix layers — a router must not execute SQL, and a repository must not raise `HTTPException`. -### 14.3 Single Responsibility Principle (SRP) +### 15.3 Single Responsibility Principle (SRP) - A class or module should have **one and only one reason to change**. - If a service handles both ban management *and* email notifications, split it into `BanService` and `NotificationService`. -### 14.4 Don't Repeat Yourself (DRY) +### 15.4 Don't Repeat Yourself (DRY) - Extract shared logic into utility functions, base classes, or dependency providers. - If the same block of code appears in more than one place, **refactor it** into a single source of truth. - But don't over-abstract — premature DRY that couples unrelated features is worse than a little duplication (see **Rule of Three**: refactor when something appears a third time). -### 14.5 KISS — Keep It Simple, Stupid +### 15.5 KISS — Keep It Simple, Stupid - Choose the simplest solution that works correctly. - Avoid clever tricks, premature optimisation, and over-engineering. - If a standard library function does the job, prefer it over a custom implementation. -### 14.6 YAGNI — You Aren't Gonna Need It +### 15.6 YAGNI — You Aren't Gonna Need It - Do **not** build features, abstractions, or config options "just in case". - Implement what is required **now**. Extend later when a real need emerges. -### 14.7 Dependency Inversion Principle (DIP) +### 15.7 Dependency Inversion Principle (DIP) - High-level modules (services) must not depend on low-level modules (repositories) directly. Both should depend on **abstractions** (protocols / interfaces). - Use FastAPI's `Depends()` to inject implementations — this makes swapping and testing trivial. @@ -942,17 +1009,17 @@ To adopt a Redis backend: - TTL must be respected — expired entries must be removed on access. - See `app/utils/session_cache.py` for the full Protocol definition and current implementations. -### 14.8 Composition over Inheritance +### 15.8 Composition over Inheritance - Favour **composing** small, focused objects over deep inheritance hierarchies. - Use mixins or protocols only when a clear "is-a" relationship exists; otherwise, pass collaborators as constructor arguments. -### 14.9 Fail Fast +### 15.9 Fail Fast - Validate inputs as early as possible — at the API boundary with Pydantic, at service entry with assertions or domain checks. - Raise specific exceptions immediately rather than letting bad data propagate silently. -### 14.10 Law of Demeter (Principle of Least Knowledge) +### 15.10 Law of Demeter (Principle of Least Knowledge) - A function should only call methods on: 1. Its own object (`self`). @@ -960,13 +1027,13 @@ To adopt a Redis backend: 3. Objects it creates. - Avoid long accessor chains like `request.state.db.cursor().execute(...)` — wrap them in a meaningful method. -### 14.11 Defensive Programming +### 15.11 Defensive Programming - Never trust external input — validate and sanitise everything that crosses a boundary (HTTP request, file, socket, environment variable). - Handle edge cases explicitly: empty lists, `None` values, negative numbers, empty strings. - Use type narrowing and exhaustive pattern matching (`match` / `case`) to eliminate impossible states. -### 14.12 SSRF Prevention (Server-Side Request Forgery) +### 15.12 SSRF Prevention (Server-Side Request Forgery) When user-supplied URLs are fetched by the backend, validate them before making any HTTP requests: diff --git a/Docs/Tasks.md b/Docs/Tasks.md index f8170c9..ed2b926 100644 --- a/Docs/Tasks.md +++ b/Docs/Tasks.md @@ -1,33 +1,3 @@ -## TASK-017 — `ip LIKE ?` without escaping `%` and `_` wildcards - -**Severity:** Medium - -### Where found -`backend/app/repositories/fail2ban_db_repo.py` and `backend/app/repositories/history_archive_repo.py` — SQL queries using `ip LIKE ?` with `f"{ip_filter}%"` interpolation. - -### Why this is needed -SQLite's `LIKE` operator treats `%` (any sequence of characters) and `_` (any single character) as wildcards. If an IP filter value contains these characters — unusual for well-formed IPs, but possible via crafted input — the query matches unintended rows. For example, `ip_filter = "10.0.0_"` would match `10.0.0.1` through `10.0.0.9`. - -### Goal -Escape LIKE metacharacters in all `LIKE` query parameters. - -### What to do -1. Escape `%` → `\%` and `_` → `\_` in the filter string before use. -2. Add `ESCAPE '\'` to the SQL: `ip LIKE ? ESCAPE '\'`. -3. Extract this into a helper: `def escape_like(s: str) -> str: return s.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")`. - -### Possible traps and issues -- The backslash escape character itself must also be escaped first to avoid double-escaping. Process in the order: `\` → `\\`, then `%` → `\%`, then `_` → `\_`. -- Test with IPs that contain dots — dots are not LIKE wildcards in SQLite, so they do not need escaping. - -### Docs changes needed -- `Backend-Development.md` — database query conventions (LIKE escaping). - -### Doc references -- [Backend-Development.md](Backend-Development.md) — database access patterns - ---- - ## TASK-018 — `_write_conf_file` and `_create_conf_file` not atomic **Severity:** Medium diff --git a/backend/app/services/config_file_helpers.py b/backend/app/services/config_file_helpers.py index f2e3b1a..0cf351a 100644 --- a/backend/app/services/config_file_helpers.py +++ b/backend/app/services/config_file_helpers.py @@ -6,7 +6,10 @@ and creation helpers used by :mod:`app.services.raw_config_io_service`. from __future__ import annotations +import contextlib +import os import re +import tempfile from pathlib import Path from app.exceptions import ( @@ -186,9 +189,22 @@ def _write_conf_file(subdir: Path, name: str, content: str) -> Path: if target is None: raise ConfigFileNotFoundError(name) + tmp_name: str | None = None try: - target.write_text(content, encoding="utf-8") + with tempfile.NamedTemporaryFile( + mode="w", + encoding="utf-8", + dir=target.parent, + delete=False, + suffix=".tmp", + ) as tmp: + tmp.write(content) + tmp_name = tmp.name + os.replace(tmp_name, target) except OSError as exc: + with contextlib.suppress(OSError): + if tmp_name is not None: + os.unlink(tmp_name) raise ConfigFileWriteError(f"Cannot write {name!r}: {exc}") from exc return target @@ -222,9 +238,22 @@ def _create_conf_file(subdir: Path, name: str, content: str) -> str: target = (resolved_subdir / (name + ".conf")).resolve() _assert_within(resolved_subdir, target) + tmp_name: str | None = None try: - target.write_text(content, encoding="utf-8") + with tempfile.NamedTemporaryFile( + mode="w", + encoding="utf-8", + dir=target.parent, + delete=False, + suffix=".tmp", + ) as tmp: + tmp.write(content) + tmp_name = tmp.name + os.replace(tmp_name, target) except OSError as exc: + with contextlib.suppress(OSError): + if tmp_name is not None: + os.unlink(tmp_name) raise ConfigFileWriteError(f"Cannot create {name!r}: {exc}") from exc return target.name