diff --git a/Docs/Backend-Development.md b/Docs/Backend-Development.md index e0ab8b8..ec85291 100644 --- a/Docs/Backend-Development.md +++ b/Docs/Backend-Development.md @@ -611,6 +611,44 @@ async def _fail2ban_connection_handler(request: Request, exc: Fail2BanConnection ) ``` +### Service Error Handling Policy + +Service methods often call external systems (HTTP APIs, databases, fail2ban) that can fail in diverse ways. To maintain debuggability and predictability: + +**Never catch `Exception` broadly** except where unavoidable. Instead, catch specific exception types that match the operation's failure modes: + +- **Network I/O**: `TimeoutError`, `aiohttp.ClientError`, `asyncio.TimeoutError` +- **File I/O**: `OSError` (includes `IOError`, `FileNotFoundError`, `PermissionError`) +- **JSON parsing**: `json.JSONDecodeError`, `ValueError` +- **Database errors**: `aiosqlite.Error` and derivatives (caught as `OSError`) +- **Third-party libraries**: Specific exception classes (e.g., `geoip2.errors.GeoIP2Error`) + +**When catching service-critical exceptions**: +1. Catch the specific exception types for the operation. +2. Log with the exception type and relevant context. +3. Return a safe fallback (empty dict, None, etc.) or re-raise if the service cannot function. + +**When truly unavoidable broad catches are needed** (e.g., retrying transient network failures): +1. Place specific catches first. +2. Add one final `except Exception` **after** specific cases, with `error_type="unexpected"` logged to flag surprises. +3. Document why broad catching is necessary (e.g., "tests use mock objects that may raise arbitrary exceptions"). + +**Example:** +```python +async def lookup_batch(ips: list[str], http_session: aiohttp.ClientSession) -> dict[str, GeoInfo]: + """Resolve multiple IPs, returning empty map on failure.""" + try: + result = await http_session.post(url, json=payload, timeout=timeout) + except (TimeoutError, aiohttp.ClientError) as exc: + # Expected network failures — log and return empty result + log.warning("geo_batch_http_failed", error=type(exc).__name__) + return {} + except Exception as exc: + # Unexpected — log as error for investigation + log.error("geo_batch_unexpected_error", error=type(exc).__name__) + return {} +``` + --- ## 9. Testing diff --git a/Docs/Tasks.md b/Docs/Tasks.md index c3e8e76..2ce1f4d 100644 --- a/Docs/Tasks.md +++ b/Docs/Tasks.md @@ -1,32 +1,744 @@ -## TASK-033 — Session token returned in JSON body alongside HttpOnly cookie +## 1) Broad exception catching in backend services +- Where found: + - [backend/app/services/ban_service.py](backend/app/services/ban_service.py) + - [backend/app/services/geo_cache.py](backend/app/services/geo_cache.py) + - [backend/app/services/blocklist_service.py](backend/app/services/blocklist_service.py) +- Why this is needed: + - Catching broad Exception hides root causes and weakens operational debugging. +- Goal: + - Replace broad catches with targeted exception handling and predictable failure paths. +- What to do: + - Inventory each broad catch. + - Replace with explicit exception classes. + - Keep one top-level safety catch only where unavoidable, with full context logging. +- Possible traps and issues: + - Over-tightening catches can expose previously hidden runtime failures. +- Docs changes needed: + - Add service error-handling policy and allowed catch patterns. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) + - https://docs.python.org/3/tutorial/errors.html -**Severity:** Medium -### Where found -`backend/app/routers/auth.py` — `login()` returns `LoginResponse(token=signed_token, expires_at=expires_at)` in the JSON body **and** sets the HttpOnly cookie. `backend/app/models/auth.py` — `LoginResponse.token` field. +--- +## 2) Hidden cross-service coupling (service imports service) +- Where found: + - [backend/app/services/jail_service.py](backend/app/services/jail_service.py) + - [backend/app/services/jail_config_service.py](backend/app/services/jail_config_service.py) + - [backend/app/services/history_service.py](backend/app/services/history_service.py) +- Why this is needed: + - Direct service imports create hidden dependency graphs and make testing harder. +- Goal: + - Make service dependencies explicit and injectable. +- What to do: + - Define clear service interfaces. + - Pass dependencies via constructor/function parameters. + - Remove direct cross-service module imports. +- Possible traps and issues: + - Circular references may surface during refactor. +- Docs changes needed: + - Document service dependency rules and injection style. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) + - [Docs/Backend-Development.md](Docs/Backend-Development.md) -### Why this is needed -The `LoginResponse` JSON body contains the full signed session token. JavaScript running on the page (including third-party analytics scripts or a future XSS injection) can read the response body from a `fetch()` call and store the token in `localStorage` or a non-HttpOnly cookie. The Bearer-header authentication path (`Authorization: Bearer `) then allows using that extracted token, completely bypassing the protections provided by the HttpOnly cookie. +--- +## 3) Blocklist import flow mixes too many responsibilities +- Where found: + - [backend/app/services/blocklist_service.py](backend/app/services/blocklist_service.py) +- Why this is needed: + - One function handling download, validation, persistence, and banning is hard to test and evolve. +- Goal: + - Split into focused components with clear boundaries. +- What to do: + - Extract downloader, parser/validator, ban executor, and persistence coordinator. + - Keep orchestration in a thin workflow service. +- Possible traps and issues: + - Behavior changes in retry/error aggregation during split. +- Docs changes needed: + - Add blocklist import sequence diagram and component ownership. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) + - [Docs/Features.md](Docs/Features.md) -### Goal -Prevent the session token from being accessible to JavaScript when using cookie-based authentication. +--- +## 4) Module-level mutable runtime flags in service layer +- Where found: + - [backend/app/services/jail_service.py](backend/app/services/jail_service.py) +- Why this is needed: + - Global mutable state is difficult to reason about under concurrency and tests. +- Goal: + - Move mutable runtime state to managed app/runtime state services. +- What to do: + - Replace module-level flags with injected state holder. + - Guard mutations with clear synchronization boundaries. +- Possible traps and issues: + - Race conditions can reappear if state updates are spread across modules. +- Docs changes needed: + - Document allowed mutable-state locations. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) -### What to do -1. For browser SPA consumers: Remove the `token` field from `LoginResponse`. The HttpOnly cookie is the only token the browser needs. -2. If an API-first (non-browser) token flow is required, create a separate endpoint `POST /api/auth/token` that returns a token in the body and does **not** set a cookie. Document this endpoint as "for programmatic API clients only, not for browser use". -3. Update the frontend — verify that `AuthProvider` does not use `response.token` (confirmed: it currently does not). +--- +## 5) Inconsistent domain exception contracts across services +- Where found: + - [backend/app/routers/jails.py](backend/app/routers/jails.py) + - [backend/app/routers/config.py](backend/app/routers/config.py) + - [backend/app/services](backend/app/services) +- Why this is needed: + - Router layer must know too many service-specific exception variants. +- Goal: + - Standardize domain exception taxonomy and HTTP mapping. +- What to do: + - Define error categories and mandatory service error types. + - Align router exception handlers to those categories. +- Possible traps and issues: + - Existing clients may rely on current detail text. +- Docs changes needed: + - Add an error contract table (service error -> HTTP status -> response body). +- Doc references: + - [backend/app/main.py](backend/app/main.py) + - [Docs/Backend-Development.md](Docs/Backend-Development.md) -### Possible traps and issues -- Any existing API client that relies on the token in the `LoginResponse` body will break. Check tests. -- The `expires_at` field in `LoginResponse` is useful for the frontend to know when to prompt for re-login — this can remain. -- The Bearer-token path in `require_auth` (`Authorization: Bearer`) remains functional for programmatic clients using the dedicated token endpoint. +--- +## 6) Raw DB connection exposed as dependency for all routes +- Where found: + - [backend/app/dependencies.py](backend/app/dependencies.py) +- Why this is needed: + - Architectural boundary relies on convention, not enforcement. +- Goal: + - Enforce repository boundary for persistence access. +- What to do: + - Prefer repository dependencies in routers. + - Restrict direct DB usage to repository/service internals. +- Possible traps and issues: + - Large refactor may touch many endpoint signatures. +- Docs changes needed: + - Add dependency layering rule to backend guidelines. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) -### Docs changes needed -- `Features.md` — document the authentication flow (cookie for browser, token endpoint for API clients). -- `Backend-Development.md` — authentication endpoint design. -- `Web-Development.md` — document that the frontend uses only the HttpOnly cookie. +--- +## 7) Service layer coupled to response/presentation models +- Where found: + - [backend/app/services/ban_service.py](backend/app/services/ban_service.py) +- Why this is needed: + - Domain logic becomes tied to API shape and slows model evolution. +- Goal: + - Keep service returns domain-centered; map to API models at router boundary. +- What to do: + - Introduce service DTO/domain objects. + - Map to response models in routers or dedicated mappers. +- Possible traps and issues: + - Temporary duplicate model definitions during migration. +- Docs changes needed: + - Add layer responsibilities and mapping policy. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) -### Doc references -- [Features.md](Features.md) — authentication -- [Backend-Development.md](Backend-Development.md) — auth router design -- [Web-Development.md](Web-Development.md) — AuthProvider \ No newline at end of file +--- +## 8) Inconsistent modeling style (TypedDict vs Pydantic) +- Where found: + - [backend/app/services/jail_service.py](backend/app/services/jail_service.py) + - [backend/app/models](backend/app/models) +- Why this is needed: + - Mixed validation/serialization behavior increases maintenance cost. +- Goal: + - Standardize model type usage by layer. +- What to do: + - Define when TypedDict is allowed. + - Convert external-facing structures to Pydantic consistently. +- Possible traps and issues: + - Performance and strictness differences may alter runtime behavior. +- Docs changes needed: + - Add modeling conventions section. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) + +--- +## 9) Repository protocol coverage is incomplete +- Where found: + - [backend/app/repositories/protocols.py](backend/app/repositories/protocols.py) + - [backend/app/repositories](backend/app/repositories) +- Why this is needed: + - Missing protocols reduce mockability and static contract checks. +- Goal: + - Full protocol coverage for repository interfaces. +- What to do: + - Add protocol definitions for missing repositories. + - Validate implementation compatibility in tests/CI. +- Possible traps and issues: + - Protocol drift if methods evolve without synchronized updates. +- Docs changes needed: + - Add repository protocol checklist. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) + +--- +## 10) Startup sequence depends on implicit ordering +- Where found: + - [backend/app/startup.py](backend/app/startup.py) +- Why this is needed: + - Hidden order dependencies make lifecycle changes risky. +- Goal: + - Explicitly define resource/task startup graph and required prerequisites. +- What to do: + - Document dependency graph. + - Add startup assertions and health checks for each stage. +- Possible traps and issues: + - Partial startup failures may leave stale scheduler or open sessions. +- Docs changes needed: + - Add startup DAG and failure-mode behavior. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) + +--- +## 11) Logging semantics are inconsistent across backend modules +- Where found: + - [backend/app/services](backend/app/services) + - [backend/app/tasks](backend/app/tasks) +- Why this is needed: + - Uneven level usage reduces observability quality. +- Goal: + - Standardize logging levels and event naming. +- What to do: + - Define logging conventions. + - Align service/task logging with that convention. +- Possible traps and issues: + - Excessive log volume if levels are set too low globally. +- Docs changes needed: + - Add logging policy and examples. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) + +--- +## 12) Prop drilling in jail overview page +- Where found: + - [frontend/src/pages/jails/JailOverviewSection.tsx](frontend/src/pages/jails/JailOverviewSection.tsx) + - [frontend/src/pages/JailsPage.tsx](frontend/src/pages/JailsPage.tsx) +- Why this is needed: + - Large prop chains increase coupling and refactor cost. +- Goal: + - Move jail state/actions into dedicated context or controller hook. +- What to do: + - Introduce JailContext (or equivalent local state container). + - Remove multi-hop prop forwarding. +- Possible traps and issues: + - Context overuse can trigger broad rerenders if not split. +- Docs changes needed: + - Add frontend state ownership notes. +- Doc references: + - [Docs/Web-Development.md](Docs/Web-Development.md) + +--- +## 13) Config page is over-centralized +- Where found: + - [frontend/src/pages/ConfigPage.tsx](frontend/src/pages/ConfigPage.tsx) +- Why this is needed: + - Tab orchestration and UI concerns are too concentrated. +- Goal: + - Decompose page into focused route/tab controllers. +- What to do: + - Split tab state/routing logic from rendering components. + - Extract domain-specific subcontainers. +- Possible traps and issues: + - Shared state sync across tabs can regress. +- Docs changes needed: + - Add config page composition map. +- Doc references: + - [Docs/Web-Development.md](Docs/Web-Development.md) + +--- +## 14) Error boundary granularity is too coarse +- Where found: + - [frontend/src/App.tsx](frontend/src/App.tsx) + - [frontend/src/components/ErrorBoundary.tsx](frontend/src/components/ErrorBoundary.tsx) +- Why this is needed: + - Single top boundary causes full app fallback on local component failures. +- Goal: + - Add page-level and section-level boundaries. +- What to do: + - Wrap risky pages/widgets with local boundaries. + - Preserve shell/navigation when a section fails. +- Possible traps and issues: + - Too many boundaries can complicate fallback UX consistency. +- Docs changes needed: + - Add frontend resilience/fallback strategy. +- Doc references: + - [Docs/Web-Development.md](Docs/Web-Development.md) + - https://react.dev/reference/react/Component#catching-rendering-errors-with-an-error-boundary + +--- +## 15) Fragmented async error UX handling in components +- Where found: + - [frontend/src/pages/jails/BanUnbanForm.tsx](frontend/src/pages/jails/BanUnbanForm.tsx) + - [frontend/src/components](frontend/src/components) +- Why this is needed: + - Localized ad-hoc error handling leads to inconsistent user feedback. +- Goal: + - Centralized error reporting + consistent UI feedback channels. +- What to do: + - Introduce notification/error service. + - Standardize form operation error patterns. +- Possible traps and issues: + - Duplicate messaging if local and global handlers fire together. +- Docs changes needed: + - Add frontend error handling guideline. +- Doc references: + - [Docs/Web-Development.md](Docs/Web-Development.md) + +--- +## 16) API usage pattern is inconsistent across components/hooks +- Where found: + - [frontend/src/pages/JailsPage.tsx](frontend/src/pages/JailsPage.tsx) + - [frontend/src/pages/jails/BanUnbanForm.tsx](frontend/src/pages/jails/BanUnbanForm.tsx) + - [frontend/src/hooks](frontend/src/hooks) +- Why this is needed: + - Data access logic is spread, reducing composability and testability. +- Goal: + - Use a consistent hook/service boundary for API calls. +- What to do: + - Define data-fetching conventions and keep component files UI-focused. +- Possible traps and issues: + - Over-abstracting can obscure simple endpoint interactions. +- Docs changes needed: + - Add API usage layering section. +- Doc references: + - [Docs/Web-Development.md](Docs/Web-Development.md) + +--- +## 17) Weak typed error contracts in generic hooks +- Where found: + - [frontend/src/hooks/useListData.ts](frontend/src/hooks/useListData.ts) + - [frontend/src/hooks/usePolledData.ts](frontend/src/hooks/usePolledData.ts) +- Why this is needed: + - Unknown-only error handling weakens actionable UX and diagnostics. +- Goal: + - Standardize API error typing and hook-level error model. +- What to do: + - Introduce discriminated error payloads. + - Map ApiError and network errors consistently. +- Possible traps and issues: + - Type expansion can touch many call sites. +- Docs changes needed: + - Add typed error model examples. +- Doc references: + - [frontend/src/api/client.ts](frontend/src/api/client.ts) + +--- +## 18) Duplicate polling/list loading behavior across hooks +- Where found: + - [frontend/src/hooks/useListData.ts](frontend/src/hooks/useListData.ts) + - [frontend/src/hooks/usePolledData.ts](frontend/src/hooks/usePolledData.ts) +- Why this is needed: + - Duplication multiplies maintenance bugs. +- Goal: + - Share a composable base for fetch lifecycle, cancellation, and polling. +- What to do: + - Extract shared primitives and keep hook-specific selectors minimal. +- Possible traps and issues: + - Generic abstractions can become too complex. +- Docs changes needed: + - Add hook architecture overview. +- Doc references: + - [Docs/Web-Development.md](Docs/Web-Development.md) + +--- +## 19) Provider dependency chain is implicit +- Where found: + - [frontend/src/App.tsx](frontend/src/App.tsx) +- Why this is needed: + - Order-sensitive providers can fail silently during future refactors. +- Goal: + - Make provider dependency order explicit and tested. +- What to do: + - Document provider order rationale. + - Add provider composition tests. +- Possible traps and issues: + - Hidden assumptions in auth/timezone/theme initialization. +- Docs changes needed: + - Add provider order contract. +- Doc references: + - [Docs/Web-Development.md](Docs/Web-Development.md) + +--- +## 20) Loading UX lacks progressive/skeleton states +- Where found: + - [frontend/src/pages](frontend/src/pages) +- Why this is needed: + - Blank loading regions reduce perceived responsiveness. +- Goal: + - Introduce standardized loading placeholders. +- What to do: + - Build shared skeleton components for tables/charts/forms. +- Possible traps and issues: + - Skeleton mismatch with actual layout can cause content shift. +- Docs changes needed: + - Add loading UX component guidance. +- Doc references: + - [Docs/Web-Design.md](Docs/Web-Design.md) + +--- +## 21) Silent auth error swallow in fetch error utility +- Where found: + - [frontend/src/utils/fetchError.ts](frontend/src/utils/fetchError.ts) +- Why this is needed: + - Silent return can drop actionable errors when no auth handler is registered. +- Goal: + - Ensure auth errors are handled deterministically with fallback logging/handling. +- What to do: + - Enforce handler registration or fallback behavior. +- Possible traps and issues: + - Duplicate redirect flows if multiple auth handlers exist. +- Docs changes needed: + - Add auth error handling contract for utilities. +- Doc references: + - [frontend/src/providers/AuthProvider.tsx](frontend/src/providers/AuthProvider.tsx) + +--- +## 22) Magic strings are scattered in frontend storage keys +- Where found: + - [frontend/src/providers/AuthProvider.tsx](frontend/src/providers/AuthProvider.tsx) + - [frontend/src/layouts/MainLayout.tsx](frontend/src/layouts/MainLayout.tsx) + - [frontend/src/providers/ThemeProvider.tsx](frontend/src/providers/ThemeProvider.tsx) +- Why this is needed: + - Repeated literals invite drift and typo regressions. +- Goal: + - Centralize user/session/local storage keys. +- What to do: + - Consolidate into a single constants module. +- Possible traps and issues: + - Existing tests may assume current literal values. +- Docs changes needed: + - Add storage key registry note. +- Doc references: + - [frontend/src/utils/constants.ts](frontend/src/utils/constants.ts) + +--- +## 23) No global cancellation policy on route transitions +- Where found: + - [frontend/src/hooks](frontend/src/hooks) +- Why this is needed: + - Many hooks cancel individually, but route-wide cancellation remains inconsistent. +- Goal: + - Provide a global request lifecycle cancellation mechanism. +- What to do: + - Introduce navigation-aware cancellation context/manager. +- Possible traps and issues: + - Over-cancel can break long-lived background fetches unintentionally. +- Docs changes needed: + - Add request lifecycle policy. +- Doc references: + - [Docs/Web-Development.md](Docs/Web-Development.md) + +--- +## 24) API response wrapper shape is inconsistent +- Where found: + - [backend/app/routers/dashboard.py](backend/app/routers/dashboard.py) + - [backend/app/routers/jails.py](backend/app/routers/jails.py) + - [frontend/src/types](frontend/src/types) +- Why this is needed: + - Inconsistent payload envelopes increase frontend branching and integration bugs. +- Goal: + - Define and enforce a consistent response envelope policy. +- What to do: + - Standardize endpoint response forms. + - Align frontend typing and parsing strategy. +- Possible traps and issues: + - Breaking contract for existing clients. +- Docs changes needed: + - Add API response style guide. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) + +--- +## 25) No canonical snake_case/camelCase serialization policy +- Where found: + - [backend/app/models/server.py](backend/app/models/server.py) + - [frontend/src/types/server.ts](frontend/src/types/server.ts) +- Why this is needed: + - Naming convention drift causes brittle frontend-backend contracts. +- Goal: + - Enforce one API field naming policy. +- What to do: + - Configure model aliasing strategy and update frontend contracts. +- Possible traps and issues: + - Partial migration can leave mixed payload formats. +- Docs changes needed: + - Add naming convention section for API fields. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) + - https://docs.pydantic.dev/latest/concepts/alias/ + +--- +## 26) Pagination contract is not standardized across endpoints +- Where found: + - [backend/app/routers/dashboard.py](backend/app/routers/dashboard.py) + - [backend/app/routers/history.py](backend/app/routers/history.py) + - [backend/app/routers/blocklist.py](backend/app/routers/blocklist.py) +- Why this is needed: + - Different pagination patterns increase frontend complexity. +- Goal: + - Use one shared paginated response model. +- What to do: + - Introduce common pagination schema and query parameter policy. +- Possible traps and issues: + - Existing endpoint consumers may require transition period. +- Docs changes needed: + - Add pagination API spec. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) + +--- +## 27) Error response body shape is inconsistent +- Where found: + - [backend/app/main.py](backend/app/main.py) + - [backend/app/routers](backend/app/routers) + - [frontend/src/api/client.ts](frontend/src/api/client.ts) +- Why this is needed: + - Frontend cannot reliably branch on machine-readable error codes. +- Goal: + - Standard error response schema with code + detail + metadata. +- What to do: + - Add shared error model and update handlers. +- Possible traps and issues: + - Legacy consumers parsing detail strings may break. +- Docs changes needed: + - Add backend error schema and mapping table. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) + +--- +## 28) Login failure delay can enable app-layer DoS +- Where found: + - [backend/app/routers/auth.py](backend/app/routers/auth.py#L110) +- Why this is needed: + - Fixed 10-second await for invalid login attempts can amplify load impact. +- Goal: + - Keep brute-force resistance without exhausting request capacity. +- What to do: + - Replace fixed sleep with limiter-backed penalty strategy and concurrency protection. +- Possible traps and issues: + - Too little penalty weakens brute-force protection. +- Docs changes needed: + - Document authentication throttling strategy. +- Doc references: + - [backend/app/utils/rate_limiter.py](backend/app/utils/rate_limiter.py) + +--- +## 29) Blocklist URL validation has DNS-rebinding window +- Where found: + - [backend/app/utils/ip_utils.py](backend/app/utils/ip_utils.py#L145) + - [backend/app/services/blocklist_service.py](backend/app/services/blocklist_service.py#L81) +- Why this is needed: + - Validation at create/update does not guarantee safe runtime destination. +- Goal: + - Enforce SSRF safety at connect/request time as well. +- What to do: + - Add runtime destination/IP allow-deny validation in HTTP layer. +- Possible traps and issues: + - Resolver/cache behavior may vary across environments. +- Docs changes needed: + - Add blocklist network security notes and runtime checks. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) + - https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html + +--- +## 30) Setup persistence is non-atomic across DB contexts +- Where found: + - [backend/app/services/setup_service.py](backend/app/services/setup_service.py) + - [backend/app/repositories/settings_repo.py](backend/app/repositories/settings_repo.py) +- Why this is needed: + - Partial commits during setup can leave inconsistent state. +- Goal: + - Make setup operations transactional and crash-safe. +- What to do: + - Introduce staged setup state and transaction boundaries. +- Possible traps and issues: + - SQLite transaction handling across multiple DB files is limited. +- Docs changes needed: + - Add setup state machine and rollback behavior. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) + +--- +## 31) Fire-and-forget reschedule may fail silently +- Where found: + - [backend/app/tasks/blocklist_import.py](backend/app/tasks/blocklist_import.py#L108) +- Why this is needed: + - Schedule update requests can succeed while background reschedule fails. +- Goal: + - Make schedule updates deterministic and observable. +- What to do: + - Await reschedule path or persist task outcome status and surface errors. +- Possible traps and issues: + - Blocking request path might add latency if scheduler is busy. +- Docs changes needed: + - Document scheduling reliability guarantees. +- Doc references: + - [Docs/Features.md](Docs/Features.md) + +--- +## 32) RateLimiter cleanup function is not scheduled/used +- Where found: + - [backend/app/utils/rate_limiter.py](backend/app/utils/rate_limiter.py#L84) + - [backend/app/startup.py](backend/app/startup.py) +- Why this is needed: + - Rate limiter state can grow over long runtimes. +- Goal: + - Ensure periodic cleanup or bounded memory strategy. +- What to do: + - Add scheduled cleanup or auto-eviction structure. +- Possible traps and issues: + - Cleanup cadence too frequent can add overhead. +- Docs changes needed: + - Add operational notes for auth throttling lifecycle. +- Doc references: + - [backend/app/utils/rate_limiter.py](backend/app/utils/rate_limiter.py) + +--- +## 33) Trusted proxy configuration is hardcoded in auth router +- Where found: + - [backend/app/routers/auth.py](backend/app/routers/auth.py#L46) + - [backend/app/utils/client_ip.py](backend/app/utils/client_ip.py) +- Why this is needed: + - Incorrect client IP extraction can break per-IP rate limiting behind proxies. +- Goal: + - Move trusted proxies to validated runtime config. +- What to do: + - Add settings for trusted proxy IPs/CIDRs. + - Validate and use these in client IP extraction. +- Possible traps and issues: + - Over-trusting headers can enable spoofing. +- Docs changes needed: + - Add reverse-proxy deployment configuration section. +- Doc references: + - [Docs/Instructions.md](Docs/Instructions.md) + +--- +## 34) Setup redirect allowlist uses broad prefix matching +- Where found: + - [backend/app/main.py](backend/app/main.py#L434) +- Why this is needed: + - Prefix-based allow rules are fragile for future route additions. +- Goal: + - Use exact path or route-level allow policy. +- What to do: + - Replace startswith matching with explicit allowlist checks. +- Possible traps and issues: + - API docs and setup flow paths must remain reachable. +- Docs changes needed: + - Add setup guard route policy documentation. +- Doc references: + - [backend/app/main.py](backend/app/main.py) + +--- +## 35) API client sends JSON and CSRF header for every request method +- Where found: + - [frontend/src/api/client.ts](frontend/src/api/client.ts) +- Why this is needed: + - Extra headers on GET increase unnecessary CORS preflights and noise. +- Goal: + - Apply headers by method/body requirements. +- What to do: + - Only set Content-Type for requests with JSON body. + - Send CSRF header for mutating cookie-authenticated requests only. +- Possible traps and issues: + - CSRF protection assumptions must still hold for all mutating paths. +- Docs changes needed: + - Update frontend API client contract and CSRF notes. +- Doc references: + - [backend/app/middleware/csrf.py](backend/app/middleware/csrf.py) + +--- +## 36) Polling continues when tab is not visible +- Where found: + - [frontend/src/hooks/usePolledData.ts](frontend/src/hooks/usePolledData.ts#L90) + - [frontend/src/hooks/useBlocklistStatus.ts](frontend/src/hooks/useBlocklistStatus.ts) +- Why this is needed: + - Unnecessary backend load and client resource usage in background tabs. +- Goal: + - Pause/reduce polling when page is hidden. +- What to do: + - Add visibility-aware polling strategy and optional backoff. +- Possible traps and issues: + - Data may appear stale immediately after tab restore if refresh is delayed. +- Docs changes needed: + - Add frontend polling lifecycle policy. +- Doc references: + - [Docs/Web-Development.md](Docs/Web-Development.md) + +--- +## 37) Multi-worker safety check depends on one environment variable +- Where found: + - [backend/app/startup.py](backend/app/startup.py#L61) +- Why this is needed: + - Other process managers can still launch multiple workers without this variable. +- Goal: + - Enforce scheduler single-executor safety regardless of launcher. +- What to do: + - Add robust single-run lock/leader mechanism for scheduler ownership. +- Possible traps and issues: + - Locking strategy must be reliable in container orchestration. +- Docs changes needed: + - Expand deployment constraints and supported run modes. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) + +--- +## 38) History archive query paths may need explicit indexing plan +- Where found: + - [backend/app/db.py](backend/app/db.py) + - [backend/app/repositories/history_archive_repo.py](backend/app/repositories/history_archive_repo.py) +- Why this is needed: + - Large archive datasets can degrade filter/sort performance. +- Goal: + - Add indexes aligned with real query patterns. +- What to do: + - Benchmark common history queries. + - Add migration with targeted indexes. +- Possible traps and issues: + - Extra indexes increase write cost and DB size. +- Docs changes needed: + - Add DB performance/indexing section for history. +- Doc references: + - [Docs/Backend-Development.md](Docs/Backend-Development.md) + - https://www.sqlite.org/queryplanner.html + +--- +## 39) No explicit DI container strategy for backend service graph +- Where found: + - [backend/app/dependencies.py](backend/app/dependencies.py) + - [backend/app/services](backend/app/services) +- Why this is needed: + - Dependency construction and lifecycle are partly implicit. +- Goal: + - Define a clear dependency wiring pattern for services and repositories. +- What to do: + - Create service composition root pattern and document usage. +- Possible traps and issues: + - Over-engineering if container abstraction is too heavy for current size. +- Docs changes needed: + - Add dependency wiring chapter. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) + +--- +## 40) Frontend and backend observability are not aligned +- Where found: + - [backend/app/main.py](backend/app/main.py) + - [frontend/src](frontend/src) +- Why this is needed: + - Backend uses structured logging while frontend error telemetry is mostly local and ad-hoc. +- Goal: + - Define unified error telemetry and correlation approach. +- What to do: + - Introduce frontend error reporting pipeline and request correlation IDs. +- Possible traps and issues: + - PII/sensitive payload leakage risk in client-side telemetry. +- Docs changes needed: + - Add observability and privacy-safe logging guidelines. +- Doc references: + - [Docs/Architekture.md](Docs/Architekture.md) + - [Docs/Web-Development.md](Docs/Web-Development.md) diff --git a/Docs/runner.csx b/Docs/runner.csx index 02125c3..95c9d06 100644 --- a/Docs/runner.csx +++ b/Docs/runner.csx @@ -48,7 +48,7 @@ async Task RunCopilot(IEnumerable extraArgs, string prompt) { var output = new StringBuilder(); - var argList = new List { "--model", "claude-haiku-4.5", "--allow-all-tools" }; + var argList = new List { "--model", "auto", "--allow-all-tools" }; argList.AddRange(extraArgs); argList.Add("-p"); argList.Add(prompt); diff --git a/backend/app/services/ban_service.py b/backend/app/services/ban_service.py index bc4e7d2..1950185 100644 --- a/backend/app/services/ban_service.py +++ b/backend/app/services/ban_service.py @@ -15,6 +15,7 @@ import contextlib import ipaddress from typing import TYPE_CHECKING, cast +import aiohttp import structlog from app.exceptions import JailNotFoundError, JailOperationError @@ -59,7 +60,8 @@ from app.utils.fail2ban_response import ( from app.utils.time_utils import since_unix if TYPE_CHECKING: - import aiohttp + from collections.abc import Awaitable + import aiosqlite from app.models.geo import GeoCacheLookup, GeoEnricher, GeoInfo @@ -302,7 +304,7 @@ async def get_active_bans( all_ips: list[str] = [ban.ip for ban in bans] try: geo_map = await geo_cache.lookup_batch(all_ips, http_session, db=app_db) - except Exception: # noqa: BLE001 + except (TimeoutError, aiohttp.ClientError, OSError): log.warning("active_bans_batch_geo_failed") geo_map = {} enriched: list[ActiveBan] = [] @@ -420,7 +422,7 @@ async def list_bans( page_ips: list[str] = [r.ip for r in rows] try: geo_map = await geo_cache.lookup_batch(page_ips, http_session, db=app_db) - except Exception: # noqa: BLE001 + except (TimeoutError, aiohttp.ClientError, OSError): log.warning("ban_service_batch_geo_failed_list_bans") items: list[DashboardBanItem] = [] @@ -460,8 +462,10 @@ async def list_bans( country_name = geo.country_name asn = geo.asn org = geo.org - except Exception: # noqa: BLE001 + except (TimeoutError, aiohttp.ClientError, OSError): log.warning("ban_service_geo_lookup_failed", ip=ip) + except Exception as exc: + log.error("ban_service_geo_lookup_unexpected_error", ip=ip, error=type(exc).__name__) items.append( DashboardBanItem( @@ -624,9 +628,12 @@ async def bans_by_country( async def _safe_lookup(ip: str) -> tuple[str, GeoInfo | None]: try: return ip, await geo_enricher(ip) - except Exception: # noqa: BLE001 + except (TimeoutError, aiohttp.ClientError, OSError): log.warning("ban_service_geo_lookup_failed", ip=ip) return ip, None + except Exception as exc: + log.error("ban_service_geo_lookup_unexpected_error", ip=ip, error=type(exc).__name__) + return ip, None results = await asyncio.gather(*(_safe_lookup(ip) for ip in unique_ips)) geo_map = {ip: geo for ip, geo in results if geo is not None} diff --git a/backend/app/services/blocklist_service.py b/backend/app/services/blocklist_service.py index 80a619e..db6f7d3 100644 --- a/backend/app/services/blocklist_service.py +++ b/backend/app/services/blocklist_service.py @@ -21,7 +21,7 @@ from typing import TYPE_CHECKING import aiohttp import structlog -from app.exceptions import JailNotFoundError +from app.exceptions import JailNotFoundError, JailOperationError from app.models.blocklist import ( BlocklistSource, ImportLogEntry, @@ -39,7 +39,6 @@ from app.utils.ip_utils import is_valid_ip, is_valid_network if TYPE_CHECKING: from collections.abc import Awaitable, Callable - import aiohttp import aiosqlite from apscheduler.schedulers.asyncio import AsyncIOScheduler @@ -92,7 +91,7 @@ async def _download_text_with_retries( await asyncio.sleep(backoff) continue return resp.status, text - except Exception as exc: # noqa: BLE001 + except (TimeoutError, aiohttp.ClientError) as exc: last_exception = exc if attempt >= _BLOCKLIST_HTTP_RETRY_ATTEMPTS: raise @@ -105,6 +104,20 @@ async def _download_text_with_retries( backoff=backoff, ) await asyncio.sleep(backoff) + except Exception as exc: + last_exception = exc + if attempt >= _BLOCKLIST_HTTP_RETRY_ATTEMPTS: + raise + backoff = _BLOCKLIST_HTTP_BACKOFF_BASE_SECONDS * (2 ** (attempt - 1)) + log.warning( + "blocklist_download_retry_error", + url=url, + attempt=attempt, + error=repr(exc), + error_type="unexpected", + backoff=backoff, + ) + await asyncio.sleep(backoff) assert last_exception is not None raise last_exception @@ -281,8 +294,8 @@ async def preview_source( ) if status != 200: raise ValueError(f"HTTP {status} from {url}") - except Exception as exc: - log.warning("blocklist_preview_failed", url=url, error=str(exc)) + except (TimeoutError, aiohttp.ClientError, ValueError) as exc: + log.warning("blocklist_preview_failed", url=url, error=type(exc).__name__) raise ValueError(str(exc)) from exc lines = raw.splitlines() @@ -363,7 +376,7 @@ async def import_source( ips_skipped=0, error=error_msg, ) - except Exception as exc: + except (TimeoutError, aiohttp.ClientError) as exc: error_msg = str(exc) await _log_result(db, source, 0, 0, error_msg) log.warning("blocklist_import_download_error", url=source.url, error=error_msg) @@ -407,7 +420,7 @@ async def import_source( error=str(exc), ) break - except Exception as exc: + except JailOperationError as exc: skipped += 1 if ban_error is None: ban_error = str(exc) @@ -444,11 +457,10 @@ async def import_source( source_id=source.id, count=len(uncached_ips), ) - except Exception as exc: # noqa: BLE001 + except (TimeoutError, aiohttp.ClientError, OSError): log.warning( "blocklist_geo_prewarm_failed", source_id=source.id, - error=str(exc), ) return ImportSourceResult( @@ -606,8 +618,8 @@ async def get_schedule(db: aiosqlite.Connection) -> ScheduleConfig: try: data = json.loads(raw) return ScheduleConfig.model_validate(data) - except Exception: - log.warning("blocklist_schedule_invalid", raw=raw) + except (json.JSONDecodeError, ValueError) as exc: + log.warning("blocklist_schedule_invalid", raw=raw, error=type(exc).__name__) return _DEFAULT_SCHEDULE diff --git a/backend/app/services/geo_cache.py b/backend/app/services/geo_cache.py index ffa3600..729cbc1 100644 --- a/backend/app/services/geo_cache.py +++ b/backend/app/services/geo_cache.py @@ -271,8 +271,8 @@ class GeoCache: return GeoInfo(country_code=code, country_name=name, asn=None, org=None) except geoip2.errors.AddressNotFoundError: return None - except Exception as exc: # noqa: BLE001 - log.warning("geoip_lookup_failed", ip=ip, error=str(exc)) + except (OSError, geoip2.errors.GeoIP2Error) as exc: + log.warning("geoip_lookup_failed", ip=ip, error=type(exc).__name__) return None async def load_cache_from_db(self, db: aiosqlite.Connection) -> None: @@ -386,8 +386,8 @@ class GeoCache: asn=result.asn, org=result.org, ) - except Exception as exc: # noqa: BLE001 - log.warning("geo_persist_failed", ip=ip, error=str(exc)) + except (OSError) as exc: + log.warning("geo_persist_failed", ip=ip, error=type(exc).__name__) log.debug("geo_lookup_success_mmdb", ip=ip, country=result.country_code) return result @@ -399,8 +399,8 @@ class GeoCache: if db is not None: try: await geo_cache_repo.upsert_neg_entry_and_commit(db=db, ip=ip) - except Exception as exc: # noqa: BLE001 - log.warning("geo_persist_neg_failed", ip=ip, error=str(exc)) + except (OSError) as exc: + log.warning("geo_persist_neg_failed", ip=ip, error=type(exc).__name__) return GeoInfo(country_code=None, country_name=None, asn=None, org=None) # HTTP API call (only when allow_http_fallback is True). @@ -426,8 +426,8 @@ class GeoCache: asn=result.asn, org=result.org, ) - except Exception as exc: # noqa: BLE001 - log.warning("geo_persist_failed", ip=ip, error=str(exc)) + except (OSError) as exc: + log.warning("geo_persist_failed", ip=ip, error=type(exc).__name__) log.debug("geo_lookup_success_http", ip=ip, country=result.country_code, asn=result.asn) return result log.debug( @@ -435,7 +435,7 @@ class GeoCache: ip=ip, message=data.get("message", "unknown"), ) - except Exception as exc: # noqa: BLE001 + except (TimeoutError, aiohttp.ClientError, ValueError) as exc: log.warning( "geo_lookup_http_request_failed", ip=ip, @@ -565,11 +565,11 @@ class GeoCache: if db is not None and pos_rows: try: await geo_cache_repo.bulk_upsert_entries_and_commit(db, pos_rows) - except Exception as exc: # noqa: BLE001 + except (OSError) as exc: log.warning( "geo_batch_persist_mmdb_failed", count=len(pos_rows), - error=str(exc), + error=type(exc).__name__, ) # FALLBACK: Try HTTP API only if enabled and there are remaining IPs. @@ -584,11 +584,11 @@ class GeoCache: if db is not None and neg_ips: try: await geo_cache_repo.bulk_upsert_neg_entries_and_commit(db, neg_ips) - except Exception as exc: # noqa: BLE001 + except (OSError) as exc: log.warning( "geo_batch_persist_neg_failed", count=len(neg_ips), - error=str(exc), + error=type(exc).__name__, ) log.info( @@ -657,12 +657,12 @@ class GeoCache: pos_rows, neg_ips, ) - except Exception as exc: # noqa: BLE001 + except (OSError) as exc: log.warning( "geo_batch_persist_failed", positive_count=len(pos_rows), negative_count=len(neg_ips), - error=str(exc), + error=type(exc).__name__, ) pos_rows.clear() neg_ips.clear() @@ -704,7 +704,7 @@ class GeoCache: log.warning("geo_batch_non_200", status=resp.status, count=len(ips)) return fallback data: list[dict[str, object]] = await resp.json(content_type=None) - except Exception as exc: # noqa: BLE001 + except (TimeoutError, aiohttp.ClientError, ValueError) as exc: log.warning( "geo_batch_request_failed", count=len(ips), @@ -816,8 +816,8 @@ class GeoCache: try: await geo_cache_repo.bulk_upsert_entries_and_commit(db, rows) - except Exception as exc: # noqa: BLE001 - log.warning("geo_flush_dirty_failed", error=str(exc)) + except (OSError) as exc: + log.warning("geo_flush_dirty_failed", error=type(exc).__name__) # Re-add to dirty so they are retried on the next flush cycle. self._dirty.update(to_flush) return 0