Files
BanGUI/backend/app/tasks/blocklist_import.py
Lukas 0133489920 Update observability docs and task utilities
- Add Observability.md documentation
- Standardize task logging with correlation_id support
- Add log_sanitizer utility for PII masking
- Update Tasks.md tracking
- Update geo_cache tasks and other task modules with correlation_id

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-05-03 11:52:09 +02:00

162 lines
5.6 KiB
Python

"""External blocklist import background task.

Registers an APScheduler job that downloads all enabled blocklist sources,
validates their entries, and applies bans via fail2ban on a configurable
schedule. The default schedule is daily at 03:00 UTC; it is stored in the
application :class:`~app.models.blocklist.ScheduleConfig` settings and can
be updated at runtime through the blocklist router.

The scheduler job ID is ``"blocklist_import"`` — using a stable id means
re-registering the job (e.g. after a schedule update) safely replaces the
existing entry without creating duplicates.

Correlation IDs are propagated through the task using
:mod:`app.utils.correlation` so that task logs can be correlated with the
triggering request (for manually triggered imports) or assigned a unique
ID per run (for scheduled imports).
"""
from __future__ import annotations
import uuid
from typing import TYPE_CHECKING, Any
import structlog
from app.services import ban_service, blocklist_service
from app.tasks.db import task_db
from app.tasks.timeout_utils import run_with_timeout
from app.utils.correlation import get_correlation_id, reset_correlation_id, set_correlation_id
from app.utils.runtime_state import get_effective_settings
if TYPE_CHECKING:
from aiohttp import ClientSession
from fastapi import FastAPI
from app.config import Settings
#: Module-level structlog logger used by every function in this module.
log: structlog.stdlib.BoundLogger = structlog.get_logger()
#: Stable APScheduler job id so the job can be replaced without duplicates.
JOB_ID: str = "blocklist_import"
#: Maximum seconds to allow for blocklist import task to complete; this is
#: the limit handed to ``run_with_timeout`` for each import run.
TASK_TIMEOUT_SECONDS: int = 300
async def _run_import_with_resources(
    settings: Settings,
    http_session: ClientSession,
    correlation_id: str | None = None,
) -> None:
    """Run one blocklist import inside its own correlation-ID scope.

    This is the callable handed to the scheduler (through the public
    ``run_import_with_resources`` alias). It installs a correlation ID,
    delegates the actual work to :func:`_do_import_with_settings`, and
    always resets the correlation ID afterwards.

    Args:
        settings: The resolved application settings used for database access.
        http_session: The shared aiohttp session used for blocklist downloads.
        correlation_id: Correlation ID of the triggering request, if any.
            When ``None``, a fresh UUID4 string identifies this run.
    """
    # Only mint a new ID when the caller did not supply one.
    run_id = str(uuid.uuid4()) if correlation_id is None else correlation_id
    context_token = set_correlation_id(run_id)
    try:
        await _do_import_with_settings(settings, http_session)
    finally:
        # Undo the set_correlation_id() call above, even if the import raised.
        reset_correlation_id(context_token)
async def _do_import_with_settings(settings: Settings, http_session: ClientSession) -> None:
    """Import all enabled blocklist sources, bounded by the task timeout.

    The caller is expected to have set the correlation ID already; every log
    event emitted here reads it back with ``get_correlation_id()``.

    Args:
        settings: Application settings; provides ``fail2ban_socket`` and is
            passed to ``task_db`` to open the database.
        http_session: aiohttp session forwarded to the import service.
    """

    async def _import_all_sources() -> None:
        # Resolved before the ``try`` block, so an error here is not caught below.
        fail2ban_socket: str = settings.fail2ban_socket
        log.info("blocklist_import_starting", correlation_id=get_correlation_id())
        try:
            async with task_db(settings) as db:
                outcome = await blocklist_service.import_all(
                    db,
                    http_session,
                    fail2ban_socket,
                    ban_ip=ban_service.ban_ip,
                )
            summary = {
                "total_imported": outcome.total_imported,
                "total_skipped": outcome.total_skipped,
                "errors": outcome.errors_count,
            }
            log.info(
                "blocklist_import_finished",
                correlation_id=get_correlation_id(),
                **summary,
            )
        except Exception:
            # Logged and not re-raised: a failed import does not propagate
            # out of this coroutine.
            log.exception("blocklist_import_unexpected_error", correlation_id=get_correlation_id())

    # The coroutine is handed to run_with_timeout together with the task
    # name and the module-level time limit.
    await run_with_timeout("blocklist_import", _import_all_sources(), TASK_TIMEOUT_SECONDS)
#: Public alias of :func:`_run_import_with_resources`; this is the callable
#: that ``_apply_schedule`` passes to ``blocklist_service.schedule_blocklist_job``.
run_import_with_resources = _run_import_with_resources
async def _run_import(app: FastAPI) -> None:
    """Run one blocklist import using resources taken from *app*.

    No correlation ID is passed along, so
    :func:`_run_import_with_resources` generates a fresh one for the run.

    Args:
        app: Application instance providing the effective settings and the
            shared HTTP session stored on ``app.state.http_session``.
    """
    effective_settings = get_effective_settings(app)
    shared_session = app.state.http_session
    await _run_import_with_resources(effective_settings, shared_session)
async def register(app: FastAPI) -> None:
    """Install (or replace) the blocklist import job on the app scheduler.

    Loads the persisted :class:`~app.models.blocklist.ScheduleConfig` from
    the database and hands it to :func:`_apply_schedule`, which registers
    the job. Intended to be called from the lifespan handler once the
    scheduler and database are ready.

    Args:
        app: The :class:`fastapi.FastAPI` application instance whose
            ``app.state.scheduler`` will receive the job.
    """
    async with task_db(get_effective_settings(app)) as db:
        schedule = await blocklist_service.get_schedule(db)
    _apply_schedule(app, schedule)
async def reschedule(app: FastAPI) -> None:
    """Re-register the blocklist import job with the latest schedule config.

    Called by the blocklist router after a schedule update so changes take
    effect immediately without a server restart. Any failure while loading
    or applying the schedule is logged as ``blocklist_reschedule_failed``
    (with traceback) and then re-raised unchanged to the caller.

    Args:
        app: The :class:`fastapi.FastAPI` application instance.

    Raises:
        Exception: If retrieving the schedule or applying it fails.
    """
    try:
        settings = get_effective_settings(app)
        async with task_db(settings) as db:
            config = await blocklist_service.get_schedule(db)
        log.info("blocklist_reschedule_applying", frequency=config.frequency)
        _apply_schedule(app, config)
    except Exception:
        # Honour the documented contract: record the failure here, then let
        # the original exception propagate so the caller can react to it.
        log.exception("blocklist_reschedule_failed")
        raise
    log.info("blocklist_reschedule_applied")
def _apply_schedule(app: FastAPI, config: Any) -> None:
    """Add or replace the scheduler job for the given schedule config.

    Delegates to ``blocklist_service.schedule_blocklist_job``, passing the
    app's scheduler, its effective settings, the shared HTTP session, the
    schedule config and the import callback. Trigger selection (cron vs.
    interval) is presumably done by that service function — it is not
    decided here.

    Args:
        app: FastAPI application instance.
        config: :class:`~app.models.blocklist.ScheduleConfig` to apply.
    """
    # Uses the module-level ``blocklist_service`` import; a function-local
    # re-import is unnecessary and would only shadow it.
    blocklist_service.schedule_blocklist_job(
        app.state.scheduler,
        get_effective_settings(app),
        app.state.http_session,
        config,
        run_import_with_resources,
    )