refactor(scheduler): drop separate scheduler.db in favour of MemoryJobStore

Scheduler used a separate SQLite file (scheduler.db) only to persist one
cron job. This was originally required because APScheduler's
SQLAlchemyJobStore is sync-only, creating an async/sync driver conflict
when accessing the same file.

The job is rebuilt from config.json on every startup regardless
(replace_existing=True), so the persisted state only served misfire
detection. Moved misfire detection into the app layer by querying
system_settings.last_scan_timestamp on startup: if the last scan is
>23h but <25h ago, an immediate rescan is triggered.

Change summary:
- Remove SQLAlchemyJobStore; use default MemoryJobStore instead
- Add _check_missed_run() that reads last_scan_timestamp from aniworld.db
- Update docs/DEVELOPMENT.md scheduler troubleshooting section
- Update the scheduler unit test that verified SQLAlchemyJobStore
This commit is contained in:
2026-05-27 22:09:18 +02:00
parent 7ded5a6e4d
commit bc87bee416
3 changed files with 95 additions and 41 deletions

View File

@@ -4,17 +4,16 @@ Uses APScheduler's AsyncIOScheduler with CronTrigger for precise
cron-based scheduling. The legacy interval-based loop has been removed
in favour of the cron approach.
Jobs are persisted to a SQLite database so they survive process restarts.
On startup, if the last scheduled run was missed (server was down at the
cron time), the job is triggered immediately within a grace period.
Jobs are held in memory (no separate scheduler database). On startup,
if the last scan timestamp indicates a missed run (server was down at the
scheduled cron time), a rescan is triggered immediately.
"""
from __future__ import annotations
import logging
from datetime import datetime, timezone
from datetime import datetime, timedelta, timezone
from typing import List, Optional
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
@@ -83,10 +82,9 @@ class SchedulerService:
logger.error("Failed to load scheduler configuration: %s", exc)
raise SchedulerServiceError(f"Failed to load config: {exc}") from exc
jobstores = {
"default": SQLAlchemyJobStore(url="sqlite:///./data/scheduler.db"),
}
self._scheduler = AsyncIOScheduler(jobstores=jobstores)
# Use in-memory job store — no separate scheduler.db needed.
# Jobs are reconstructed from config on every startup.
self._scheduler = AsyncIOScheduler()
if not self._config.enabled:
logger.info("Scheduler is disabled in configuration — not adding jobs")
@@ -125,10 +123,7 @@ class SchedulerService:
self._scheduler.start()
self._is_running = True
# Startup recovery: if the server was down at the scheduled time and
# the job is within the misfire window, APScheduler will run it
# automatically. Log the scheduled time for visibility.
# Note: next_run_time is only available AFTER scheduler.start()
# Log next scheduled run for visibility.
job = self._scheduler.get_job(_JOB_ID)
if job:
next_run = job.next_run_time
@@ -137,6 +132,11 @@ class SchedulerService:
next_run.isoformat() if next_run else None,
)
# Startup misfire recovery: check if the last scan was missed while
# the server was down. If overdue by more than one interval but within
# the grace period, trigger an immediate rescan.
await self._check_missed_run()
async def stop(self) -> None:
"""Stop the APScheduler gracefully."""
logger.info("SchedulerService.stop() called")
@@ -303,6 +303,67 @@ class SchedulerService:
)
return trigger
async def _check_missed_run(self) -> None:
"""Check if a scheduled rescan was missed while the server was down.
Compares system_settings.last_scan_timestamp against the expected
schedule. If the last scan is overdue (more than 24h ago for a daily
schedule) but within the grace period, triggers an immediate rescan.
"""
if not self._config or not self._config.enabled:
return
if not self._config.schedule_days:
return
try:
from src.server.database.connection import ( # noqa: PLC0415
get_db_session,
)
from src.server.database.system_settings_service import ( # noqa: PLC0415
SystemSettingsService,
)
async with get_db_session() as db:
settings = await SystemSettingsService.get_or_create(db)
last_scan = settings.last_scan_timestamp
if last_scan is None:
# Never scanned before — trigger immediately
logger.info("No previous scan recorded — triggering immediate rescan")
await self._perform_rescan()
return
# Ensure timezone-aware comparison
if last_scan.tzinfo is None:
last_scan = last_scan.replace(tzinfo=timezone.utc)
now = datetime.now(timezone.utc)
elapsed = now - last_scan
# If last scan was more than 24h + grace period ago, don't trigger
# (avoids surprise rescans after long downtime).
max_overdue = timedelta(hours=24, seconds=_MISFIRE_GRACE_SECONDS)
# If last scan was more than ~25h ago, skip (too stale)
if elapsed > max_overdue:
logger.info(
"Last scan was %s ago (> %s) — skipping missed-run recovery",
elapsed,
max_overdue,
)
return
# Check if a run should have happened between last_scan and now.
# Simple heuristic: if elapsed > 24h, we missed at least one daily run.
if elapsed > timedelta(hours=23):
logger.info(
"Missed scheduled rescan detected (last scan %s ago) — triggering now",
elapsed,
)
await self._perform_rescan()
except Exception as exc: # pylint: disable=broad-exception-caught
logger.warning("Missed-run check failed (non-fatal): %s", exc)
async def _broadcast(self, event_type: str, data: dict) -> None:
"""Broadcast a WebSocket event to all connected clients."""
try: