refactor(scheduler): drop separate scheduler.db in favour of MemoryJobStore
The scheduler used a separate SQLite file (scheduler.db) solely to persist a single cron job. The separate file was originally required because APScheduler's SQLAlchemyJobStore is sync-only, which creates an async/sync driver conflict when both access the same database file. The job is rebuilt from config.json on every startup regardless (replace_existing=True), so the persisted state only served misfire detection. Misfire detection now lives in the app layer: on startup, system_settings.last_scan_timestamp is queried, and an immediate rescan is triggered if the last scan is >23h but <25h ago, or if no scan has been recorded yet.
Change summary:
- Remove SQLAlchemyJobStore; use the default MemoryJobStore instead
- Add _check_missed_run(), which reads last_scan_timestamp from aniworld.db
- Update the scheduler troubleshooting section in docs/DEVELOPMENT.md
- Update the scheduler unit test that verified SQLAlchemyJobStore
This commit is contained in:
@@ -4,17 +4,16 @@ Uses APScheduler's AsyncIOScheduler with CronTrigger for precise
|
||||
cron-based scheduling. The legacy interval-based loop has been removed
|
||||
in favour of the cron approach.
|
||||
|
||||
Jobs are persisted to a SQLite database so they survive process restarts.
|
||||
On startup, if the last scheduled run was missed (server was down at the
|
||||
cron time), the job is triggered immediately within a grace period.
|
||||
Jobs are held in memory (no separate scheduler database). On startup,
|
||||
if the last scan timestamp indicates a missed run (server was down at the
|
||||
scheduled cron time), a rescan is triggered immediately.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import List, Optional
|
||||
|
||||
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
|
||||
@@ -83,10 +82,9 @@ class SchedulerService:
|
||||
logger.error("Failed to load scheduler configuration: %s", exc)
|
||||
raise SchedulerServiceError(f"Failed to load config: {exc}") from exc
|
||||
|
||||
jobstores = {
|
||||
"default": SQLAlchemyJobStore(url="sqlite:///./data/scheduler.db"),
|
||||
}
|
||||
self._scheduler = AsyncIOScheduler(jobstores=jobstores)
|
||||
# Use in-memory job store — no separate scheduler.db needed.
|
||||
# Jobs are reconstructed from config on every startup.
|
||||
self._scheduler = AsyncIOScheduler()
|
||||
|
||||
if not self._config.enabled:
|
||||
logger.info("Scheduler is disabled in configuration — not adding jobs")
|
||||
@@ -125,10 +123,7 @@ class SchedulerService:
|
||||
self._scheduler.start()
|
||||
self._is_running = True
|
||||
|
||||
# Startup recovery: if the server was down at the scheduled time and
|
||||
# the job is within the misfire window, APScheduler will run it
|
||||
# automatically. Log the scheduled time for visibility.
|
||||
# Note: next_run_time is only available AFTER scheduler.start()
|
||||
# Log next scheduled run for visibility.
|
||||
job = self._scheduler.get_job(_JOB_ID)
|
||||
if job:
|
||||
next_run = job.next_run_time
|
||||
@@ -137,6 +132,11 @@ class SchedulerService:
|
||||
next_run.isoformat() if next_run else None,
|
||||
)
|
||||
|
||||
# Startup misfire recovery: check if the last scan was missed while
|
||||
# the server was down. If overdue by more than one interval but within
|
||||
# the grace period, trigger an immediate rescan.
|
||||
await self._check_missed_run()
|
||||
|
||||
async def stop(self) -> None:
|
||||
"""Stop the APScheduler gracefully."""
|
||||
logger.info("SchedulerService.stop() called")
|
||||
@@ -303,6 +303,67 @@ class SchedulerService:
|
||||
)
|
||||
return trigger
|
||||
|
||||
async def _check_missed_run(self) -> None:
    """Trigger a catch-up rescan if a scheduled run was missed during downtime.

    Reads ``system_settings.last_scan_timestamp`` and applies a simple
    heuristic to the time elapsed since that scan:

    * no timestamp recorded: rescan immediately;
    * more than 24h plus ``_MISFIRE_GRACE_SECONDS`` ago: skip, so that a
      long downtime does not cause a surprise rescan;
    * more than 23h ago (and within the limit above): rescan immediately;
    * otherwise: do nothing.

    The check is skipped entirely when the scheduler configuration is
    missing or disabled, or when no schedule days are configured.

    Any exception raised here, including one raised by the rescan itself,
    is logged as a warning and not propagated.
    """
    if not self._config or not self._config.enabled:
        return
    if not self._config.schedule_days:
        return

    try:
        # Imported inside the function rather than at module level.
        from src.server.database.connection import (  # noqa: PLC0415
            get_db_session,
        )
        from src.server.database.system_settings_service import (  # noqa: PLC0415
            SystemSettingsService,
        )

        # Only the timestamp is needed, so the session is released before
        # any rescan is started.
        async with get_db_session() as db:
            settings = await SystemSettingsService.get_or_create(db)
            last_scan = settings.last_scan_timestamp

        if last_scan is None:
            # Never scanned before — trigger immediately.
            logger.info("No previous scan recorded — triggering immediate rescan")
            await self._perform_rescan()
            return

        # A naive timestamp is interpreted as UTC so that it can be
        # subtracted from the timezone-aware "now" below.
        if last_scan.tzinfo is None:
            last_scan = last_scan.replace(tzinfo=timezone.utc)

        elapsed = datetime.now(timezone.utc) - last_scan

        # Upper bound: one day plus the misfire grace period. Anything
        # older is considered too stale to recover automatically.
        max_overdue = timedelta(hours=24, seconds=_MISFIRE_GRACE_SECONDS)
        # Lower bound: a scan older than this counts as a missed run.
        # NOTE(review): 23h rather than 24h presumably leaves slack for the
        # previous scan having finished late — confirm the intent.
        missed_after = timedelta(hours=23)

        if elapsed > max_overdue:
            logger.info(
                "Last scan was %s ago (> %s) — skipping missed-run recovery",
                elapsed,
                max_overdue,
            )
            return

        if elapsed > missed_after:
            logger.info(
                "Missed scheduled rescan detected (last scan %s ago) — triggering now",
                elapsed,
            )
            await self._perform_rescan()

    except Exception as exc:  # pylint: disable=broad-exception-caught
        # Best-effort recovery: a failure must not prevent startup.
        logger.warning("Missed-run check failed (non-fatal): %s", exc)
|
||||
|
||||
async def _broadcast(self, event_type: str, data: dict) -> None:
|
||||
"""Broadcast a WebSocket event to all connected clients."""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user