Files
Aniworld/src/server/services/scheduler/scheduler_service.py
Lukas 9c3f03d610 refactor(scheduler): separate scheduler logic from scan/rescan logic
- Extract rescan logic into new RescanService (src/server/services/rescan_service.py)
- SchedulerService now only handles APScheduler cron scheduling
- Move scheduler sub-services (folder_rename, folder_scan, key_resolution) to scheduler/ folder
- Keep RescanOrchestrator as backward-compatible alias
- Update all imports across api/, server/, and test files
2026-06-03 20:58:30 +02:00

430 lines
15 KiB
Python

"""Scheduler service for automatic library rescans.
Uses APScheduler's AsyncIOScheduler with CronTrigger for precise
cron-based scheduling.
Jobs are held in memory (no separate scheduler database). On startup,
if the last scan timestamp indicates a missed run (server was down at the
scheduled cron time), a rescan is triggered immediately.
Actual rescan logic is delegated to RescanService.
"""
from __future__ import annotations
import logging
from datetime import datetime, timedelta, timezone
from typing import Optional
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from src.server.models.config import SchedulerConfig
from src.server.services.config_service import ConfigServiceError, get_config_service
logger = logging.getLogger(__name__)
_JOB_ID = "scheduled_rescan"
# Grace period for missed jobs (1 hour — handles server downtime between
# scheduled time and startup).
_MISFIRE_GRACE_SECONDS = 3600
class SchedulerServiceError(Exception):
"""Service-level exception for scheduler operations."""
class SchedulerService:
"""Manages automatic library rescans on a cron-based schedule.
Uses APScheduler's AsyncIOScheduler so scheduling integrates cleanly
with the running asyncio event loop. Supports:
- Cron-based scheduling (time of day + days of week)
- Immediate manual trigger
- Live config reloading without app restart
Actual rescan/folder-scan/auto-download work is delegated to
RescanService.
"""
def __init__(self) -> None:
"""Initialise the scheduler service."""
self._is_running: bool = False
self._scheduler: Optional[AsyncIOScheduler] = None
self._config: Optional[SchedulerConfig] = None
self._scan_in_progress: bool = False
logger.info("SchedulerService initialised")
# ------------------------------------------------------------------
# Public lifecycle methods
# ------------------------------------------------------------------
async def start(self) -> None:
"""Start the APScheduler with the configured cron trigger.
Raises:
SchedulerServiceError: If the scheduler is already running or
config cannot be loaded.
"""
logger.info("SchedulerService.start() called")
if self._is_running:
logger.warning("Scheduler start called but already running")
raise SchedulerServiceError("Scheduler is already running")
try:
config_service = get_config_service()
config = config_service.load_config()
self._config = config.scheduler
logger.info("Scheduler config loaded successfully")
except ConfigServiceError as exc:
logger.error("Failed to load scheduler configuration: %s", exc)
raise SchedulerServiceError(f"Failed to load config: {exc}") from exc
self._scheduler = AsyncIOScheduler()
if not self._config.enabled:
logger.info("Scheduler is disabled in configuration — not adding jobs")
self._is_running = True
return
logger.info(
"Scheduler config loaded: enabled=%s time=%s days=%s auto_download=%s folder_scan=%s",
self._config.enabled,
self._config.schedule_time,
self._config.schedule_days,
self._config.auto_download_after_rescan,
self._config.folder_scan_enabled,
)
trigger = self._build_cron_trigger()
if trigger is None:
logger.warning(
"schedule_days is empty — scheduler started but no job scheduled"
)
else:
self._scheduler.add_job(
_run_rescan_job,
trigger=trigger,
id=_JOB_ID,
replace_existing=True,
misfire_grace_time=_MISFIRE_GRACE_SECONDS,
coalesce=True,
)
logger.info(
"Scheduler started with cron trigger: time=%s days=%s",
self._config.schedule_time,
self._config.schedule_days,
)
self._scheduler.start()
self._is_running = True
# Log next scheduled run for visibility.
job = self._scheduler.get_job(_JOB_ID)
if job:
next_run = job.next_run_time
logger.info(
"Scheduler next run: %s",
next_run.isoformat() if next_run else None,
)
# Startup misfire recovery: check if the last scan was missed while
# the server was down.
await self._check_missed_run()
async def stop(self) -> None:
"""Stop the APScheduler gracefully."""
logger.info("SchedulerService.stop() called")
if not self._is_running:
logger.debug("Scheduler stop called but not running")
return
if self._scheduler and self._scheduler.running:
self._scheduler.shutdown(wait=False)
logger.info("Scheduler stopped")
else:
logger.info("Scheduler stop: scheduler was not running")
self._is_running = False
logger.info("SchedulerService stopped successfully")
async def ensure_started(self) -> None:
"""Ensure the scheduler is running (idempotent).
If already running, returns immediately. Otherwise, starts the scheduler.
This method is safe to call multiple times and from multiple callers.
Raises:
SchedulerServiceError: If startup fails (except for already running).
"""
if self._is_running:
logger.debug("Scheduler ensure_started called but already running")
return
logger.info("Scheduler ensure_started: starting scheduler")
await self.start()
async def trigger_rescan(self) -> bool:
"""Manually trigger a library rescan.
Returns:
True if rescan was started; False if a scan is already running.
Raises:
SchedulerServiceError: If the scheduler service is not started.
"""
if not self._is_running:
raise SchedulerServiceError("Scheduler is not running")
if self._scan_in_progress:
logger.warning("Cannot trigger rescan: scan already in progress")
return False
logger.info("Manual rescan triggered")
await self._perform_rescan()
return True
def reload_config(self, config: SchedulerConfig) -> None:
"""Apply a new SchedulerConfig immediately.
If the scheduler is already running the job is rescheduled (or
removed) without stopping the scheduler.
Args:
config: New scheduler configuration to apply.
"""
self._config = config
logger.info(
"Scheduler config reloaded: enabled=%s time=%s days=%s auto_download=%s folder_scan=%s",
config.enabled,
config.schedule_time,
config.schedule_days,
config.auto_download_after_rescan,
config.folder_scan_enabled,
)
if not self._scheduler or not self._scheduler.running:
return
if not config.enabled:
if self._scheduler.get_job(_JOB_ID):
self._scheduler.remove_job(_JOB_ID)
logger.info("Scheduler job removed (disabled)")
return
trigger = self._build_cron_trigger()
if trigger is None:
if self._scheduler.get_job(_JOB_ID):
self._scheduler.remove_job(_JOB_ID)
logger.warning("Scheduler job removed — schedule_days is empty")
else:
if self._scheduler.get_job(_JOB_ID):
self._scheduler.reschedule_job(_JOB_ID, trigger=trigger)
logger.info(
"Scheduler rescheduled with cron trigger: time=%s days=%s",
config.schedule_time,
config.schedule_days,
)
else:
self._scheduler.add_job(
_run_rescan_job,
trigger=trigger,
id=_JOB_ID,
replace_existing=True,
misfire_grace_time=_MISFIRE_GRACE_SECONDS,
coalesce=True,
)
logger.info(
"Scheduler job added with cron trigger: time=%s days=%s",
config.schedule_time,
config.schedule_days,
)
def get_status(self) -> dict:
"""Return current scheduler status including cron configuration.
Returns:
Dict containing scheduler state and config fields.
"""
from src.server.services.rescan_service import get_rescan_service
rescan_service = get_rescan_service()
next_run: Optional[str] = None
if self._scheduler and self._scheduler.running:
job = self._scheduler.get_job(_JOB_ID)
if job and job.next_run_time:
next_run = job.next_run_time.isoformat()
return {
"is_running": self._is_running,
"enabled": self._config.enabled if self._config else False,
"interval_minutes": self._config.interval_minutes if self._config else None,
"schedule_time": self._config.schedule_time if self._config else None,
"schedule_days": self._config.schedule_days if self._config else [],
"auto_download_after_rescan": (
self._config.auto_download_after_rescan if self._config else False
),
"folder_scan_enabled": (
self._config.folder_scan_enabled if self._config else False
),
"last_run": (
rescan_service.last_scan_time.isoformat()
if rescan_service.last_scan_time
else None
),
"next_run": next_run,
"scan_in_progress": self._scan_in_progress,
}
# ------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------
def _build_cron_trigger(self) -> Optional[CronTrigger]:
"""Convert config fields into an APScheduler CronTrigger.
Returns:
CronTrigger instance or None if schedule_days is empty.
"""
if not self._config or not self._config.schedule_days:
return None
hour_str, minute_str = self._config.schedule_time.split(":")
day_of_week = ",".join(self._config.schedule_days)
trigger = CronTrigger(
hour=int(hour_str),
minute=int(minute_str),
day_of_week=day_of_week,
)
logger.debug(
"CronTrigger built: hour=%s minute=%s day_of_week=%s",
hour_str,
minute_str,
day_of_week,
)
return trigger
async def _check_missed_run(self) -> None:
"""Check if a scheduled rescan was missed while the server was down.
Compares system_settings.last_scan_timestamp against the expected
schedule. If the last scan is overdue (more than 24h ago for a daily
schedule) but within the grace period, triggers an immediate rescan.
"""
if not self._config or not self._config.enabled:
return
if not self._config.schedule_days:
return
try:
from src.server.database.connection import get_db_session
from src.server.database.system_settings_service import (
SystemSettingsService,
)
async with get_db_session() as db:
settings = await SystemSettingsService.get_or_create(db)
last_scan = settings.last_scan_timestamp
if last_scan is None:
# Never scanned before — trigger immediately
logger.info("No previous scan recorded — triggering immediate rescan")
await self._perform_rescan()
return
# Ensure timezone-aware comparison
if last_scan.tzinfo is None:
last_scan = last_scan.replace(tzinfo=timezone.utc)
now = datetime.now(timezone.utc)
elapsed = now - last_scan
# If last scan was more than 24h + grace period ago, don't trigger
# (avoids surprise rescans after long downtime).
max_overdue = timedelta(hours=24, seconds=_MISFIRE_GRACE_SECONDS)
if elapsed > max_overdue:
logger.info(
"Last scan was %s ago (> %s) — skipping missed-run recovery",
elapsed,
max_overdue,
)
return
# Check if a run should have happened between last_scan and now.
if elapsed > timedelta(hours=23):
logger.info(
"Missed scheduled rescan detected (last scan %s ago) — triggering now",
elapsed,
)
await self._perform_rescan()
except Exception as exc: # pylint: disable=broad-exception-caught
logger.warning("Missed-run check failed (non-fatal): %s", exc)
async def _perform_rescan(self) -> None:
"""Execute a library rescan via RescanService."""
from src.server.services.rescan_service import get_rescan_service
logger.info(
"Scheduler _perform_rescan entered: scan_in_progress=%s",
self._scan_in_progress,
)
if self._scan_in_progress:
logger.warning("Skipping rescan: previous scan still in progress")
return
self._scan_in_progress = True
try:
rescan_service = get_rescan_service(config=self._config)
await rescan_service.execute()
finally:
self._scan_in_progress = False
logger.info("Scheduled rescan finished: scan_in_progress reset to False")
# ---------------------------------------------------------------------------
# Module-level job runner
#
# APScheduler cannot serialize bound methods (SchedulerService instance
# contains a reference to the scheduler itself, creating a circular pickle
# error). Using a module-level function avoids this.
# ---------------------------------------------------------------------------
async def _run_rescan_job() -> None:
"""Module-level job entry point — delegates to the current service."""
logger.info("=" * 60)
logger.info("APScheduler triggered _run_rescan_job")
logger.info("Getting scheduler service singleton...")
svc = get_scheduler_service()
logger.info("Scheduler service obtained, calling _perform_rescan()")
await svc._perform_rescan()
logger.info("_run_rescan_job completed")
logger.info("=" * 60)
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------
_scheduler_service: Optional[SchedulerService] = None
def get_scheduler_service() -> SchedulerService:
"""Return the singleton SchedulerService instance."""
global _scheduler_service
if _scheduler_service is None:
logger.info("Creating new SchedulerService singleton")
_scheduler_service = SchedulerService()
else:
logger.debug("Returning existing SchedulerService singleton")
return _scheduler_service
def reset_scheduler_service() -> None:
"""Reset the singleton (used in tests)."""
global _scheduler_service
_scheduler_service = None