feat(NFO): add TMDB search fallback with alt_titles support

- New _search_with_fallback() method tries multiple strategies:
  1. Primary query with year filter (de-DE locale)
  2. Alternative titles with ja-JP / en-US locales
  3. English search (en-US)
  4. Search without year constraint
  5. Punctuation-normalized query
- create_nfo() accepts new alt_titles param for Japanese/title fallback
- Better match rate for anime with non-English titles

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
2026-05-23 21:57:00 +02:00
parent 3f7651404d
commit 9a20541598
7 changed files with 588 additions and 43 deletions

View File

@@ -26,6 +26,8 @@ class HealthStatus(BaseModel):
service: str = "aniworld-api"
series_app_initialized: bool = False
anime_directory_configured: bool = False
scheduler_next_run: Optional[str] = None
scheduler_last_run: Optional[str] = None
class DatabaseHealth(BaseModel):
@@ -177,13 +179,22 @@ async def basic_health_check() -> HealthStatus:
This endpoint does not depend on anime_directory configuration
and should always return 200 OK for basic health monitoring.
Includes service information for identification.
Includes scheduler next/last run times for monitoring tools.
Returns:
HealthStatus: Simple health status with timestamp and service info.
"""
from src.config.settings import settings
from src.server.utils.dependencies import _series_app
# Get scheduler status for health monitoring
scheduler_status: dict = {}
try:
from src.server.services.scheduler_service import get_scheduler_service
scheduler_status = get_scheduler_service().get_status()
except Exception:
pass
logger.debug("Basic health check requested")
return HealthStatus(
status="healthy",
@@ -191,6 +202,8 @@ async def basic_health_check() -> HealthStatus:
service="aniworld-api",
series_app_initialized=_series_app is not None,
anime_directory_configured=bool(settings.anime_directory),
scheduler_next_run=scheduler_status.get("next_run"),
scheduler_last_run=scheduler_status.get("last_run"),
)

View File

@@ -3,6 +3,10 @@
Uses APScheduler's AsyncIOScheduler with CronTrigger for precise
cron-based scheduling. The legacy interval-based loop has been removed
in favour of the cron approach.
Jobs are persisted to a SQLite database so they survive process restarts.
On startup, if the last scheduled run was missed (the server was down at the
cron time), the job is triggered immediately, provided the missed run is still
within the misfire grace period.
"""
from __future__ import annotations
@@ -10,6 +14,7 @@ from datetime import datetime, timezone
from typing import List, Optional
import structlog
from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
@@ -20,6 +25,10 @@ logger = structlog.get_logger(__name__)
_JOB_ID = "scheduled_rescan"
# Grace period for missed jobs (1 hour — handles server downtime between
# scheduled time and startup).
_MISFIRE_GRACE_SECONDS = 3600
class SchedulerServiceError(Exception):
"""Service-level exception for scheduler operations."""
@@ -71,7 +80,10 @@ class SchedulerService:
logger.error("Failed to load scheduler configuration", error=str(exc))
raise SchedulerServiceError(f"Failed to load config: {exc}") from exc
self._scheduler = AsyncIOScheduler()
jobstores = {
"default": SQLAlchemyJobStore(url="sqlite:///./data/scheduler.db"),
}
self._scheduler = AsyncIOScheduler(jobstores=jobstores)
if not self._config.enabled:
logger.info("Scheduler is disabled in configuration — not adding jobs")
@@ -85,11 +97,12 @@ class SchedulerService:
)
else:
self._scheduler.add_job(
self._perform_rescan,
_run_rescan_job,
trigger=trigger,
id=_JOB_ID,
replace_existing=True,
misfire_grace_time=300,
misfire_grace_time=_MISFIRE_GRACE_SECONDS,
coalesce=True,
)
logger.info(
"Scheduler started with cron trigger",
@@ -100,6 +113,16 @@ class SchedulerService:
self._scheduler.start()
self._is_running = True
# Startup recovery: if the server was down at the scheduled time and
# the job is within the misfire window, APScheduler will run it
# automatically. Log the scheduled time for visibility.
job = self._scheduler.get_job(_JOB_ID)
if job and job.next_run_time:
logger.info(
"Scheduler next run",
next_run=job.next_run_time.isoformat(),
)
async def stop(self) -> None:
"""Stop the APScheduler gracefully."""
if not self._is_running:
@@ -175,11 +198,12 @@ class SchedulerService:
)
else:
self._scheduler.add_job(
self._perform_rescan,
_run_rescan_job,
trigger=trigger,
id=_JOB_ID,
replace_existing=True,
misfire_grace_time=300,
misfire_grace_time=_MISFIRE_GRACE_SECONDS,
coalesce=True,
)
logger.info(
"Scheduler job added with cron trigger",
@@ -409,6 +433,20 @@ class SchedulerService:
self._scan_in_progress = False
# ---------------------------------------------------------------------------
# Module-level job runner
#
# APScheduler cannot serialize bound methods here (the SchedulerService
# instance contains a reference to the scheduler itself, which causes a
# circular pickle error). Using a module-level function avoids this.
# ---------------------------------------------------------------------------
async def _run_rescan_job() -> None:
    """Entry point for the scheduled rescan job.

    Looks up the current scheduler service via ``get_scheduler_service()``
    and awaits its ``_perform_rescan()`` coroutine.
    """
    await get_scheduler_service()._perform_rescan()
# ---------------------------------------------------------------------------
# Module-level singleton
# ---------------------------------------------------------------------------