Add year extraction from folder names for existing series

- New migration script: populate year from folder (YYYY) pattern
- SerieScanner: refactor year extraction logic
- anime_service: pass year when syncing from data files
This commit is contained in:
2026-05-25 15:30:28 +02:00
parent e2a373816a
commit d5e955a731
3 changed files with 943 additions and 806 deletions

View File

@@ -0,0 +1,135 @@
#!/usr/bin/env python3
"""Migration script to populate year for existing series from folder names.
This script:
1. Finds all series in the database with year=NULL
2. Extracts year from their folder names using the same pattern as SerieScanner
3. Updates the database records
Usage (runs as a dry run unless --execute is given):
python scripts/migrate_populate_year_from_folder.py [--execute]
"""
import argparse
import re
import sys
from pathlib import Path
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from sqlalchemy import select, update
from src.server.database.models import AnimeSeries
from src.server.database.service import DatabaseSession
def extract_year_from_folder_name(folder_name: str) -> int | None:
"""Extract year from folder name if present.
Same logic as SerieScanner._extract_year_from_folder_name.
Args:
folder_name: The folder name to check
Returns:
int or None: Year if found, None otherwise
"""
if not folder_name:
return None
# Look for year in format (YYYY) - typically at end of name
match = re.search(r'\((\d{4})\)', folder_name)
if match:
try:
year = int(match.group(1))
# Validate year is reasonable (between 1900 and 2100)
if 1900 <= year <= 2100:
return year
except ValueError:
pass
return None
async def migrate_year_from_folder(dry_run: bool = True) -> tuple[int, int]:
    """Fill in the missing ``year`` of stored series from their folder names.

    Selects every AnimeSeries row whose ``year`` is NULL, derives a year
    from ``folder`` via extract_year_from_folder_name, prints one line per
    row, and (unless ``dry_run``) issues an UPDATE for each row where a
    year was found.

    Args:
        dry_run: If True, only report what would be changed; no UPDATE
            statements are executed.

    Returns:
        Tuple of (updated_count, skipped_count). ``updated_count`` counts
        rows for which a year was found (updated, or would be updated in a
        dry run); ``skipped_count`` counts rows with no usable year.
    """
    updated_count = skipped_count = 0

    async with DatabaseSession() as db:
        missing_year_query = select(AnimeSeries).where(AnimeSeries.year.is_(None))
        rows = (await db.execute(missing_year_query)).scalars().all()
        print(f"Found {len(rows)} series with year=NULL")

        for row in rows:
            detected = extract_year_from_folder_name(row.folder)

            if not detected:
                print(f" {row.folder} -> (no year found)")
                skipped_count += 1
                continue

            print(f" {row.folder} -> {detected}")
            if not dry_run:
                # NOTE(review): no explicit commit is issued here; presumably
                # DatabaseSession commits on exit - confirm.
                statement = (
                    update(AnimeSeries)
                    .where(AnimeSeries.id == row.id)
                    .values(year=detected)
                )
                await db.execute(statement)
            # Counted in dry-run mode too, so the caller can report
            # how many rows would be updated.
            updated_count += 1

    return updated_count, skipped_count
def main():
parser = argparse.ArgumentParser(description="Migrate year from folder name")
parser.add_argument(
"--dry-run",
action="store_true",
default=True,
help="Show what would be changed without making changes"
)
parser.add_argument(
"--execute",
action="store_true",
help="Actually execute the migration (disabled by default)"
)
args = parser.parse_args()
dry_run = not args.execute
if dry_run:
print("=== DRY RUN MODE ===")
print("No changes will be made. Use --execute to apply changes.\n")
import asyncio
try:
updated, skipped = asyncio.run(migrate_year_from_folder(dry_run=dry_run))
print(f"\n{'Would update' if dry_run else 'Updated'}: {updated} series")
print(f"Skipped (no year in folder): {skipped} series")
if dry_run:
print("\nRun with --execute to apply these changes.")
return 0
except Exception as e:
print(f"Error: {e}", file=sys.stderr)
return 1
# Script entry point: propagate main()'s return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -496,7 +496,8 @@ class SerieScanner:
folder_name,
key
)
return Serie(key, "", "aniworld.to", folder_name, dict())
year_from_folder = self._extract_year_from_folder_name(folder_name)
return Serie(key, "", "aniworld.to", folder_name, dict(), year=year_from_folder)
if os.path.exists(serie_file):
with open(serie_file, "rb") as file:

View File

@@ -1550,6 +1550,7 @@ async def sync_series_from_data_files(
name=serie.name,
site=serie.site,
folder=serie.folder,
year=serie.year if hasattr(serie, 'year') else None,
)
# Create Episode records for each episode in episodeDict