Add year extraction from folder names for existing series
- New migration script: populate year from folder (YYYY) pattern
- SerieScanner: refactor year extraction logic
- anime_service: pass year when syncing from data files
This commit is contained in:
135
scripts/migrate_populate_year_from_folder.py
Normal file
135
scripts/migrate_populate_year_from_folder.py
Normal file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Migration script to populate year for existing series from folder names.
|
||||
|
||||
This script:
|
||||
1. Finds all series in the database with year=NULL
|
||||
2. Extracts year from their folder names using the same pattern as SerieScanner
|
||||
3. Updates the database records
|
||||
|
||||
Usage:
|
||||
python scripts/migrate_populate_year_from_folder.py [--dry-run | --execute]

The script runs as a dry run by default; pass --execute to apply the changes.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add project root to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from sqlalchemy import select, update
|
||||
from src.server.database.models import AnimeSeries
|
||||
from src.server.database.service import DatabaseSession
|
||||
|
||||
|
||||
def extract_year_from_folder_name(folder_name: str) -> int | None:
|
||||
"""Extract year from folder name if present.
|
||||
|
||||
Same logic as SerieScanner._extract_year_from_folder_name.
|
||||
|
||||
Args:
|
||||
folder_name: The folder name to check
|
||||
|
||||
Returns:
|
||||
int or None: Year if found, None otherwise
|
||||
"""
|
||||
if not folder_name:
|
||||
return None
|
||||
|
||||
# Look for year in format (YYYY) - typically at end of name
|
||||
match = re.search(r'\((\d{4})\)', folder_name)
|
||||
if match:
|
||||
try:
|
||||
year = int(match.group(1))
|
||||
# Validate year is reasonable (between 1900 and 2100)
|
||||
if 1900 <= year <= 2100:
|
||||
return year
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
async def migrate_year_from_folder(dry_run: bool = True) -> tuple[int, int]:
    """Populate ``year`` on series rows whose year is currently NULL.

    Selects every ``AnimeSeries`` with ``year IS NULL``, derives a year
    from each row's ``folder`` value, prints one report line per row and,
    unless ``dry_run`` is set, issues an UPDATE for each row where a year
    was found.

    Args:
        dry_run: If True, only report what would be changed; no UPDATE
            statements are executed.

    Returns:
        Tuple of (updated_count, skipped_count). ``updated_count`` counts
        rows for which a year was found (in dry-run mode these are the
        rows that would be updated); ``skipped_count`` counts rows whose
        folder yielded no year.
    """
    n_updated = 0
    n_skipped = 0

    async with DatabaseSession() as session:
        # Only rows that have no year yet are candidates.
        null_year_query = select(AnimeSeries).where(AnimeSeries.year.is_(None))
        fetched = await session.execute(null_year_query)
        candidates = fetched.scalars().all()

        print(f"Found {len(candidates)} series with year=NULL")

        for series in candidates:
            year_from_folder = extract_year_from_folder_name(series.folder)

            if not year_from_folder:
                print(f" {series.folder} -> (no year found)")
                n_skipped += 1
                continue

            print(f" {series.folder} -> {year_from_folder}")

            if not dry_run:
                # NOTE(review): no explicit commit is issued here; presumably
                # DatabaseSession commits on exit -- confirm.
                set_year = (
                    update(AnimeSeries)
                    .where(AnimeSeries.id == series.id)
                    .values(year=year_from_folder)
                )
                await session.execute(set_year)

            n_updated += 1

    return n_updated, n_skipped
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Migrate year from folder name")
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
default=True,
|
||||
help="Show what would be changed without making changes"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--execute",
|
||||
action="store_true",
|
||||
help="Actually execute the migration (disabled by default)"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
dry_run = not args.execute
|
||||
|
||||
if dry_run:
|
||||
print("=== DRY RUN MODE ===")
|
||||
print("No changes will be made. Use --execute to apply changes.\n")
|
||||
|
||||
import asyncio
|
||||
|
||||
try:
|
||||
updated, skipped = asyncio.run(migrate_year_from_folder(dry_run=dry_run))
|
||||
|
||||
print(f"\n{'Would update' if dry_run else 'Updated'}: {updated} series")
|
||||
print(f"Skipped (no year in folder): {skipped} series")
|
||||
|
||||
if dry_run:
|
||||
print("\nRun with --execute to apply these changes.")
|
||||
|
||||
return 0
|
||||
except Exception as e:
|
||||
print(f"Error: {e}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@@ -496,7 +496,8 @@ class SerieScanner:
|
||||
folder_name,
|
||||
key
|
||||
)
|
||||
return Serie(key, "", "aniworld.to", folder_name, dict())
|
||||
year_from_folder = self._extract_year_from_folder_name(folder_name)
|
||||
return Serie(key, "", "aniworld.to", folder_name, dict(), year=year_from_folder)
|
||||
|
||||
if os.path.exists(serie_file):
|
||||
with open(serie_file, "rb") as file:
|
||||
|
||||
@@ -1550,6 +1550,7 @@ async def sync_series_from_data_files(
|
||||
name=serie.name,
|
||||
site=serie.site,
|
||||
folder=serie.folder,
|
||||
year=serie.year if hasattr(serie, 'year') else None,
|
||||
)
|
||||
|
||||
# Create Episode records for each episode in episodeDict
|
||||
|
||||
Reference in New Issue
Block a user