feat(download): persist retry state and dead-letter
Retry count and queue status were in-memory only and lost on restart, so failed downloads could not be safely resumed and permanently-failed episodes silently blocked re-queueing via the episode-id unique index. - Add status + retry_count columns to DownloadQueueItem - Replace unique(episode_id) with unique(episode_id, status) so permanently_failed rows do not block new pending entries - Add PERMANENTLY_FAILED to DownloadStatus enum - Persist retry_count on each failure; mark permanently_failed once max_retries reached - QueueRepository reads status/retry_count from DB instead of defaulting to PENDING/0 - Stop double-incrementing retry_count in retry_failed_items; increment only happens in _process_download on failure Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -170,6 +170,27 @@ class DownloadService:
|
||||
logger.error("Failed to save item to database: %s", e)
|
||||
return item
|
||||
|
||||
async def _set_status_in_database(
|
||||
self,
|
||||
item_id: str,
|
||||
status: str,
|
||||
) -> bool:
|
||||
"""Set status on an item in the database.
|
||||
|
||||
Args:
|
||||
item_id: Download item ID
|
||||
status: New status value
|
||||
|
||||
Returns:
|
||||
True if update succeeded
|
||||
"""
|
||||
try:
|
||||
repository = self._get_repository()
|
||||
return await repository.set_status(item_id, status)
|
||||
except Exception as e:
|
||||
logger.error("Failed to set status in database: %s", e)
|
||||
return False
|
||||
|
||||
async def _set_error_in_database(
|
||||
self,
|
||||
item_id: str,
|
||||
@@ -191,6 +212,25 @@ class DownloadService:
|
||||
logger.error("Failed to set error in database: %s", e)
|
||||
return False
|
||||
|
||||
async def _increment_retry_in_database(
|
||||
self,
|
||||
item_id: str,
|
||||
) -> bool:
|
||||
"""Increment retry count on an item in the database.
|
||||
|
||||
Args:
|
||||
item_id: Download item ID
|
||||
|
||||
Returns:
|
||||
True if update succeeded
|
||||
"""
|
||||
try:
|
||||
repository = self._get_repository()
|
||||
return await repository.increment_retry(item_id)
|
||||
except Exception as e:
|
||||
logger.error("Failed to increment retry in database: %s", e)
|
||||
return False
|
||||
|
||||
async def _delete_from_database(self, item_id: str) -> bool:
|
||||
"""Delete an item from the database.
|
||||
|
||||
@@ -1051,17 +1091,15 @@ class DownloadService:
|
||||
if item.retry_count >= self._max_retries:
|
||||
continue
|
||||
|
||||
# Move back to pending
|
||||
# Move back to pending (retry_count will be incremented
|
||||
# by _process_download when the item fails again)
|
||||
self._failed_items.remove(item)
|
||||
item.status = DownloadStatus.PENDING
|
||||
item.retry_count += 1
|
||||
item.error = None
|
||||
item.progress = None
|
||||
self._add_to_pending_queue(item)
|
||||
retried_ids.append(item.id)
|
||||
|
||||
# Status is now managed in-memory only
|
||||
|
||||
logger.info(
|
||||
"Retrying failed item: item_id=%s, retry_count=%d",
|
||||
item.id,
|
||||
@@ -1069,18 +1107,23 @@ class DownloadService:
|
||||
)
|
||||
|
||||
if retried_ids:
|
||||
# Notify via progress service
|
||||
queue_status = await self.get_queue_status()
|
||||
await self._progress_service.update_progress(
|
||||
progress_id="download_queue",
|
||||
message=f"Retried {len(retried_ids)} failed items",
|
||||
metadata={
|
||||
"action": "items_retried",
|
||||
"retried_ids": retried_ids,
|
||||
"queue_status": queue_status.model_dump(mode="json"),
|
||||
},
|
||||
force_broadcast=True,
|
||||
)
|
||||
# Notify via progress service if available
|
||||
try:
|
||||
queue_status = await self.get_queue_status()
|
||||
await self._progress_service.update_progress(
|
||||
progress_id="download_queue",
|
||||
message=f"Retried {len(retried_ids)} failed items",
|
||||
metadata={
|
||||
"action": "items_retried",
|
||||
"retried_ids": retried_ids,
|
||||
"queue_status": queue_status.model_dump(mode="json"),
|
||||
},
|
||||
force_broadcast=True,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to broadcast retry progress: %s", e
|
||||
)
|
||||
|
||||
return retried_ids
|
||||
|
||||
@@ -1220,17 +1263,35 @@ class DownloadService:
|
||||
item.status = DownloadStatus.FAILED
|
||||
item.completed_at = datetime.now(timezone.utc)
|
||||
item.error = str(e)
|
||||
|
||||
# Increment retry count in memory and database
|
||||
item.retry_count += 1
|
||||
await self._increment_retry_in_database(item.id)
|
||||
|
||||
self._failed_items.append(item)
|
||||
|
||||
# Set error in database
|
||||
await self._set_error_in_database(item.id, str(e))
|
||||
|
||||
logger.error(
|
||||
"Download failed: item_id=%s, error=%s, retry_count=%d",
|
||||
item.id,
|
||||
str(e),
|
||||
item.retry_count,
|
||||
)
|
||||
# Check if max retries exceeded - move to dead-letter
|
||||
if item.retry_count >= self._max_retries:
|
||||
await self._set_status_in_database(
|
||||
item.id, DownloadStatus.PERMANENTLY_FAILED.value
|
||||
)
|
||||
logger.error(
|
||||
"Download permanently failed after max retries: "
|
||||
"item_id=%s, error=%s, retry_count=%d",
|
||||
item.id,
|
||||
str(e),
|
||||
item.retry_count,
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
"Download failed: item_id=%s, error=%s, retry_count=%d",
|
||||
item.id,
|
||||
str(e),
|
||||
item.retry_count,
|
||||
)
|
||||
# Note: Failure is already broadcast by AnimeService
|
||||
# via ProgressService when SeriesApp fires failed event
|
||||
|
||||
|
||||
@@ -83,15 +83,12 @@ class QueueRepository:
|
||||
) -> DownloadItem:
|
||||
"""Convert database model to DownloadItem.
|
||||
|
||||
Note: Since the database model is simplified, status, priority,
|
||||
progress, and retry_count default to initial values.
|
||||
|
||||
Args:
|
||||
db_item: SQLAlchemy download queue item
|
||||
item_id: Optional override for item ID
|
||||
|
||||
Returns:
|
||||
Pydantic download item with default status/priority
|
||||
Pydantic download item with status/retry_count from database
|
||||
"""
|
||||
# Get episode info from the related Episode object
|
||||
episode = db_item.episode
|
||||
@@ -109,14 +106,14 @@ class QueueRepository:
|
||||
serie_folder=series.folder if series else "",
|
||||
serie_name=series.name if series else "",
|
||||
episode=episode_identifier,
|
||||
status=DownloadStatus.PENDING, # Default - managed in-memory
|
||||
priority=DownloadPriority.NORMAL, # Default - managed in-memory
|
||||
status=DownloadStatus(db_item.status), # From database
|
||||
priority=DownloadPriority.NORMAL, # Managed in-memory
|
||||
added_at=db_item.created_at or datetime.now(timezone.utc),
|
||||
started_at=db_item.started_at,
|
||||
completed_at=db_item.completed_at,
|
||||
progress=None, # Managed in-memory
|
||||
error=db_item.error_message,
|
||||
retry_count=0, # Managed in-memory
|
||||
retry_count=db_item.retry_count, # From database
|
||||
source_url=db_item.download_url,
|
||||
)
|
||||
|
||||
@@ -350,6 +347,110 @@ class QueueRepository:
|
||||
finally:
|
||||
if manage_session:
|
||||
await session.close()
|
||||
|
||||
async def set_status(
|
||||
self,
|
||||
item_id: str,
|
||||
status: str,
|
||||
db: Optional[AsyncSession] = None,
|
||||
) -> bool:
|
||||
"""Set status on a download item.
|
||||
|
||||
Args:
|
||||
item_id: Download item ID
|
||||
status: New status value
|
||||
db: Optional existing database session
|
||||
|
||||
Returns:
|
||||
True if update succeeded, False if item not found
|
||||
|
||||
Raises:
|
||||
QueueRepositoryError: If update fails
|
||||
"""
|
||||
session = db or self._db_session_factory()
|
||||
manage_session = db is None
|
||||
|
||||
try:
|
||||
result = await DownloadQueueService.set_status(
|
||||
session,
|
||||
int(item_id),
|
||||
status,
|
||||
)
|
||||
|
||||
if manage_session:
|
||||
await session.commit()
|
||||
|
||||
success = result is not None
|
||||
|
||||
if success:
|
||||
logger.debug(
|
||||
"Set status on queue item: item_id=%s, status=%s",
|
||||
item_id,
|
||||
status,
|
||||
)
|
||||
|
||||
return success
|
||||
|
||||
except ValueError:
|
||||
return False
|
||||
except Exception as e:
|
||||
if manage_session:
|
||||
await session.rollback()
|
||||
logger.error("Failed to set status: %s", e)
|
||||
raise QueueRepositoryError(f"Failed to set status: {e}") from e
|
||||
finally:
|
||||
if manage_session:
|
||||
await session.close()
|
||||
|
||||
async def increment_retry(
|
||||
self,
|
||||
item_id: str,
|
||||
db: Optional[AsyncSession] = None,
|
||||
) -> bool:
|
||||
"""Increment retry count on a download item.
|
||||
|
||||
Args:
|
||||
item_id: Download item ID
|
||||
db: Optional existing database session
|
||||
|
||||
Returns:
|
||||
True if update succeeded, False if item not found
|
||||
|
||||
Raises:
|
||||
QueueRepositoryError: If update fails
|
||||
"""
|
||||
session = db or self._db_session_factory()
|
||||
manage_session = db is None
|
||||
|
||||
try:
|
||||
result = await DownloadQueueService.increment_retry_count(
|
||||
session,
|
||||
int(item_id),
|
||||
)
|
||||
|
||||
if manage_session:
|
||||
await session.commit()
|
||||
|
||||
success = result is not None
|
||||
|
||||
if success:
|
||||
logger.debug(
|
||||
"Incremented retry count on queue item: item_id=%s",
|
||||
item_id,
|
||||
)
|
||||
|
||||
return success
|
||||
|
||||
except ValueError:
|
||||
return False
|
||||
except Exception as e:
|
||||
if manage_session:
|
||||
await session.rollback()
|
||||
logger.error("Failed to increment retry: %s", e)
|
||||
raise QueueRepositoryError(f"Failed to increment retry: {e}") from e
|
||||
finally:
|
||||
if manage_session:
|
||||
await session.close()
|
||||
|
||||
async def delete_item(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user