feat(download): persist retry state and dead-letter
Retry count and queue status were in-memory only and lost on restart, so failed downloads could not be safely resumed and permanently-failed episodes silently blocked re-queueing via the episode-id unique index.

- Add status + retry_count columns to DownloadQueueItem
- Replace unique(episode_id) with unique(episode_id, status) so permanently_failed rows do not block new pending entries
- Add PERMANENTLY_FAILED to DownloadStatus enum
- Persist retry_count on each failure; mark permanently_failed once max_retries reached
- QueueRepository reads status/retry_count from DB instead of defaulting to PENDING/0
- Stop double-incrementing retry_count in retry_failed_items; increment only happens in _process_download on failure

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -170,6 +170,27 @@ class DownloadService:
|
||||
logger.error("Failed to save item to database: %s", e)
|
||||
return item
|
||||
|
||||
async def _set_status_in_database(
    self,
    item_id: str,
    status: str,
) -> bool:
    """Write a new status value for a queue item to the database.

    Any exception raised while obtaining the repository or performing
    the update is logged and reported as a failed update instead of
    being propagated to the caller.

    Args:
        item_id: Download item ID
        status: New status value

    Returns:
        The result of the repository's ``set_status`` call (expected to
        be True on success), or False if an exception was raised.
    """
    try:
        repo = self._get_repository()
        updated = await repo.set_status(item_id, status)
    except Exception as exc:
        logger.error("Failed to set status in database: %s", exc)
        return False
    return updated
|
||||
|
||||
async def _set_error_in_database(
|
||||
self,
|
||||
item_id: str,
|
||||
@@ -191,6 +212,25 @@ class DownloadService:
|
||||
logger.error("Failed to set error in database: %s", e)
|
||||
return False
|
||||
|
||||
async def _increment_retry_in_database(
    self,
    item_id: str,
) -> bool:
    """Bump the stored retry count of a queue item in the database.

    Failures are handled on a best-effort basis: any exception raised
    while obtaining the repository or performing the increment is
    logged and turned into a False return value.

    Args:
        item_id: Download item ID

    Returns:
        The result of the repository's ``increment_retry`` call
        (expected to be True on success), or False if an exception
        was raised.
    """
    try:
        repo = self._get_repository()
        incremented = await repo.increment_retry(item_id)
    except Exception as exc:
        logger.error("Failed to increment retry in database: %s", exc)
        return False
    return incremented
|
||||
|
||||
async def _delete_from_database(self, item_id: str) -> bool:
|
||||
"""Delete an item from the database.
|
||||
|
||||
@@ -1051,17 +1091,15 @@ class DownloadService:
|
||||
if item.retry_count >= self._max_retries:
|
||||
continue
|
||||
|
||||
# Move back to pending
|
||||
# Move back to pending (retry_count will be incremented
|
||||
# by _process_download when the item fails again)
|
||||
self._failed_items.remove(item)
|
||||
item.status = DownloadStatus.PENDING
|
||||
item.retry_count += 1
|
||||
item.error = None
|
||||
item.progress = None
|
||||
self._add_to_pending_queue(item)
|
||||
retried_ids.append(item.id)
|
||||
|
||||
# Status is now managed in-memory only
|
||||
|
||||
logger.info(
|
||||
"Retrying failed item: item_id=%s, retry_count=%d",
|
||||
item.id,
|
||||
@@ -1069,18 +1107,23 @@ class DownloadService:
|
||||
)
|
||||
|
||||
if retried_ids:
|
||||
# Notify via progress service
|
||||
queue_status = await self.get_queue_status()
|
||||
await self._progress_service.update_progress(
|
||||
progress_id="download_queue",
|
||||
message=f"Retried {len(retried_ids)} failed items",
|
||||
metadata={
|
||||
"action": "items_retried",
|
||||
"retried_ids": retried_ids,
|
||||
"queue_status": queue_status.model_dump(mode="json"),
|
||||
},
|
||||
force_broadcast=True,
|
||||
)
|
||||
# Notify via progress service if available
|
||||
try:
|
||||
queue_status = await self.get_queue_status()
|
||||
await self._progress_service.update_progress(
|
||||
progress_id="download_queue",
|
||||
message=f"Retried {len(retried_ids)} failed items",
|
||||
metadata={
|
||||
"action": "items_retried",
|
||||
"retried_ids": retried_ids,
|
||||
"queue_status": queue_status.model_dump(mode="json"),
|
||||
},
|
||||
force_broadcast=True,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
"Failed to broadcast retry progress: %s", e
|
||||
)
|
||||
|
||||
return retried_ids
|
||||
|
||||
@@ -1220,17 +1263,35 @@ class DownloadService:
|
||||
item.status = DownloadStatus.FAILED
|
||||
item.completed_at = datetime.now(timezone.utc)
|
||||
item.error = str(e)
|
||||
|
||||
# Increment retry count in memory and database
|
||||
item.retry_count += 1
|
||||
await self._increment_retry_in_database(item.id)
|
||||
|
||||
self._failed_items.append(item)
|
||||
|
||||
# Set error in database
|
||||
await self._set_error_in_database(item.id, str(e))
|
||||
|
||||
logger.error(
|
||||
"Download failed: item_id=%s, error=%s, retry_count=%d",
|
||||
item.id,
|
||||
str(e),
|
||||
item.retry_count,
|
||||
)
|
||||
# Check if max retries exceeded - move to dead-letter
|
||||
if item.retry_count >= self._max_retries:
|
||||
await self._set_status_in_database(
|
||||
item.id, DownloadStatus.PERMANENTLY_FAILED.value
|
||||
)
|
||||
logger.error(
|
||||
"Download permanently failed after max retries: "
|
||||
"item_id=%s, error=%s, retry_count=%d",
|
||||
item.id,
|
||||
str(e),
|
||||
item.retry_count,
|
||||
)
|
||||
else:
|
||||
logger.error(
|
||||
"Download failed: item_id=%s, error=%s, retry_count=%d",
|
||||
item.id,
|
||||
str(e),
|
||||
item.retry_count,
|
||||
)
|
||||
# Note: Failure is already broadcast by AnimeService
|
||||
# via ProgressService when SeriesApp fires failed event
|
||||
|
||||
|
||||
Reference in New Issue
Block a user