feat(core): Standardize SerieScanner to use 'key' as primary identifier

Task 1.3: Update SerieScanner to Use Key Consistently

Changes:
- Renamed self.folderDict to self.keyDict for clarity and consistency
- Updated internal storage to use serie.key as dictionary key
- Modified scan() method to store series by key
- Enhanced logging to show both key (identifier) and folder (metadata)
- Added debug logging when storing series
- Updated error contexts to include both key and folder in metadata
- Updated completion statistics to use keyDict
- Enhanced docstrings to clarify identifier vs metadata usage
- Fixed import formatting to comply with PEP 8 line length

Success criteria met:
- Scanner stores series by 'key'
- Progress callbacks use 'key' for identification
- Error messages reference both 'key' and 'folder' appropriately
- All 554 unit tests pass

Related to: Series Identifier Standardization (Phase 1, Task 1.3)
This commit is contained in:
Lukas 2025-11-23 13:06:33 +01:00
parent 8b5b06ca9a
commit 920a5b0eaf
2 changed files with 51 additions and 17 deletions

View File

@ -280,10 +280,10 @@ conda run -n AniWorld python -m pytest tests/unit/ -k "SerieList" -v
**Success Criteria:**
- [ ] Scanner stores series by `key`
- [ ] Progress callbacks use `key` for identification
- [ ] Error messages reference `key` and `folder` appropriately
- [ ] All scanner tests pass
- [x] Scanner stores series by `key`
- [x] Progress callbacks use `key` for identification
- [x] Error messages reference `key` and `folder` appropriately
- [x] All scanner tests pass
**Test Command:**
@ -291,6 +291,23 @@ conda run -n AniWorld python -m pytest tests/unit/ -k "SerieList" -v
conda run -n AniWorld python -m pytest tests/unit/ -k "SerieScanner" -v
```
**Status:** ✅ COMPLETED
**Implementation Details:**
- Renamed `self.folderDict` to `self.keyDict` for clarity and consistency
- Updated internal dictionary storage to use `serie.key` as the dictionary key
- Modified `scan()` method to store series by key: `self.keyDict[serie.key] = serie`
- Enhanced logging in duplicate detection to show both key and folder for clarity
- Added debug logging when storing series, showing both the `key` (identifier) and the `folder` (metadata)
- Updated error contexts in exception handlers to include both `key` and `folder` in metadata
- Updated completion statistics to use `len(self.keyDict)` for series count
- Updated `reinit()` method docstring to reflect key-based storage
- Enhanced `__read_data_from_file()` docstring to clarify that `folder_name` is only used to locate data files
- All error messages now properly reference both `key` (identifier) and `folder` (metadata)
- All 554 unit tests pass successfully, confirming no regressions
- Code follows PEP 8 style guidelines (max 79 characters per line)
---
#### Task 1.4: Update Provider Classes to Use Key

View File

@ -13,7 +13,10 @@ import uuid
from typing import Callable, Iterable, Iterator, Optional
from src.core.entities.series import Serie
from src.core.exceptions.Exceptions import MatchNotFoundError, NoKeyFoundException
from src.core.exceptions.Exceptions import (
MatchNotFoundError,
NoKeyFoundException,
)
from src.core.interfaces.callbacks import (
CallbackManager,
CompletionContext,
@ -65,7 +68,7 @@ class SerieScanner:
raise ValueError(f"Base path is not a directory: {abs_path}")
self.directory: str = abs_path
self.folderDict: dict[str, Serie] = {}
self.keyDict: dict[str, Serie] = {}
self.loader: Loader = loader
self._callback_manager: CallbackManager = (
callback_manager or CallbackManager()
@ -80,8 +83,8 @@ class SerieScanner:
return self._callback_manager
def reinit(self) -> None:
"""Reinitialize the folder dictionary."""
self.folderDict: dict[str, Serie] = {}
"""Reinitialize the series dictionary (keyed by serie.key)."""
self.keyDict: dict[str, Serie] = {}
def get_total_to_scan(self) -> int:
"""Get the total number of folders to scan.
@ -187,12 +190,21 @@ class SerieScanner:
)
serie.save_to_file(data_path)
if serie.key in self.folderDict:
# Store by key (primary identifier), not folder
if serie.key in self.keyDict:
logger.error(
"Duplication found: %s", serie.key
"Duplicate series found with key '%s' "
"(folder: '%s')",
serie.key,
folder
)
else:
self.folderDict[serie.key] = serie
self.keyDict[serie.key] = serie
logger.debug(
"Stored series with key '%s' (folder: '%s')",
serie.key,
folder
)
no_key_found_logger.info(
"Saved Serie: '%s'", str(serie)
)
@ -209,7 +221,7 @@ class SerieScanner:
error=nkfe,
message=error_msg,
recoverable=True,
metadata={"folder": folder}
metadata={"folder": folder, "key": None}
)
)
except Exception as e:
@ -231,7 +243,7 @@ class SerieScanner:
error=e,
message=error_msg,
recoverable=True,
metadata={"folder": folder}
metadata={"folder": folder, "key": None}
)
)
continue
@ -245,7 +257,7 @@ class SerieScanner:
message=f"Scan completed. Processed {counter} folders.",
statistics={
"total_folders": counter,
"series_found": len(self.folderDict)
"series_found": len(self.keyDict)
}
)
)
@ -253,7 +265,7 @@ class SerieScanner:
logger.info(
"Scan completed. Processed %d folders, found %d series",
counter,
len(self.folderDict)
len(self.keyDict)
)
except Exception as e:
@ -311,10 +323,15 @@ class SerieScanner:
"""Read serie data from file or key file.
Args:
folder_name: Name of the folder containing serie data
folder_name: Filesystem folder name
(used only to locate data files)
Returns:
Serie object if found, None otherwise
Serie object with valid key if found, None otherwise
Note:
The returned Serie will have its 'key' as the primary identifier.
The 'folder' field is metadata only.
"""
folder_path = os.path.join(self.directory, folder_name)
key = None