Fix empty error field in geo_lookup_request_failed log events

- Replace str(exc) with repr(exc) in lookup() and _batch_api_call()
  so exception class name is always present even for no-message errors
  (e.g. aiohttp.ServerDisconnectedError() whose str() is empty)
- Add exc_type=type(exc).__name__ field to network-error log events
  for easy structured-log filtering
- Move import aiohttp to runtime import; use aiohttp.ClientTimeout()
  instead of raw float, removing # type: ignore[arg-type] workarounds
- Add TestErrorLogging with 3 tests covering empty-message and regular exceptions
This commit is contained in:
2026-03-12 17:50:58 +01:00
parent 029c094e18
commit a61c9dc969
3 changed files with 351 additions and 690 deletions

View File

@@ -572,3 +572,198 @@ class TestFlushDirty:
assert not geo_service._dirty # type: ignore[attr-defined]
db.commit.assert_awaited_once()
# ---------------------------------------------------------------------------
# Rate-limit throttling and retry tests (Task 5)
# ---------------------------------------------------------------------------
class TestLookupBatchThrottling:
    """Exercise ``lookup_batch`` pacing: inter-chunk delay, retry, and give-up."""

    async def test_lookup_batch_throttles_between_chunks(self) -> None:
        """Sending more than ``_BATCH_SIZE`` IPs sleeps once between the two
        batch HTTP calls, for at least ``_BATCH_DELAY``."""
        chunk_limit: int = geo_service._BATCH_SIZE  # type: ignore[attr-defined]
        # One address over the limit yields exactly two batch calls.
        ips = [
            f"10.0.{high}.{low}"
            for high, low in (divmod(index, 256) for index in range(chunk_limit + 1))
        ]

        def _resolve_all(chunk: list[str], _session: object) -> dict[str, GeoInfo]:
            # A fresh GeoInfo per address, exactly one entry per requested IP.
            resolved: dict[str, GeoInfo] = {}
            for ip in chunk:
                resolved[ip] = GeoInfo(
                    country_code="DE", country_name="Germany", asn=None, org=None
                )
            return resolved

        batch_patch = patch(
            "app.services.geo_service._batch_api_call",
            new_callable=AsyncMock,
            side_effect=_resolve_all,
        )
        sleep_patch = patch("app.services.geo_service.asyncio.sleep", new_callable=AsyncMock)
        with batch_patch as mock_batch, sleep_patch as mock_sleep:
            await geo_service.lookup_batch(ips, MagicMock())

        # Two chunks, hence a single pause between them.
        assert mock_batch.call_count == 2
        mock_sleep.assert_awaited_once()
        observed_delay: float = mock_sleep.call_args.args[0]
        assert observed_delay >= geo_service._BATCH_DELAY  # type: ignore[attr-defined]

    async def test_lookup_batch_retries_on_full_chunk_failure(self) -> None:
        """A chunk that comes back entirely empty is retried, and the second
        attempt's data is what the caller receives."""
        ips = ["1.2.3.4", "5.6.7.8"]
        blank = GeoInfo(country_code=None, country_name=None, asn=None, org=None)
        all_failed: dict[str, GeoInfo] = dict.fromkeys(ips, blank)
        resolved = {
            "1.2.3.4": GeoInfo(country_code="DE", country_name="Germany", asn=None, org=None),
            "5.6.7.8": GeoInfo(country_code="US", country_name="United States", asn=None, org=None),
        }
        # Every invocation of the fake batch call is recorded here.
        seen_chunks: list[list[str]] = []

        async def _fail_once(chunk: list[str], _session: object) -> dict[str, GeoInfo]:
            seen_chunks.append(chunk)
            # Only the very first attempt fails; later ones succeed.
            return all_failed if len(seen_chunks) == 1 else resolved

        batch_patch = patch(
            "app.services.geo_service._batch_api_call",
            new_callable=AsyncMock,
            side_effect=_fail_once,
        )
        sleep_patch = patch("app.services.geo_service.asyncio.sleep", new_callable=AsyncMock)
        with batch_patch, sleep_patch:
            result = await geo_service.lookup_batch(ips, MagicMock())

        assert len(seen_chunks) == 2
        assert result["1.2.3.4"].country_code == "DE"
        assert result["5.6.7.8"].country_code == "US"

    async def test_lookup_batch_gives_up_after_max_retries(self) -> None:
        """Once ``_BATCH_MAX_RETRIES`` + 1 attempts have failed, the IP is
        negatively cached and returned without a country."""
        ips = ["9.9.9.9"]
        blank = GeoInfo(country_code=None, country_name=None, asn=None, org=None)
        all_failed: dict[str, GeoInfo] = dict.fromkeys(ips, blank)
        retry_limit: int = geo_service._BATCH_MAX_RETRIES  # type: ignore[attr-defined]

        batch_patch = patch(
            "app.services.geo_service._batch_api_call",
            new_callable=AsyncMock,
            return_value=all_failed,
        )
        sleep_patch = patch("app.services.geo_service.asyncio.sleep", new_callable=AsyncMock)
        with batch_patch as mock_batch, sleep_patch as mock_sleep:
            result = await geo_service.lookup_batch(ips, MagicMock())

        # The first attempt plus one call per retry.
        assert mock_batch.call_count == retry_limit + 1
        # No country could be resolved for the address.
        assert result["9.9.9.9"].country_code is None
        # The address must now be in the negative cache.
        assert "9.9.9.9" in geo_service._neg_cache  # type: ignore[attr-defined]
        # One sleep per retry, each doubling the previous delay.
        assert mock_sleep.call_count == retry_limit
        base_delay: float = geo_service._BATCH_DELAY  # type: ignore[attr-defined]
        slept = [recorded.args[0] for recorded in mock_sleep.call_args_list]
        for attempt, actual in enumerate(slept, start=1):
            assert actual == pytest.approx(base_delay * (2 ** attempt))
# ---------------------------------------------------------------------------
# Error logging improvements (Task 2)
# ---------------------------------------------------------------------------
class TestErrorLogging:
    """Verify that exception details are properly captured in log events.

    Previously ``str(exc)`` was used, which yields an empty string for
    aiohttp exceptions such as ``ServerDisconnectedError`` that carry no
    message. The fix uses ``repr(exc)`` so the exception class name is
    always present, and adds an ``exc_type`` field for easy log filtering.
    """

    class _EmptyMessageError(Exception):
        """Exception whose ``str()`` representation is empty."""

        def __str__(self) -> str:
            return ""

    @staticmethod
    def _failing_session(http_method: str, exc: Exception) -> MagicMock:
        """Build a mock session whose request context manager raises on entry.

        Args:
            http_method: Name of the session method to stub (``"get"`` or
                ``"post"``).
            exc: Exception instance raised when the returned context manager
                is entered with ``async with``.

        Returns:
            A ``MagicMock`` session whose ``http_method`` attribute returns
            the failing async context manager.
        """
        failing_ctx = AsyncMock()
        failing_ctx.__aenter__ = AsyncMock(side_effect=exc)
        failing_ctx.__aexit__ = AsyncMock(return_value=False)
        session = MagicMock()
        setattr(session, http_method, MagicMock(return_value=failing_ctx))
        return session

    async def test_empty_message_exception_logs_exc_type(self) -> None:
        """When exception str() is empty, exc_type and repr are still logged."""
        import structlog.testing

        session = self._failing_session("get", self._EmptyMessageError())

        with structlog.testing.capture_logs() as captured:
            result = await geo_service.lookup("197.221.98.153", session)  # type: ignore[arg-type]

        # The lookup returns a result object without country data.
        assert result is not None
        assert result.country_code is None

        request_failed = [e for e in captured if e.get("event") == "geo_lookup_request_failed"]
        assert len(request_failed) == 1
        event = request_failed[0]
        # exc_type must name the exception class — never empty.
        assert event["exc_type"] == "_EmptyMessageError"
        # repr() must include the class name even when str() is empty.
        assert "_EmptyMessageError" in event["error"]

    async def test_connection_error_logs_exc_type(self) -> None:
        """A standard OSError with message is logged both in error and exc_type."""
        import structlog.testing

        session = self._failing_session("get", OSError("connection refused"))

        with structlog.testing.capture_logs() as captured:
            await geo_service.lookup("10.0.0.1", session)  # type: ignore[arg-type]

        request_failed = [e for e in captured if e.get("event") == "geo_lookup_request_failed"]
        assert len(request_failed) == 1
        event = request_failed[0]
        assert event["exc_type"] == "OSError"
        assert "connection refused" in event["error"]

    async def test_batch_empty_message_exception_logs_exc_type(self) -> None:
        """Batch API call: empty-message exceptions include exc_type in the log."""
        import structlog.testing

        # The batch endpoint is reached through ``session.post``.
        session = self._failing_session("post", self._EmptyMessageError())

        with structlog.testing.capture_logs() as captured:
            result = await geo_service._batch_api_call(["1.2.3.4"], session)  # type: ignore[attr-defined]

        assert result["1.2.3.4"].country_code is None

        batch_failed = [e for e in captured if e.get("event") == "geo_batch_request_failed"]
        assert len(batch_failed) == 1
        event = batch_failed[0]
        assert event["exc_type"] == "_EmptyMessageError"
        assert "_EmptyMessageError" in event["error"]