From 2b59d3265fc2b777a88635324e6a916e7b0eabe1 Mon Sep 17 00:00:00 2001 From: raman325 <7243222+raman325@users.noreply.github.com> Date: Mon, 2 Mar 2026 16:01:02 -0500 Subject: [PATCH 1/2] Add exponential backoff to coordinator for persistent lock failures When a lock is persistently unreachable, the coordinator now tracks consecutive failures and applies exponential backoff after 3 failures. For poll-based providers, the update_interval is dynamically increased (60s * 2^n, capped at 30 minutes). Drift checks are also skipped during backoff. On success, the counter resets and the original interval is restored. Co-Authored-By: Claude Opus 4.6 --- custom_components/lock_code_manager/const.py | 5 + .../lock_code_manager/coordinator.py | 55 +++- tests/test_coordinator.py | 283 +++++++++++++++++- 3 files changed, 339 insertions(+), 4 deletions(-) diff --git a/custom_components/lock_code_manager/const.py b/custom_components/lock_code_manager/const.py index d41d9467..6cbeb78c 100644 --- a/custom_components/lock_code_manager/const.py +++ b/custom_components/lock_code_manager/const.py @@ -101,6 +101,11 @@ # These create switch/binary_sensor entities but their states don't map to access control EXCLUDED_CONDITION_PLATFORMS = frozenset({"scheduler"}) +# Coordinator backoff +BACKOFF_FAILURE_THRESHOLD: int = 3 +BACKOFF_INITIAL_SECONDS: int = 60 +BACKOFF_MAX_SECONDS: int = 1800 # 30 minutes + # Defaults DEFAULT_NUM_SLOTS = 3 DEFAULT_START = 1 diff --git a/custom_components/lock_code_manager/coordinator.py b/custom_components/lock_code_manager/coordinator.py index ee02ed72..fb98a56f 100644 --- a/custom_components/lock_code_manager/coordinator.py +++ b/custom_components/lock_code_manager/coordinator.py @@ -3,7 +3,7 @@ from __future__ import annotations from collections.abc import Callable -from datetime import datetime +from datetime import datetime, timedelta import logging from typing import TYPE_CHECKING, Any @@ -11,7 +11,12 @@ from homeassistant.helpers.event import async_track_time_interval from homeassistant.helpers.update_coordinator import DataUpdateCoordinator, UpdateFailed -from .const import DOMAIN +from .const import ( + BACKOFF_FAILURE_THRESHOLD, + BACKOFF_INITIAL_SECONDS, + BACKOFF_MAX_SECONDS, + DOMAIN, +) from .exceptions import LockCodeManagerError if TYPE_CHECKING: @@ -40,6 +45,8 @@ def __init__(self, hass: HomeAssistant, lock: BaseLock, config_entry: Any) -> No config_entry=config_entry, ) self.data: dict[int, int | str] = {} + self._consecutive_failures: int = 0 + self._original_update_interval: timedelta | None = update_interval # Set up drift detection timer for locks with hard_refresh_interval if lock.hard_refresh_interval: @@ -76,16 +83,50 @@ def push_update(self, updates: dict[int, int | str]) -> None: self.async_set_updated_data(new_data) + def _apply_backoff(self) -> None: + """Increment failure counter and apply exponential backoff if threshold met.""" + self._consecutive_failures += 1 + if self._consecutive_failures >= BACKOFF_FAILURE_THRESHOLD: + backoff_secs = min( + BACKOFF_INITIAL_SECONDS + * 2 ** (self._consecutive_failures - BACKOFF_FAILURE_THRESHOLD), + BACKOFF_MAX_SECONDS, + ) + if self._original_update_interval is not None: + self.update_interval = timedelta(seconds=backoff_secs) + _LOGGER.warning( + "Update failed %d consecutive times for %s, backing off %ds", + self._consecutive_failures, + self._lock.lock.entity_id, + backoff_secs, + ) + + def _reset_backoff(self) -> None: + """Reset failure counter and restore original update interval.""" + if self._consecutive_failures > 0: + _LOGGER.info( + "Update succeeded for %s after %d consecutive failures, " + "restoring normal update interval", + self._lock.lock.entity_id, + self._consecutive_failures, + ) + self._consecutive_failures = 0 + self.update_interval = self._original_update_interval # type: ignore[assignment] # setter accepts None + async def async_get_usercodes(self) -> dict[int, int | str]: """Update usercodes.""" try: - return await self._lock.async_internal_get_usercodes() + data = await self._lock.async_internal_get_usercodes() except LockCodeManagerError as err: + self._apply_backoff() # We can silently fail if we've never been able to retrieve data if not self.last_update_success: return {} raise UpdateFailed from err + self._reset_backoff() + return data + async def _async_drift_check(self, now: datetime) -> None: """ Perform periodic drift detection. @@ -98,6 +139,14 @@ async def _async_drift_check(self, now: datetime) -> None: if not self.last_update_success: return + if self._consecutive_failures >= BACKOFF_FAILURE_THRESHOLD: + _LOGGER.debug( + "Skipping drift check for %s (in backoff after %d failures)", + self._lock.lock.entity_id, + self._consecutive_failures, + ) + return + _LOGGER.debug( "Performing drift detection hard refresh for %s", self._lock.lock.entity_id, diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py index a04a5dab..a2fbdcf8 100644 --- a/tests/test_coordinator.py +++ b/tests/test_coordinator.py @@ -4,13 +4,20 @@ from datetime import timedelta from unittest.mock import AsyncMock, patch +import pytest from pytest_homeassistant_custom_component.common import MockConfigEntry from homeassistant.core import HomeAssistant, callback from homeassistant.helpers import device_registry as dr, entity_registry as er +from homeassistant.helpers.update_coordinator import UpdateFailed from homeassistant.util import dt as dt_util -from custom_components.lock_code_manager.const import DOMAIN +from custom_components.lock_code_manager.const import ( + BACKOFF_FAILURE_THRESHOLD, + BACKOFF_INITIAL_SECONDS, + BACKOFF_MAX_SECONDS, + DOMAIN, +) from custom_components.lock_code_manager.coordinator import ( LockUsercodeUpdateCoordinator, ) @@ -641,3 +648,277 @@ async def test_drift_check_handles_hard_refresh_error( # Data should remain unchanged assert coordinator.data == {1: "1234"} + + +# --- Backoff tests --- + + +def _create_poll_coordinator( + hass: HomeAssistant, +) -> tuple[LockUsercodeUpdateCoordinator, MockLockWithHardRefresh]: + """Create a coordinator with a poll-based (non-push) lock.""" + entity_reg = er.async_get(hass) + config_entry = MockConfigEntry(domain=DOMAIN) + config_entry.add_to_hass(hass) + + lock_entity = entity_reg.async_get_or_create( + "lock", + "test", + "test_lock", + config_entry=config_entry, + ) + + lock = MockLockWithHardRefresh( + hass, + dr.async_get(hass), + entity_reg, + config_entry, + lock_entity, + ) + + coordinator = LockUsercodeUpdateCoordinator(hass, lock, config_entry) + return coordinator, lock + + +def _create_push_coordinator( + hass: HomeAssistant, +) -> tuple[LockUsercodeUpdateCoordinator, MockLockWithPush]: + """Create a coordinator with a push-based lock.""" + entity_reg = er.async_get(hass) + config_entry = MockConfigEntry(domain=DOMAIN) + config_entry.add_to_hass(hass) + + lock_entity = entity_reg.async_get_or_create( + "lock", + "test", + "test_lock", + config_entry=config_entry, + ) + + lock = MockLockWithPush( + hass, + dr.async_get(hass), + entity_reg, + config_entry, + lock_entity, + ) + lock._hard_refresh_interval = timedelta(hours=1) + + coordinator = LockUsercodeUpdateCoordinator(hass, lock, config_entry) + return coordinator, lock + + +async def test_backoff_failure_counter_increments(hass: HomeAssistant) -> None: + """Test that consecutive failure counter increments on each failure.""" + coordinator, lock = _create_poll_coordinator(hass) + # Simulate that we previously had a successful update so UpdateFailed is raised + coordinator.last_update_success = True + + mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + for i in range(1, 4): + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + assert coordinator._consecutive_failures == i + + +async def test_backoff_first_failure_returns_empty_dict( + hass: HomeAssistant, +) -> None: + """Test that first failure returns empty dict when no prior success.""" + coordinator, lock = _create_poll_coordinator(hass) + # No successful update yet + coordinator.last_update_success = False + + mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + result = await coordinator.async_get_usercodes() + + assert result == {} + assert coordinator._consecutive_failures == 1 + + +async def test_backoff_subsequent_failure_raises_update_failed( + hass: HomeAssistant, +) -> None: + """Test that subsequent failures raise UpdateFailed after prior success.""" + coordinator, lock = _create_poll_coordinator(hass) + coordinator.last_update_success = True + + mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + + assert coordinator._consecutive_failures == 1 + + +async def test_backoff_activates_after_threshold(hass: HomeAssistant) -> None: + """Test that backoff activates after BACKOFF_FAILURE_THRESHOLD failures.""" + coordinator, lock = _create_poll_coordinator(hass) + original_interval = coordinator.update_interval + coordinator.last_update_success = True + + mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + # Failures below threshold should not change interval + for _ in range(BACKOFF_FAILURE_THRESHOLD - 1): + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + + assert coordinator.update_interval == original_interval + + # Failure at threshold should activate backoff + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + + assert coordinator._consecutive_failures == BACKOFF_FAILURE_THRESHOLD + expected_backoff = timedelta( + seconds=BACKOFF_INITIAL_SECONDS * 2**0 # 2^(3-3) = 1 + ) + assert coordinator.update_interval == expected_backoff + + +async def test_backoff_interval_increases_exponentially( + hass: HomeAssistant, +) -> None: + """Test that update_interval increases exponentially for poll-based providers.""" + coordinator, lock = _create_poll_coordinator(hass) + coordinator.last_update_success = True + + mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + # Reach threshold + additional failures + for _ in range(BACKOFF_FAILURE_THRESHOLD + 3): + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + + # After threshold+3 failures, exponent = 3, backoff = 60 * 2^3 = 480s + expected_backoff = timedelta(seconds=BACKOFF_INITIAL_SECONDS * 2**3) + assert coordinator.update_interval == expected_backoff + + +async def test_backoff_caps_at_max(hass: HomeAssistant) -> None: + """Test that backoff interval is capped at BACKOFF_MAX_SECONDS.""" + coordinator, lock = _create_poll_coordinator(hass) + coordinator.last_update_success = True + + mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + # Many failures to exceed max + for _ in range(BACKOFF_FAILURE_THRESHOLD + 20): + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + + assert coordinator.update_interval == timedelta(seconds=BACKOFF_MAX_SECONDS) + + +async def test_backoff_resets_on_success(hass: HomeAssistant) -> None: + """Test that counters and interval reset on success.""" + coordinator, lock = _create_poll_coordinator(hass) + original_interval = coordinator.update_interval + coordinator.last_update_success = True + + mock_get_fail = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get_fail): + # Accumulate failures past threshold + for _ in range(BACKOFF_FAILURE_THRESHOLD + 1): + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + + assert coordinator._consecutive_failures == BACKOFF_FAILURE_THRESHOLD + 1 + assert coordinator.update_interval != original_interval + + # Now succeed + mock_get_success = AsyncMock(return_value={1: "1234"}) + with patch.object(lock, "async_internal_get_usercodes", mock_get_success): + result = await coordinator.async_get_usercodes() + + assert result == {1: "1234"} + assert coordinator._consecutive_failures == 0 + assert coordinator.update_interval == original_interval + + +async def test_backoff_no_reset_when_no_prior_failures( + hass: HomeAssistant, +) -> None: + """Test that success with no prior failures does not modify interval.""" + coordinator, lock = _create_poll_coordinator(hass) + original_interval = coordinator.update_interval + + mock_get = AsyncMock(return_value={1: "1234"}) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + result = await coordinator.async_get_usercodes() + + assert result == {1: "1234"} + assert coordinator._consecutive_failures == 0 + assert coordinator.update_interval == original_interval + + +async def test_drift_check_skipped_during_backoff(hass: HomeAssistant) -> None: + """Test that drift check is skipped when in backoff.""" + coordinator, lock = _create_push_coordinator(hass) + coordinator.last_update_success = True + coordinator._consecutive_failures = BACKOFF_FAILURE_THRESHOLD + + mock_hard_refresh = AsyncMock() + with patch.object(lock, "async_internal_hard_refresh_codes", mock_hard_refresh): + await coordinator._async_drift_check(dt_util.utcnow()) + + # Hard refresh should NOT be called during backoff + mock_hard_refresh.assert_not_called() + + +async def test_drift_check_runs_below_backoff_threshold( + hass: HomeAssistant, +) -> None: + """Test that drift check runs when failures are below threshold.""" + coordinator, lock = _create_push_coordinator(hass) + coordinator.last_update_success = True + coordinator._consecutive_failures = BACKOFF_FAILURE_THRESHOLD - 1 + + mock_hard_refresh = AsyncMock(return_value={1: "1234"}) + with patch.object(lock, "async_internal_hard_refresh_codes", mock_hard_refresh): + await coordinator._async_drift_check(dt_util.utcnow()) + + # Hard refresh SHOULD be called below threshold + mock_hard_refresh.assert_called_once() + + +async def test_backoff_push_provider_does_not_change_interval( + hass: HomeAssistant, +) -> None: + """Test that push-based providers do not modify update_interval during backoff.""" + coordinator, lock = _create_push_coordinator(hass) + # Push providers have update_interval=None + assert coordinator.update_interval is None + coordinator.last_update_success = True + + mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + for _ in range(BACKOFF_FAILURE_THRESHOLD + 2): + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + + # update_interval should remain None for push providers + assert coordinator.update_interval is None + # But failure counter should still be tracked + assert coordinator._consecutive_failures == BACKOFF_FAILURE_THRESHOLD + 2 + + +async def test_backoff_init_stores_original_interval( + hass: HomeAssistant, +) -> None: + """Test that __init__ stores the original update interval.""" + coordinator, lock = _create_poll_coordinator(hass) + assert coordinator._original_update_interval == lock.usercode_scan_interval + assert coordinator._consecutive_failures == 0 + + +async def test_backoff_init_push_stores_none_interval( + hass: HomeAssistant, +) -> None: + """Test that __init__ stores None for push-based providers.""" + coordinator, _ = _create_push_coordinator(hass) + assert coordinator._original_update_interval is None + assert coordinator._consecutive_failures == 0 From 08aa7dd5401bb28d4db97127fa562b8f8be3ce28 Mon Sep 17 00:00:00 2001 From: raman325 <7243222+raman325@users.noreply.github.com> Date: Mon, 2 Mar 2026 18:04:32 -0500 Subject: [PATCH 2/2] Address review: reset backoff on push_update, fix logging and type ignore - Reset backoff state in push_update() when data actually changes, so push-based providers recover from backoff without needing async_get_usercodes() to succeed first - Differentiate log messages for poll vs push providers in _apply_backoff() - Remove incorrect type: ignore comment in _reset_backoff() - Only restore update_interval in _reset_backoff() for poll-based providers - Add tests for push_update backoff reset (data changed + unchanged) Co-Authored-By: Claude Opus 4.6 Entire-Checkpoint: 3b174f2659d4 --- .../lock_code_manager/coordinator.py | 30 +++++++++---- tests/test_coordinator.py | 44 +++++++++++++++++++ 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/custom_components/lock_code_manager/coordinator.py b/custom_components/lock_code_manager/coordinator.py index fb98a56f..1d5d9303 100644 --- a/custom_components/lock_code_manager/coordinator.py +++ b/custom_components/lock_code_manager/coordinator.py @@ -81,6 +81,10 @@ def push_update(self, updates: dict[int, int | str]) -> None: if new_data == self.data: return + # A successful push update proves the lock is reachable, so reset + # backoff to re-enable drift checks and normal polling. + self._reset_backoff() + self.async_set_updated_data(new_data) def _apply_backoff(self) -> None: @@ -94,24 +98,32 @@ def _apply_backoff(self) -> None: ) if self._original_update_interval is not None: self.update_interval = timedelta(seconds=backoff_secs) - _LOGGER.warning( - "Update failed %d consecutive times for %s, backing off %ds", - self._consecutive_failures, - self._lock.lock.entity_id, - backoff_secs, - ) + _LOGGER.warning( + "Update failed %d consecutive times for %s, " + "backing off polling interval to %ds", + self._consecutive_failures, + self._lock.lock.entity_id, + backoff_secs, + ) + else: + _LOGGER.warning( + "Update failed %d consecutive times for %s, " + "suppressing drift checks until recovery", + self._consecutive_failures, + self._lock.lock.entity_id, + ) def _reset_backoff(self) -> None: """Reset failure counter and restore original update interval.""" if self._consecutive_failures > 0: _LOGGER.info( - "Update succeeded for %s after %d consecutive failures, " - "restoring normal update interval", + "Lock %s recovered after %d consecutive failures", self._lock.lock.entity_id, self._consecutive_failures, ) self._consecutive_failures = 0 - self.update_interval = self._original_update_interval # type: ignore[assignment] # setter accepts None + if self._original_update_interval is not None: + self.update_interval = self._original_update_interval async def async_get_usercodes(self) -> dict[int, int | str]: """Update usercodes.""" diff --git a/tests/test_coordinator.py b/tests/test_coordinator.py index a2fbdcf8..c014caf1 100644 --- a/tests/test_coordinator.py +++ b/tests/test_coordinator.py @@ -922,3 +922,47 @@ async def test_backoff_init_push_stores_none_interval( coordinator, _ = _create_push_coordinator(hass) assert coordinator._original_update_interval is None assert coordinator._consecutive_failures == 0 + + +async def test_push_update_resets_backoff(hass: HomeAssistant) -> None: + """Test that push_update resets backoff state when data changes.""" + coordinator, lock = _create_push_coordinator(hass) + coordinator.last_update_success = True + + # Simulate failures past threshold + mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + for _ in range(BACKOFF_FAILURE_THRESHOLD + 2): + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + + assert coordinator._consecutive_failures == BACKOFF_FAILURE_THRESHOLD + 2 + + # Push update with new data should reset backoff + coordinator.data = {1: "old"} + coordinator.push_update({1: "1234"}) + + assert coordinator._consecutive_failures == 0 + + +async def test_push_update_no_reset_when_data_unchanged( + hass: HomeAssistant, +) -> None: + """Test that push_update does not reset backoff when data is unchanged.""" + coordinator, lock = _create_push_coordinator(hass) + coordinator.last_update_success = True + + # Simulate failures past threshold + mock_get = AsyncMock(side_effect=LockDisconnected("Lock offline")) + with patch.object(lock, "async_internal_get_usercodes", mock_get): + for _ in range(BACKOFF_FAILURE_THRESHOLD + 1): + with pytest.raises(UpdateFailed): + await coordinator.async_get_usercodes() + + assert coordinator._consecutive_failures == BACKOFF_FAILURE_THRESHOLD + 1 + + # Push update with same data should NOT reset backoff + coordinator.data = {1: "1234"} + coordinator.push_update({1: "1234"}) + + assert coordinator._consecutive_failures == BACKOFF_FAILURE_THRESHOLD + 1