Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 18 additions & 12 deletions tensorboard/backend/event_processing/data_ingester.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,18 +98,24 @@ def start(self):

def _reload():
while True:
start = time.time()
logger.info("TensorBoard reload process beginning")
for path, name in self._path_to_run.items():
self._multiplexer.AddRunsFromDirectory(path, name)
logger.info(
"TensorBoard reload process: Reload the whole Multiplexer"
)
self._multiplexer.Reload()
duration = time.time() - start
logger.info(
"TensorBoard done reloading. Load took %0.3f secs", duration
)
try:
start = time.time()
logger.info("TensorBoard reload process beginning")
for path, name in self._path_to_run.items():
self._multiplexer.AddRunsFromDirectory(path, name)
logger.info(
"TensorBoard reload process: Reload the whole Multiplexer"
)
self._multiplexer.Reload()
duration = time.time() - start
logger.info(
"TensorBoard done reloading. Load took %0.3f secs",
duration,
)
except Exception:
logger.error(
"TensorBoard reload failed", exc_info=True
)
if self._reload_interval == 0:
# Only load the multiplexer once. Do not continuously reload.
break
Expand Down
34 changes: 34 additions & 0 deletions tensorboard/backend/event_processing/data_ingester_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,40 @@ def __init__(
self.window_title = window_title


class ReloadErrorHandlingTest(tb_test.TestCase):
    """Tests that errors raised inside the reload pass do not escape `start()`.

    Both tests use `reload_interval=0`, for which the reload loop performs a
    single pass and then exits, so what is verified here is that an exception
    raised during that pass is caught rather than propagated to the caller.
    """

    def _make_blocking_ingester(self):
        """Return a `LocalDataIngester` configured for a one-shot reload.

        NOTE(review): `reload_task="blocking"` is presumed to run the reload
        synchronously inside `start()`; the assertions made right after
        `start()` returns rely on that -- confirm against `start()`.
        """
        flags = FakeFlags(
            logdir="logdir", reload_interval=0, reload_task="blocking"
        )
        return data_ingester.LocalDataIngester(flags)

    def test_reload_continues_after_exception(self):
        """An error from `AddRunsFromDirectory` is swallowed by `start()`."""
        ingester = self._make_blocking_ingester()
        # Make AddRunsFromDirectory raise on the first call.
        with mock.patch.object(
            ingester._multiplexer,
            "AddRunsFromDirectory",
            side_effect=OSError("network error"),
        ) as mock_add, mock.patch.object(
            ingester._multiplexer, "Reload"
        ) as mock_reload:
            # Should not raise despite the OSError.
            ingester.start()
            mock_add.assert_called_once()
            # Reload should not be called since AddRunsFromDirectory raised
            # first and aborted the rest of the pass.
            mock_reload.assert_not_called()

    def test_reload_continues_after_reload_exception(self):
        """An error from `Reload()` is swallowed by `start()` as well."""
        ingester = self._make_blocking_ingester()
        with mock.patch.object(
            ingester._multiplexer, "AddRunsFromDirectory"
        ), mock.patch.object(
            ingester._multiplexer,
            "Reload",
            side_effect=RuntimeError("reload failed"),
        ) as mock_reload:
            # Should not raise despite the RuntimeError.
            ingester.start()
            # Guard against a vacuous pass: the failing Reload must actually
            # have been reached, exactly once for a one-shot reload.
            mock_reload.assert_called_once()


class GetEventFileActiveFilterTest(tb_test.TestCase):
def testDisabled(self):
flags = FakeFlags(logdir="logdir", reload_multifile=False)
Expand Down