Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 51 additions & 13 deletions tensorboard/backend/event_processing/io_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,44 @@ def ListRecursivelyViaWalking(top):
)


def _GetLogdirSubdirectoriesViaCloudEventGlobbing(path):
    """Locates directories holding event files under a cloud logdir.

    Instead of listing every file, this issues recursive globs aimed
    directly at `*tfevents*` names. Some TensorFlow `tf.io.gfile.glob`
    backends report no matches for `**/*tfevents*` even though deeper
    descendants contain event files, so a second recursive pattern is tried
    when the first yields nothing. If neither pattern produces an event
    directory, the slower legacy globbing traversal is used, which has the
    expected behavior.

    Args:
      path: The logdir to search. It is passed through
        `_EscapeGlobCharacters` before the glob patterns are built.

    Returns:
      A tuple of the distinct parent directories of the event files found,
      in unspecified order.
    """
    base = _EscapeGlobCharacters(path)
    for suffix in ("/**/*tfevents*", "/**/**/*tfevents*"):
        pattern = base + suffix
        matches = tf.io.gfile.glob(pattern)
        logger.info(
            "GetLogdirSubdirectories: Found %d event files via glob %r.",
            len(matches),
            pattern,
        )
        # A set collapses the many files that share one run directory.
        event_dirs = {
            os.path.dirname(match)
            for match in matches
            if IsTensorFlowEventsFile(match)
        }
        if event_dirs:
            # The first pattern that finds anything wins; remaining patterns
            # and the legacy traversal are skipped.
            return tuple(event_dirs)

    logger.info(
        "GetLogdirSubdirectories: Targeted globbing found no event files; "
        "falling back to legacy cloud globbing."
    )
    legacy_dirs = []
    for subdir, files in ListRecursivelyViaGlobbing(path):
        if any(IsTensorFlowEventsFile(f) for f in files):
            legacy_dirs.append(subdir)
    return tuple(legacy_dirs)


def GetLogdirSubdirectories(path):
"""Obtains all subdirectories with events files.

Expand Down Expand Up @@ -203,22 +241,22 @@ def GetLogdirSubdirectories(path):
)

if io_util.IsCloudPath(path):
# Glob-ing for files can be significantly faster than recursively
# walking through directories for some file systems.
# For cloud filesystems, use a single targeted recursive glob for
# event files instead of listing all files level by level. If the
# backend returns a false negative for the fast path, fall back to the
# legacy globbing traversal for correctness.
logger.info(
"GetLogdirSubdirectories: Starting to list directories via glob-ing."
"GetLogdirSubdirectories: Starting to find event files via targeted glob."
)
traversal_method = ListRecursivelyViaGlobbing
return _GetLogdirSubdirectoriesViaCloudEventGlobbing(path)
else:
# For other file systems, the glob-ing based method might be slower because
# each call to glob could involve performing a recursive walk.
# For local file systems, walking is more efficient because each
# glob call could itself involve a recursive walk.
logger.info(
"GetLogdirSubdirectories: Starting to list directories via walking."
)
traversal_method = ListRecursivelyViaWalking

return (
subdir
for (subdir, files) in traversal_method(path)
if any(IsTensorFlowEventsFile(f) for f in files)
)
return (
subdir
for (subdir, files) in ListRecursivelyViaWalking(path)
if any(IsTensorFlowEventsFile(f) for f in files)
)
83 changes: 83 additions & 0 deletions tensorboard/backend/event_processing/io_wrapper_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,89 @@ def testGetLogdirSubdirectories(self):
io_wrapper.GetLogdirSubdirectories(temp_dir),
)

def testGetLogdirSubdirectoriesCloudUsesSecondTargetedPattern(self):
    """The second targeted glob is used when the first returns nothing."""
    logdir = "gs://bucket/logdir"
    gfile = io_wrapper.tf.io.gfile
    self.stubs.Set(gfile, "exists", lambda _: True)
    self.stubs.Set(gfile, "isdir", lambda _: True)
    self.stubs.Set(io_wrapper.io_util, "IsCloudPath", lambda _: True)

    first_pattern = "gs://bucket/logdir/**/*tfevents*"
    second_pattern = "gs://bucket/logdir/**/**/*tfevents*"
    # Canned glob results: the first pattern is a false negative, the second
    # pattern finds both runs.
    canned_results = {
        first_pattern: [],
        second_pattern: [
            "gs://bucket/logdir/run1/tensorboard/events.out.tfevents.1",
            "gs://bucket/logdir/run2/tensorboard/events.out.tfevents.2",
        ],
    }
    seen_patterns = []

    def fake_glob(pattern):
        seen_patterns.append(pattern)
        if pattern not in canned_results:
            self.fail("unexpected glob pattern: %r" % pattern)
        return canned_results[pattern]

    def forbid_legacy(_):
        # Reaching the legacy traversal means the targeted globs were ignored.
        return self.fail("legacy fallback should not run")

    self.stubs.Set(gfile, "glob", fake_glob)
    self.stubs.Set(io_wrapper, "ListRecursivelyViaGlobbing", forbid_legacy)

    expected_dirs = [
        "gs://bucket/logdir/run1/tensorboard",
        "gs://bucket/logdir/run2/tensorboard",
    ]
    self.assertCountEqual(
        expected_dirs, io_wrapper.GetLogdirSubdirectories(logdir)
    )
    # Both patterns must have been attempted, in order.
    self.assertEqual([first_pattern, second_pattern], seen_patterns)

def testGetLogdirSubdirectoriesCloudFallsBackToLegacyGlobbing(self):
    """The legacy traversal is used when every targeted glob is empty."""
    logdir = "gs://bucket/logdir"
    gfile = io_wrapper.tf.io.gfile
    self.stubs.Set(gfile, "exists", lambda _: True)
    self.stubs.Set(gfile, "isdir", lambda _: True)
    self.stubs.Set(io_wrapper.io_util, "IsCloudPath", lambda _: True)
    # Every targeted glob comes back empty, forcing the fallback path.
    self.stubs.Set(gfile, "glob", lambda _: [])

    # (subdir, files) pairs as the legacy traversal would report them.
    run1_entry = (
        "gs://bucket/logdir/run1/tensorboard",
        ("gs://bucket/logdir/run1/tensorboard/events.out.tfevents.1",),
    )
    run2_entry = (
        "gs://bucket/logdir/run2/tensorboard",
        (
            "gs://bucket/logdir/run2/tensorboard/model.ckpt",
            "gs://bucket/logdir/run2/tensorboard/events.out.tfevents.2",
        ),
    )
    # run3 holds only a checkpoint, so it must be filtered out.
    run3_entry = (
        "gs://bucket/logdir/run3",
        ("gs://bucket/logdir/run3/model.ckpt",),
    )

    def legacy_listing(_):
        return iter([run1_entry, run2_entry, run3_entry])

    self.stubs.Set(io_wrapper, "ListRecursivelyViaGlobbing", legacy_listing)

    expected_dirs = [
        "gs://bucket/logdir/run1/tensorboard",
        "gs://bucket/logdir/run2/tensorboard",
    ]
    self.assertCountEqual(
        expected_dirs, io_wrapper.GetLogdirSubdirectories(logdir)
    )

def _CreateDeepDirectoryStructure(self, top_directory):
"""Creates a reasonable deep structure of subdirectories with files.

Expand Down