From 299a72b02fff1a52e29c26883d2379424cf26b5d Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Wed, 11 Mar 2026 10:44:55 -0600 Subject: [PATCH 01/11] Backend: Add RetryableBackend to retry IO One challenge here was ensuring that the right stats counter gets incremented when an IO is retried, or trying to prevent an exception being raised by the stats module from causing the IO to be retried. Parts of RetryableBackend may want to be brought up to Backend, like the update_stats functions. --- src/amd_detail/CMakeLists.txt | 1 + src/amd_detail/backend.cpp | 63 +++++++++++++++++++++++++++++ src/amd_detail/backend.h | 50 +++++++++++++++++++++++ src/amd_detail/backend/fastpath.cpp | 26 ++++++------ src/amd_detail/backend/fastpath.h | 9 +++-- src/amd_detail/state.cpp | 15 +++++-- 6 files changed, 145 insertions(+), 19 deletions(-) create mode 100644 src/amd_detail/backend.cpp diff --git a/src/amd_detail/CMakeLists.txt b/src/amd_detail/CMakeLists.txt index c264f630..1153b87a 100644 --- a/src/amd_detail/CMakeLists.txt +++ b/src/amd_detail/CMakeLists.txt @@ -7,6 +7,7 @@ set(HIPFILE_SOURCES "${HIPFILE_SRC_COMMON_PATH}/hipfile-common.cpp" async.cpp + backend.cpp backend/asyncop-fallback.cpp backend/memcpy-kernel.hip backend/fallback.cpp diff --git a/src/amd_detail/backend.cpp b/src/amd_detail/backend.cpp new file mode 100644 index 00000000..361ab552 --- /dev/null +++ b/src/amd_detail/backend.cpp @@ -0,0 +1,63 @@ +/* Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: MIT + */ + +#include "backend.h" +#include "buffer.h" +#include "file.h" +#include "io.h" + +#include +#include +#include +#include +#include + +using namespace hipFile; + +ssize_t +RetryableBackend::io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset) +{ + ssize_t nbytes{0}; + try { + nbytes = retryable_io(type, file, buffer, size, file_offset, buffer_offset); + } + catch (...) { + std::exception_ptr e_ptr = std::current_exception(); + if (is_retryable(e_ptr, nbytes)) { + nbytes = retry_backend->io(type, file, buffer, size, file_offset, buffer_offset); + } + else { + throw; + } + // For now, assume retry_backend will handle its own stats (which is true at this moment). + return nbytes; + } + switch (type) { + case (IoType::Read): + update_read_stats(nbytes); + break; + case (IoType::Write): + update_write_stats(nbytes); + break; + default: + break; + } + return nbytes; +} + +bool +RetryableBackend::is_retryable(std::exception_ptr e_ptr, ssize_t nbytes) const +{ + (void)e_ptr; + (void)nbytes; + return static_cast(retry_backend); +} + +void +RetryableBackend::register_retry_backend(std::shared_ptr backend) noexcept +{ + retry_backend = backend; +} diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index c2e040df..9da3adfe 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -58,4 +58,54 @@ struct Backend { hoff_t file_offset, hoff_t buffer_offset) = 0; }; +// This kind of Backend allows for an IO to be retried automatically in the +// event of an error. The RetryableBackend implements the logic to determine +// which errors are retryable, and which are not, in Backend::io(). +struct RetryableBackend : public Backend { + ssize_t io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset) override final; + + /// @brief Check if a failed IO operation is retryable. + /// + /// @param e_ptr Pointer to the thrown Exception by the failed IO + /// @param nbytes Number of bytes/error code returned by `retryable_io()` + /// + /// @note By default, RetryableBackend just checks if a Backend has been + /// registered for retrying an IO. + /// + /// @return True if this RetryableBackend can retry the IO, else False. + virtual bool is_retryable(std::exception_ptr e_ptr, ssize_t nbytes) const; + + /// @brief Register a Backend to call into to retry a failed IO operation. + /// + /// @param backend Backend to retry a failed IO operation. + void register_retry_backend(std::shared_ptr backend) noexcept; + + /// @brief Process an IO Request that can be resubmitted to another Backend if the IO fails. + /// + /// @param type IO type (read/write) + /// @param file File to read from or write to + /// @param buffer Buffer to write to or read from + /// @param size Number of bytes to transfer + /// @param file_offset Offset from the start of the file + /// @param buffer_offset Offset from the start of the buffer + /// + /// @return Number of bytes transferred, negative on error + /// + /// @throws Hip::RuntimeError Sys::RuntimeError + virtual ssize_t retryable_io(IoType type, std::shared_ptr file, std::shared_ptr buffer, + size_t size, hoff_t file_offset, hoff_t buffer_offset) = 0; + + // This maybe should be moved to Backend + /// @brief Update the read stats for this RetryableBackend + virtual void update_read_stats(ssize_t nbytes) = 0; + + // This maybe should be moved to Backend + /// @brief Update the write stats for this RetryableBackend + virtual void update_write_stats(ssize_t nbytes) = 0; + +protected: + std::shared_ptr retry_backend; +}; + } diff --git a/src/amd_detail/backend/fastpath.cpp b/src/amd_detail/backend/fastpath.cpp index aba587c8..a23a5a64 100644 --- a/src/amd_detail/backend/fastpath.cpp +++ b/src/amd_detail/backend/fastpath.cpp @@ -155,8 +155,8 @@ Fastpath::score(shared_ptr file, shared_ptr buffer, size_t size, } ssize_t -Fastpath::io(IoType type, shared_ptr file, shared_ptr buffer, size_t size, hoff_t file_offset, - hoff_t buffer_offset) +Fastpath::retryable_io(IoType type, shared_ptr file, shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset) { void *devptr{reinterpret_cast(reinterpret_cast(buffer->getBuffer()) + buffer_offset)}; hipAmdFileHandle_t handle{}; @@ -184,15 +184,17 @@ Fastpath::io(IoType type, shared_ptr file, shared_ptr buffer, si default: throw std::runtime_error("Invalid IoType"); } - switch (type) { - case IoType::Read: - statsAddFastPathRead(nbytes); - break; - case IoType::Write: - statsAddFastPathWrite(nbytes); - break; - default: - break; - } return static_cast(nbytes); } + +void +Fastpath::update_read_stats(ssize_t nbytes) +{ + statsAddFastPathRead(static_cast(nbytes)); +} + +void +Fastpath::update_write_stats(ssize_t nbytes) +{ + statsAddFastPathWrite(static_cast(nbytes)); +} diff --git a/src/amd_detail/backend/fastpath.h b/src/amd_detail/backend/fastpath.h index 61f0915e..bbe92ece 100644 --- a/src/amd_detail/backend/fastpath.h +++ b/src/amd_detail/backend/fastpath.h @@ -9,6 +9,7 @@ #include "hipfile.h" #include +#include #include namespace hipFile { @@ -23,14 +24,16 @@ enum class IoType; namespace hipFile { -struct Fastpath : public Backend { +struct Fastpath : public RetryableBackend { virtual ~Fastpath() override = default; int score(std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset) const override; - ssize_t io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, - hoff_t file_offset, hoff_t buffer_offset) override; + ssize_t retryable_io(IoType type, std::shared_ptr file, std::shared_ptr buffer, + size_t size, hoff_t file_offset, hoff_t buffer_offset) override; + void update_read_stats(ssize_t nbytes) override; + void update_write_stats(ssize_t nbytes) override; }; } diff --git a/src/amd_detail/state.cpp b/src/amd_detail/state.cpp index fd4aacfa..36965523 100644 --- a/src/amd_detail/state.cpp +++ b/src/amd_detail/state.cpp @@ -275,11 +275,18 @@ std::vector> DriverState::getBackends() const { static bool once = [&]() { - if (Context::get()->fastpath()) { - backends.emplace_back(new Fastpath{}); - } + std::shared_ptr fallback_backend; if (Context::get()->fallback()) { - backends.emplace_back(new Fallback{}); + fallback_backend = std::make_shared(); + backends.push_back(fallback_backend); + } + + if (Context::get()->fastpath()) { + auto new_backend = std::make_shared(); + if (fallback_backend) { + new_backend->register_retry_backend(fallback_backend); + } + backends.push_back(new_backend); } return true; }(); From c77c747c61d1e761d6cc06b1c5503c1874b795ef Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Fri, 13 Mar 2026 16:16:48 -0600 Subject: [PATCH 02/11] Test: Add new RetryableBackend unit tests Adds tests for the behaviour of the retry mechanism, as well as any default behaviour from the RetryableBackend. --- test/amd_detail/CMakeLists.txt | 1 + test/amd_detail/backend.cpp | 108 +++++++++++++++++++++++++++++++++ test/amd_detail/mbackend.h | 11 ++++ 3 files changed, 120 insertions(+) create mode 100644 test/amd_detail/backend.cpp diff --git a/test/amd_detail/CMakeLists.txt b/test/amd_detail/CMakeLists.txt index c44514f5..e42f17a9 100644 --- a/test/amd_detail/CMakeLists.txt +++ b/test/amd_detail/CMakeLists.txt @@ -12,6 +12,7 @@ set(SHARED_SOURCE_FILES set(TEST_SOURCE_FILES async.cpp + backend.cpp batch/batch.cpp configuration.cpp context.cpp diff --git a/test/amd_detail/backend.cpp b/test/amd_detail/backend.cpp new file mode 100644 index 00000000..3bfe85f3 --- /dev/null +++ b/test/amd_detail/backend.cpp @@ -0,0 +1,108 @@ +/* Copyright (c) Advanced Micro Devices, Inc. All rights reserved. + * + * SPDX-License-Identifier: MIT + */ + +#include "io.h" +#include "hipfile-test.h" +#include "hipfile-warnings.h" +#include "mbackend.h" +#include "mbuffer.h" +#include "mfile.h" + +#include +#include +#include + +using namespace hipFile; + +using ::testing::AnyNumber; +using ::testing::Invoke; +using ::testing::Return; +using ::testing::StrictMock; +using ::testing::Throw; + +// Put tests inside the macros to suppress the global constructor +// warnings +HIPFILE_WARN_NO_GLOBAL_CTOR_OFF + +struct DummyRetryableBackend : public MRetryableBackend {}; + +struct DummyFallbackBackend : MBackend {}; + +struct HipFileRetryableBackend : public HipFileUnopened, ::testing::WithParamInterface { + std::shared_ptr> mock_buffer; + std::shared_ptr> mock_file; + + std::shared_ptr> default_backend; + std::shared_ptr> fallback_backend; + + IoType io_type; + ssize_t successful_io_size = 0x1234; + + void SetUp() override + { + mock_buffer = std::make_shared>(); + mock_file = std::make_shared>(); + + default_backend = std::make_shared>(); + // By default, use the default is_retryable() implementation unless overriden. + EXPECT_CALL(*default_backend, is_retryable) + .WillRepeatedly(Invoke([&](std::exception_ptr e_ptr, ssize_t nbytes) { + return default_backend->RetryableBackend::is_retryable(e_ptr, nbytes); + })); + EXPECT_CALL(*default_backend, retryable_io).Times(AnyNumber()); + EXPECT_CALL(*default_backend, update_read_stats).Times(AnyNumber()); + EXPECT_CALL(*default_backend, update_write_stats).Times(AnyNumber()); + + fallback_backend = std::make_shared>(); + EXPECT_CALL(*fallback_backend, io).Times(AnyNumber()); + + io_type = GetParam(); + } + + HipFileRetryableBackend() + { + } +}; + +TEST_P(HipFileRetryableBackend, IOSuccess) +{ + EXPECT_CALL(*default_backend, retryable_io).WillOnce(Return(successful_io_size)); + + ssize_t nbytes = default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0); + + ASSERT_EQ(nbytes, successful_io_size); +} + +TEST_P(HipFileRetryableBackend, IOFailureNoFallback) +{ + EXPECT_CALL(*default_backend, retryable_io).WillOnce(Throw(std::runtime_error("IO failure"))); + + EXPECT_THROW(default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0), std::runtime_error); +} + +TEST_P(HipFileRetryableBackend, IOFailureWithIneligibleRetry) +{ + // The Backend has registered a fallback, but has determined that the + // IO error should not be retried. + default_backend->register_retry_backend(fallback_backend); + EXPECT_CALL(*default_backend, is_retryable).WillOnce(Return(false)); + EXPECT_CALL(*default_backend, retryable_io).WillOnce(Throw(std::runtime_error("IO failure"))); + + EXPECT_THROW(default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0), std::runtime_error); +} + +TEST_P(HipFileRetryableBackend, IOFailureWithGoodFallback) +{ + default_backend->register_retry_backend(fallback_backend); + EXPECT_CALL(*default_backend, retryable_io).WillOnce(Throw(std::runtime_error("IO failure"))); + EXPECT_CALL(*fallback_backend, io).WillOnce(Return(successful_io_size)); + + ssize_t nbytes = default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0); + ASSERT_EQ(nbytes, successful_io_size); +} + +INSTANTIATE_TEST_SUITE_P(, HipFileRetryableBackend, ::testing::Values(IoType::Read, IoType::Write)); + +HIPFILE_WARN_NO_GLOBAL_CTOR_ON diff --git a/test/amd_detail/mbackend.h b/test/amd_detail/mbackend.h index f4c395df..4880f6be 100644 --- a/test/amd_detail/mbackend.h +++ b/test/amd_detail/mbackend.h @@ -20,4 +20,15 @@ struct MBackend : Backend { (override)); }; +struct MRetryableBackend : RetryableBackend { + MOCK_METHOD(int, score, (std::shared_ptr, std::shared_ptr, size_t, hoff_t, hoff_t), + (const, override)); + MOCK_METHOD(bool, is_retryable, (std::exception_ptr e_ptr, ssize_t nbytes), (const, override)); + MOCK_METHOD(ssize_t, retryable_io, + (IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset), + (override)); + MOCK_METHOD(void, update_read_stats, (ssize_t nbytes), (override)); + MOCK_METHOD(void, update_write_stats, (ssize_t nbytes), (override)); +}; } From 4c8aa694a9405763280ceea415f1d1415ae60158 Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Fri, 13 Mar 2026 16:42:02 -0600 Subject: [PATCH 03/11] Rework default implementation of `is_retryable` This change passes in the original parameters of the IO request for `is_retryable()` to process. By default, we now check that the fallback IO engine will accept the IO request prior to submitting it. This is useful in cases where the Fallback backend is available, but for some reason the request is still invalid. It also removes a hack that was used for testing this scenario with "optionally" mocking `is_retryable()`. --- src/amd_detail/backend.cpp | 9 ++++++--- src/amd_detail/backend.h | 17 +++++++++++++---- test/amd_detail/backend.cpp | 8 ++------ test/amd_detail/mbackend.h | 1 - 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/amd_detail/backend.cpp b/src/amd_detail/backend.cpp index 361ab552..08583cc0 100644 --- a/src/amd_detail/backend.cpp +++ b/src/amd_detail/backend.cpp @@ -26,7 +26,7 @@ RetryableBackend::io(IoType type, std::shared_ptr file, std::shared_ptrio(type, file, buffer, size, file_offset, buffer_offset); } else { @@ -49,11 +49,14 @@ RetryableBackend::io(IoType type, std::shared_ptr file, std::shared_ptr file, + std::shared_ptr buffer, size_t size, hoff_t file_offset, + hoff_t buffer_offset) const { (void)e_ptr; (void)nbytes; - return static_cast(retry_backend); + return static_cast(retry_backend) && + retry_backend->score(file, buffer, size, file_offset, buffer_offset) >= 0; } void diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index 9da3adfe..631d6cee 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -67,14 +67,23 @@ struct RetryableBackend : public Backend { /// @brief Check if a failed IO operation is retryable. /// - /// @param e_ptr Pointer to the thrown Exception by the failed IO - /// @param nbytes Number of bytes/error code returned by `retryable_io()` + /// @param e_ptr Pointer to the thrown Exception by the failed IO + /// @param nbytes Return value from `retryable_io`, or 0 if an exception was thrown. + /// @param file File to read from or write to + /// @param buffer Buffer to write to or read from + /// @param size Number of bytes to transfer + /// @param file_offset Offset from the start of the file + /// @param buffer_offset Offset from the start of the buffer /// /// @note By default, RetryableBackend just checks if a Backend has been - /// registered for retrying an IO. + /// registered for retrying an IO, and that backend supports the + /// the request. + /// @note The parameters from the original IO request are passed to this function. /// /// @return True if this RetryableBackend can retry the IO, else False. - virtual bool is_retryable(std::exception_ptr e_ptr, ssize_t nbytes) const; + virtual bool is_retryable(std::exception_ptr e_ptr, ssize_t nbytes, std::shared_ptr file, + std::shared_ptr buffer, size_t size, hoff_t file_offset, + hoff_t buffer_offset) const; /// @brief Register a Backend to call into to retry a failed IO operation. /// diff --git a/test/amd_detail/backend.cpp b/test/amd_detail/backend.cpp index 3bfe85f3..27fdee16 100644 --- a/test/amd_detail/backend.cpp +++ b/test/amd_detail/backend.cpp @@ -46,17 +46,13 @@ struct HipFileRetryableBackend : public HipFileUnopened, ::testing::WithParamInt mock_file = std::make_shared>(); default_backend = std::make_shared>(); - // By default, use the default is_retryable() implementation unless overriden. - EXPECT_CALL(*default_backend, is_retryable) - .WillRepeatedly(Invoke([&](std::exception_ptr e_ptr, ssize_t nbytes) { - return default_backend->RetryableBackend::is_retryable(e_ptr, nbytes); - })); EXPECT_CALL(*default_backend, retryable_io).Times(AnyNumber()); EXPECT_CALL(*default_backend, update_read_stats).Times(AnyNumber()); EXPECT_CALL(*default_backend, update_write_stats).Times(AnyNumber()); fallback_backend = std::make_shared>(); EXPECT_CALL(*fallback_backend, io).Times(AnyNumber()); + EXPECT_CALL(*fallback_backend, score).WillRepeatedly(Return(0)); io_type = GetParam(); } @@ -87,8 +83,8 @@ TEST_P(HipFileRetryableBackend, IOFailureWithIneligibleRetry) // The Backend has registered a fallback, but has determined that the // IO error should not be retried. default_backend->register_retry_backend(fallback_backend); - EXPECT_CALL(*default_backend, is_retryable).WillOnce(Return(false)); EXPECT_CALL(*default_backend, retryable_io).WillOnce(Throw(std::runtime_error("IO failure"))); + EXPECT_CALL(*fallback_backend, score).WillOnce(Return(-1)); EXPECT_THROW(default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0), std::runtime_error); } diff --git a/test/amd_detail/mbackend.h b/test/amd_detail/mbackend.h index 4880f6be..48c64517 100644 --- a/test/amd_detail/mbackend.h +++ b/test/amd_detail/mbackend.h @@ -23,7 +23,6 @@ struct MBackend : Backend { struct MRetryableBackend : RetryableBackend { MOCK_METHOD(int, score, (std::shared_ptr, std::shared_ptr, size_t, hoff_t, hoff_t), (const, override)); - MOCK_METHOD(bool, is_retryable, (std::exception_ptr e_ptr, ssize_t nbytes), (const, override)); MOCK_METHOD(ssize_t, retryable_io, (IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset), From 722d723948ee200664d666122852c68ef2216d23 Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Mon, 16 Mar 2026 14:22:33 -0600 Subject: [PATCH 04/11] Backend: Use a common method across all Backends for IO implementation. Note: retryable_io() -> _io_impl() Formatter moved the order of some functions around. Previously, a "RetryableBackend" would use io() to wrap the fallback mechanism before calling retryable_io() to actually perform the request. Now, every Backend will use _io_impl() which will be responsible for handling the IO request, leaving io() as the public front-end. --- src/amd_detail/backend.cpp | 2 +- src/amd_detail/backend.h | 38 +++++++++++++++++------------ src/amd_detail/backend/fallback.cpp | 18 ++++++++++++-- src/amd_detail/backend/fallback.h | 8 ++++-- src/amd_detail/backend/fastpath.cpp | 28 ++++++++++----------- src/amd_detail/backend/fastpath.h | 15 ++++++------ test/amd_detail/backend.cpp | 10 ++++---- test/amd_detail/mbackend.h | 10 +++++--- 8 files changed, 79 insertions(+), 50 deletions(-) diff --git a/src/amd_detail/backend.cpp b/src/amd_detail/backend.cpp index 08583cc0..44c247b7 100644 --- a/src/amd_detail/backend.cpp +++ b/src/amd_detail/backend.cpp @@ -22,7 +22,7 @@ RetryableBackend::io(IoType type, std::shared_ptr file, std::shared_ptr file, std::shared_ptr buffer, size_t size, - hoff_t file_offset, hoff_t buffer_offset) = 0; + hoff_t file_offset, hoff_t buffer_offset) + { + return _io_impl(type, file, buffer, size, file_offset, buffer_offset); + } + +protected: + /// @brief Perform a read or write operation + /// + /// @note Provides a common target across all Backends that provides the + /// implementation for running IO. + /// @param type IO type (read/write) + /// @param file File to read from or write to + /// @param buffer Buffer to write to or read from + /// @param size Number of bytes to transfer + /// @param file_offset Offset from the start of the file + /// @param buffer_offset Offset from the start of the buffer + /// + /// @return Number of bytes transferred, negative on error + /// + /// @throws Hip::RuntimeError Sys::RuntimeError + virtual ssize_t _io_impl(IoType type, std::shared_ptr file, std::shared_ptr buffer, + size_t size, hoff_t file_offset, hoff_t buffer_offset) = 0; }; // This kind of Backend allows for an IO to be retried automatically in the @@ -90,21 +111,6 @@ struct RetryableBackend : public Backend { /// @param backend Backend to retry a failed IO operation. void register_retry_backend(std::shared_ptr backend) noexcept; - /// @brief Process an IO Request that can be resubmitted to another Backend if the IO fails. - /// - /// @param type IO type (read/write) - /// @param file File to read from or write to - /// @param buffer Buffer to write to or read from - /// @param size Number of bytes to transfer - /// @param file_offset Offset from the start of the file - /// @param buffer_offset Offset from the start of the buffer - /// - /// @return Number of bytes transferred, negative on error - /// - /// @throws Hip::RuntimeError Sys::RuntimeError - virtual ssize_t retryable_io(IoType type, std::shared_ptr file, std::shared_ptr buffer, - size_t size, hoff_t file_offset, hoff_t buffer_offset) = 0; - // This maybe should be moved to Backend /// @brief Update the read stats for this RetryableBackend virtual void update_read_stats(ssize_t nbytes) = 0; diff --git a/src/amd_detail/backend/fallback.cpp b/src/amd_detail/backend/fallback.cpp index 1d2caa73..26ba8b08 100644 --- a/src/amd_detail/backend/fallback.cpp +++ b/src/amd_detail/backend/fallback.cpp @@ -54,12 +54,26 @@ ssize_t Fallback::io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset) { - return io(type, file, buffer, size, file_offset, buffer_offset, DefaultChunkSize); + return _io_impl(type, file, buffer, size, file_offset, buffer_offset, DefaultChunkSize); } ssize_t -Fallback::io(IoType io_type, shared_ptr file, shared_ptr buffer, size_t size, +Fallback::io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset, size_t chunk_size) +{ + return _io_impl(type, file, buffer, size, file_offset, buffer_offset, chunk_size); +} + +ssize_t +Fallback::_io_impl(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset) +{ + return _io_impl(type, file, buffer, size, file_offset, buffer_offset, DefaultChunkSize); +} + +ssize_t +Fallback::_io_impl(IoType io_type, shared_ptr file, shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset, size_t chunk_size) { size = min(size, hipFile::MAX_RW_COUNT); diff --git a/src/amd_detail/backend/fallback.h b/src/amd_detail/backend/fallback.h index 14e3b483..24f05cad 100644 --- a/src/amd_detail/backend/fallback.h +++ b/src/amd_detail/backend/fallback.h @@ -40,10 +40,14 @@ struct Fallback : public Backend { // Once we can import gtest.h and make test suites or test friends everything // below here should be made protected. - // protected: - ssize_t io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset, size_t chunk_size); + +protected: + ssize_t _io_impl(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset) override; + ssize_t _io_impl(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset, size_t chunk_size); }; } diff --git a/src/amd_detail/backend/fastpath.cpp b/src/amd_detail/backend/fastpath.cpp index a23a5a64..fa06eeac 100644 --- a/src/amd_detail/backend/fastpath.cpp +++ b/src/amd_detail/backend/fastpath.cpp @@ -154,9 +154,21 @@ Fastpath::score(shared_ptr file, shared_ptr buffer, size_t size, return accept_io ? 100 : -1; } +void +Fastpath::update_read_stats(ssize_t nbytes) +{ + statsAddFastPathRead(static_cast(nbytes)); +} + +void +Fastpath::update_write_stats(ssize_t nbytes) +{ + statsAddFastPathWrite(static_cast(nbytes)); +} + ssize_t -Fastpath::retryable_io(IoType type, shared_ptr file, shared_ptr buffer, size_t size, - hoff_t file_offset, hoff_t buffer_offset) +Fastpath::_io_impl(IoType type, shared_ptr file, shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset) { void *devptr{reinterpret_cast(reinterpret_cast(buffer->getBuffer()) + buffer_offset)}; hipAmdFileHandle_t handle{}; @@ -186,15 +198,3 @@ Fastpath::retryable_io(IoType type, shared_ptr file, shared_ptr } return static_cast(nbytes); } - -void -Fastpath::update_read_stats(ssize_t nbytes) -{ - statsAddFastPathRead(static_cast(nbytes)); -} - -void -Fastpath::update_write_stats(ssize_t nbytes) -{ - statsAddFastPathWrite(static_cast(nbytes)); -} diff --git a/src/amd_detail/backend/fastpath.h b/src/amd_detail/backend/fastpath.h index bbe92ece..c798821d 100644 --- a/src/amd_detail/backend/fastpath.h +++ b/src/amd_detail/backend/fastpath.h @@ -27,13 +27,14 @@ namespace hipFile { struct Fastpath : public RetryableBackend { virtual ~Fastpath() override = default; - int score(std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, - hoff_t buffer_offset) const override; - - ssize_t retryable_io(IoType type, std::shared_ptr file, std::shared_ptr buffer, - size_t size, hoff_t file_offset, hoff_t buffer_offset) override; - void update_read_stats(ssize_t nbytes) override; - void update_write_stats(ssize_t nbytes) override; + int score(std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, + hoff_t buffer_offset) const override; + void update_read_stats(ssize_t nbytes) override; + void update_write_stats(ssize_t nbytes) override; + +protected: + ssize_t _io_impl(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset) override; }; } diff --git a/test/amd_detail/backend.cpp b/test/amd_detail/backend.cpp index 27fdee16..df6b5b54 100644 --- a/test/amd_detail/backend.cpp +++ b/test/amd_detail/backend.cpp @@ -46,7 +46,7 @@ struct HipFileRetryableBackend : public HipFileUnopened, ::testing::WithParamInt mock_file = std::make_shared>(); default_backend = std::make_shared>(); - EXPECT_CALL(*default_backend, retryable_io).Times(AnyNumber()); + EXPECT_CALL(*default_backend, _io_impl).Times(AnyNumber()); EXPECT_CALL(*default_backend, update_read_stats).Times(AnyNumber()); EXPECT_CALL(*default_backend, update_write_stats).Times(AnyNumber()); @@ -64,7 +64,7 @@ struct HipFileRetryableBackend : public HipFileUnopened, ::testing::WithParamInt TEST_P(HipFileRetryableBackend, IOSuccess) { - EXPECT_CALL(*default_backend, retryable_io).WillOnce(Return(successful_io_size)); + EXPECT_CALL(*default_backend, _io_impl).WillOnce(Return(successful_io_size)); ssize_t nbytes = default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0); @@ -73,7 +73,7 @@ TEST_P(HipFileRetryableBackend, IOSuccess) TEST_P(HipFileRetryableBackend, IOFailureNoFallback) { - EXPECT_CALL(*default_backend, retryable_io).WillOnce(Throw(std::runtime_error("IO failure"))); + EXPECT_CALL(*default_backend, _io_impl).WillOnce(Throw(std::runtime_error("IO failure"))); EXPECT_THROW(default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0), std::runtime_error); } @@ -83,7 +83,7 @@ TEST_P(HipFileRetryableBackend, IOFailureWithIneligibleRetry) // The Backend has registered a fallback, but has determined that the // IO error should not be retried. default_backend->register_retry_backend(fallback_backend); - EXPECT_CALL(*default_backend, retryable_io).WillOnce(Throw(std::runtime_error("IO failure"))); + EXPECT_CALL(*default_backend, _io_impl).WillOnce(Throw(std::runtime_error("IO failure"))); EXPECT_CALL(*fallback_backend, score).WillOnce(Return(-1)); EXPECT_THROW(default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0), std::runtime_error); @@ -92,7 +92,7 @@ TEST_P(HipFileRetryableBackend, IOFailureWithIneligibleRetry) TEST_P(HipFileRetryableBackend, IOFailureWithGoodFallback) { default_backend->register_retry_backend(fallback_backend); - EXPECT_CALL(*default_backend, retryable_io).WillOnce(Throw(std::runtime_error("IO failure"))); + EXPECT_CALL(*default_backend, _io_impl).WillOnce(Throw(std::runtime_error("IO failure"))); EXPECT_CALL(*fallback_backend, io).WillOnce(Return(successful_io_size)); ssize_t nbytes = default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0); diff --git a/test/amd_detail/mbackend.h b/test/amd_detail/mbackend.h index 48c64517..0cd3697f 100644 --- a/test/amd_detail/mbackend.h +++ b/test/amd_detail/mbackend.h @@ -18,16 +18,20 @@ struct MBackend : Backend { (hipFile::IoType type, std::shared_ptr, std::shared_ptr, size_t, hoff_t, hoff_t), (override)); + MOCK_METHOD(ssize_t, _io_impl, + (hipFile::IoType type, std::shared_ptr, std::shared_ptr, size_t, hoff_t, + hoff_t), + (override)); }; struct MRetryableBackend : RetryableBackend { MOCK_METHOD(int, score, (std::shared_ptr, std::shared_ptr, size_t, hoff_t, hoff_t), (const, override)); - MOCK_METHOD(ssize_t, retryable_io, + MOCK_METHOD(void, update_read_stats, (ssize_t nbytes), (override)); + MOCK_METHOD(void, update_write_stats, (ssize_t nbytes), (override)); + MOCK_METHOD(ssize_t, _io_impl, (IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset), (override)); - MOCK_METHOD(void, update_read_stats, (ssize_t nbytes), (override)); - MOCK_METHOD(void, update_write_stats, (ssize_t nbytes), (override)); }; } From 8d2cf8233f3977b0e6a0cb911fcf7321afb89959 Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Mon, 16 Mar 2026 15:01:13 -0600 Subject: [PATCH 05/11] Backend: Rename RetryableBackend "Retryable" was considered to be a misnomer in this context. Instead of retrying the IO with the same backend, we were resubmitting the IO to a different Backend. While "BackendWithFallback" is not exactly a great name, its something we can change later. --- src/amd_detail/backend.cpp | 23 ++++++++++----------- src/amd_detail/backend.h | 33 +++++++++++++++---------------- src/amd_detail/backend/fastpath.h | 2 +- src/amd_detail/state.cpp | 2 +- test/amd_detail/backend.cpp | 26 ++++++++++++------------ test/amd_detail/mbackend.h | 2 +- 6 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/amd_detail/backend.cpp b/src/amd_detail/backend.cpp index 44c247b7..1b6d2e31 100644 --- a/src/amd_detail/backend.cpp +++ b/src/amd_detail/backend.cpp @@ -17,8 +17,8 @@ using namespace hipFile; ssize_t -RetryableBackend::io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, - hoff_t file_offset, hoff_t buffer_offset) +BackendWithFallback::io(IoType type, std::shared_ptr file, std::shared_ptr buffer, + size_t size, hoff_t file_offset, hoff_t buffer_offset) { ssize_t nbytes{0}; try { @@ -26,8 +26,8 @@ RetryableBackend::io(IoType type, std::shared_ptr file, std::shared_ptrio(type, file, buffer, size, file_offset, buffer_offset); + if (is_fallback_eligible(e_ptr, nbytes, file, buffer, size, file_offset, buffer_offset)) { + nbytes = fallback_backend->io(type, file, buffer, size, file_offset, buffer_offset); } else { throw; @@ -49,18 +49,19 @@ RetryableBackend::io(IoType type, std::shared_ptr file, std::shared_ptr file, - std::shared_ptr buffer, size_t size, hoff_t file_offset, - hoff_t buffer_offset) const +BackendWithFallback::is_fallback_eligible(std::exception_ptr e_ptr, ssize_t nbytes, + std::shared_ptr file, std::shared_ptr buffer, + size_t size, hoff_t file_offset, + hoff_t buffer_offset) const { (void)e_ptr; (void)nbytes; - return static_cast(retry_backend) && - retry_backend->score(file, buffer, size, file_offset, buffer_offset) >= 0; + return static_cast(fallback_backend) && + fallback_backend->score(file, buffer, size, file_offset, buffer_offset) >= 0; } void -RetryableBackend::register_retry_backend(std::shared_ptr backend) noexcept +BackendWithFallback::register_fallback_backend(std::shared_ptr backend) noexcept { - retry_backend = backend; + fallback_backend = backend; } diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index d7483117..2c53ca11 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -79,48 +79,47 @@ struct Backend { size_t size, hoff_t file_offset, hoff_t buffer_offset) = 0; }; -// This kind of Backend allows for an IO to be retried automatically in the -// event of an error. The RetryableBackend implements the logic to determine -// which errors are retryable, and which are not, in Backend::io(). -struct RetryableBackend : public Backend { +// BackendWithFallback allows for an IO to be retried automatically with a +// different Backend in the event of an error. +struct BackendWithFallback : public Backend { ssize_t io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset) override final; - /// @brief Check if a failed IO operation is retryable. + /// @brief Check if a failed IO operation can be re-issued to the fallback Backend. /// /// @param e_ptr Pointer to the thrown Exception by the failed IO - /// @param nbytes Return value from `retryable_io`, or 0 if an exception was thrown. + /// @param nbytes Return value from `_io_impl`, or 0 if an exception was thrown. /// @param file File to read from or write to /// @param buffer Buffer to write to or read from /// @param size Number of bytes to transfer /// @param file_offset Offset from the start of the file /// @param buffer_offset Offset from the start of the buffer /// - /// @note By default, RetryableBackend just checks if a Backend has been - /// registered for retrying an IO, and that backend supports the + /// @note By default, BackendWithFallback checks if a Backend has been + /// registered for retrying an IO, and that fallback backend supports /// the request. /// @note The parameters from the original IO request are passed to this function. /// - /// @return True if this RetryableBackend can retry the IO, else False. - virtual bool is_retryable(std::exception_ptr e_ptr, ssize_t nbytes, std::shared_ptr file, - std::shared_ptr buffer, size_t size, hoff_t file_offset, - hoff_t buffer_offset) const; + /// @return True if this BackendWithFallback can retry the IO, else False. + virtual bool is_fallback_eligible(std::exception_ptr e_ptr, ssize_t nbytes, std::shared_ptr file, + std::shared_ptr buffer, size_t size, hoff_t file_offset, + hoff_t buffer_offset) const; - /// @brief Register a Backend to call into to retry a failed IO operation. + /// @brief Register a Backend to retry a failed IO operation. /// /// @param backend Backend to retry a failed IO operation. - void register_retry_backend(std::shared_ptr backend) noexcept; + void register_fallback_backend(std::shared_ptr backend) noexcept; // This maybe should be moved to Backend - /// @brief Update the read stats for this RetryableBackend + /// @brief Update the read stats for this BackendWithFallback virtual void update_read_stats(ssize_t nbytes) = 0; // This maybe should be moved to Backend - /// @brief Update the write stats for this RetryableBackend + /// @brief Update the write stats for this BackendWithFallback virtual void update_write_stats(ssize_t nbytes) = 0; protected: - std::shared_ptr retry_backend; + std::shared_ptr fallback_backend; }; } diff --git a/src/amd_detail/backend/fastpath.h b/src/amd_detail/backend/fastpath.h index c798821d..87e72433 100644 --- a/src/amd_detail/backend/fastpath.h +++ b/src/amd_detail/backend/fastpath.h @@ -24,7 +24,7 @@ enum class IoType; namespace hipFile { -struct Fastpath : public RetryableBackend { +struct Fastpath : public BackendWithFallback { virtual ~Fastpath() override = default; int score(std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, diff --git a/src/amd_detail/state.cpp b/src/amd_detail/state.cpp index 36965523..19cb29aa 100644 --- a/src/amd_detail/state.cpp +++ b/src/amd_detail/state.cpp @@ -284,7 +284,7 @@ DriverState::getBackends() const if (Context::get()->fastpath()) { auto new_backend = std::make_shared(); if (fallback_backend) { - new_backend->register_retry_backend(fallback_backend); + new_backend->register_fallback_backend(fallback_backend); } backends.push_back(new_backend); } diff --git a/test/amd_detail/backend.cpp b/test/amd_detail/backend.cpp index df6b5b54..c167d36c 100644 --- a/test/amd_detail/backend.cpp +++ b/test/amd_detail/backend.cpp @@ -26,16 +26,16 @@ using ::testing::Throw; // warnings HIPFILE_WARN_NO_GLOBAL_CTOR_OFF -struct DummyRetryableBackend : public MRetryableBackend {}; +struct DummyBackendWithFallback : public MBackendWithFallback {}; struct DummyFallbackBackend : MBackend {}; -struct HipFileRetryableBackend : public HipFileUnopened, ::testing::WithParamInterface { +struct HipFileBackendWithFallback : public HipFileUnopened, ::testing::WithParamInterface { std::shared_ptr> mock_buffer; std::shared_ptr> mock_file; - std::shared_ptr> default_backend; - std::shared_ptr> fallback_backend; + std::shared_ptr> default_backend; + std::shared_ptr> fallback_backend; IoType io_type; ssize_t successful_io_size = 0x1234; @@ -45,7 +45,7 @@ struct HipFileRetryableBackend : public HipFileUnopened, ::testing::WithParamInt mock_buffer = std::make_shared>(); mock_file = std::make_shared>(); - default_backend = std::make_shared>(); + default_backend = std::make_shared>(); EXPECT_CALL(*default_backend, _io_impl).Times(AnyNumber()); EXPECT_CALL(*default_backend, update_read_stats).Times(AnyNumber()); EXPECT_CALL(*default_backend, update_write_stats).Times(AnyNumber()); @@ -57,12 +57,12 @@ struct HipFileRetryableBackend : public HipFileUnopened, ::testing::WithParamInt io_type = GetParam(); } - HipFileRetryableBackend() + HipFileBackendWithFallback() { } }; -TEST_P(HipFileRetryableBackend, IOSuccess) +TEST_P(HipFileBackendWithFallback, IOSuccess) { EXPECT_CALL(*default_backend, _io_impl).WillOnce(Return(successful_io_size)); @@ -71,27 +71,27 @@ TEST_P(HipFileRetryableBackend, IOSuccess) ASSERT_EQ(nbytes, successful_io_size); } -TEST_P(HipFileRetryableBackend, IOFailureNoFallback) +TEST_P(HipFileBackendWithFallback, IOFailureNoFallback) { EXPECT_CALL(*default_backend, _io_impl).WillOnce(Throw(std::runtime_error("IO failure"))); EXPECT_THROW(default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0), std::runtime_error); } -TEST_P(HipFileRetryableBackend, IOFailureWithIneligibleRetry) +TEST_P(HipFileBackendWithFallback, IOFailureWithIneligibleRetry) { // The Backend has registered a fallback, but has determined that the // IO error should not be retried. - default_backend->register_retry_backend(fallback_backend); + default_backend->register_fallback_backend(fallback_backend); EXPECT_CALL(*default_backend, _io_impl).WillOnce(Throw(std::runtime_error("IO failure"))); EXPECT_CALL(*fallback_backend, score).WillOnce(Return(-1)); EXPECT_THROW(default_backend->io(io_type, mock_file, mock_buffer, 0, 0, 0), std::runtime_error); } -TEST_P(HipFileRetryableBackend, IOFailureWithGoodFallback) +TEST_P(HipFileBackendWithFallback, IOFailureWithGoodFallback) { - default_backend->register_retry_backend(fallback_backend); + default_backend->register_fallback_backend(fallback_backend); EXPECT_CALL(*default_backend, _io_impl).WillOnce(Throw(std::runtime_error("IO failure"))); EXPECT_CALL(*fallback_backend, io).WillOnce(Return(successful_io_size)); @@ -99,6 +99,6 @@ TEST_P(HipFileRetryableBackend, IOFailureWithGoodFallback) ASSERT_EQ(nbytes, successful_io_size); } -INSTANTIATE_TEST_SUITE_P(, HipFileRetryableBackend, ::testing::Values(IoType::Read, IoType::Write)); +INSTANTIATE_TEST_SUITE_P(, HipFileBackendWithFallback, ::testing::Values(IoType::Read, IoType::Write)); HIPFILE_WARN_NO_GLOBAL_CTOR_ON diff --git a/test/amd_detail/mbackend.h b/test/amd_detail/mbackend.h index 0cd3697f..b74ea753 100644 --- a/test/amd_detail/mbackend.h +++ b/test/amd_detail/mbackend.h @@ -24,7 +24,7 @@ struct MBackend : Backend { (override)); }; -struct MRetryableBackend : RetryableBackend { +struct MBackendWithFallback : BackendWithFallback { MOCK_METHOD(int, score, (std::shared_ptr, std::shared_ptr, size_t, hoff_t, hoff_t), (const, override)); MOCK_METHOD(void, update_read_stats, (ssize_t nbytes), (override)); From b3a3330f6d00d72019b29f2d1820213b1cb82b82 Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Mon, 16 Mar 2026 16:02:50 -0600 Subject: [PATCH 06/11] Backend: Move update_stats_X to Backend This was originally written to BackendWithFallback. However, this could be further generalized to all Backends. _io_impl() is then only concerned with issuing the IO, letting the io() method do any additional processing. --- src/amd_detail/backend.cpp | 19 ++++++++++++++++++- src/amd_detail/backend.h | 23 +++++++++++------------ src/amd_detail/backend/fallback.cpp | 22 ++++++++++++---------- src/amd_detail/backend/fallback.h | 4 ++++ test/amd_detail/mbackend.h | 2 ++ 5 files changed, 47 insertions(+), 23 deletions(-) diff --git a/src/amd_detail/backend.cpp b/src/amd_detail/backend.cpp index 1b6d2e31..f5531fa7 100644 --- a/src/amd_detail/backend.cpp +++ b/src/amd_detail/backend.cpp @@ -16,6 +16,24 @@ using namespace hipFile; +ssize_t +Backend::io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, + hoff_t file_offset, hoff_t buffer_offset) +{ + ssize_t nbytes = _io_impl(type, file, buffer, size, file_offset, buffer_offset); + switch (type) { + case (IoType::Read): + update_read_stats(nbytes); + break; + case (IoType::Write): + update_write_stats(nbytes); + break; + default: + break; + } + return nbytes; +} + ssize_t BackendWithFallback::io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset) @@ -32,7 +50,6 @@ BackendWithFallback::io(IoType type, std::shared_ptr file, std::shared_pt else { throw; } - // For now, assume retry_backend will handle its own stats (which is true at this moment). return nbytes; } switch (type) { diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index 2c53ca11..60717131 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -55,10 +55,17 @@ struct Backend { /// /// @throws Hip::RuntimeError Sys::RuntimeError virtual ssize_t io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, - hoff_t file_offset, hoff_t buffer_offset) - { - return _io_impl(type, file, buffer, size, file_offset, buffer_offset); - } + hoff_t file_offset, hoff_t buffer_offset); + + /// @brief Update the read stats for this Backend + /// + /// @param nbytes Number of bytes read + virtual void update_read_stats(ssize_t nbytes) = 0; + + /// @brief Update the write stats for this Backend + /// + /// @param nbytes Number of bytes written + virtual void update_write_stats(ssize_t nbytes) = 0; protected: /// @brief Perform a read or write operation @@ -110,14 +117,6 @@ struct BackendWithFallback : public Backend { /// @param backend Backend to retry a failed IO operation. void register_fallback_backend(std::shared_ptr backend) noexcept; - // This maybe should be moved to Backend - /// @brief Update the read stats for this BackendWithFallback - virtual void update_read_stats(ssize_t nbytes) = 0; - - // This maybe should be moved to Backend - /// @brief Update the write stats for this BackendWithFallback - virtual void update_write_stats(ssize_t nbytes) = 0; - protected: std::shared_ptr fallback_backend; }; diff --git a/src/amd_detail/backend/fallback.cpp b/src/amd_detail/backend/fallback.cpp index 26ba8b08..89bce478 100644 --- a/src/amd_detail/backend/fallback.cpp +++ b/src/amd_detail/backend/fallback.cpp @@ -129,19 +129,21 @@ Fallback::_io_impl(IoType io_type, shared_ptr file, shared_ptr b } } while (static_cast(total_io_bytes) < size); - switch (io_type) { - case IoType::Read: - statsAddFallbackPathRead(static_cast(total_io_bytes)); - break; - case IoType::Write: - statsAddFallbackPathWrite(static_cast(total_io_bytes)); - break; - default: - break; - } return total_io_bytes; } +void +Fallback::update_read_stats(ssize_t nbytes) +{ + statsAddFallbackPathRead(static_cast(nbytes)); +} + +void +Fallback::update_write_stats(ssize_t nbytes) +{ + statsAddFallbackPathWrite(static_cast(nbytes)); +} + void Fallback::async_io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t *size_p, hoff_t *file_offset_p, hoff_t *buffer_offset_p, ssize_t *bytes_transferred_p, diff --git a/src/amd_detail/backend/fallback.h b/src/amd_detail/backend/fallback.h index 24f05cad..d35c068a 100644 --- a/src/amd_detail/backend/fallback.h +++ b/src/amd_detail/backend/fallback.h @@ -34,6 +34,10 @@ struct Fallback : public Backend { ssize_t io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset) override; + void update_read_stats(ssize_t nbytes) override; + + void update_write_stats(ssize_t nbytes) override; + void async_io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t *size_p, hoff_t *file_offset_p, hoff_t *buffer_offset_p, ssize_t *bytes_transferred_p, std::shared_ptr stream); diff --git a/test/amd_detail/mbackend.h b/test/amd_detail/mbackend.h index b74ea753..d544e0b2 100644 --- a/test/amd_detail/mbackend.h +++ b/test/amd_detail/mbackend.h @@ -18,6 +18,8 @@ struct MBackend : Backend { (hipFile::IoType type, std::shared_ptr, std::shared_ptr, size_t, hoff_t, hoff_t), (override)); + MOCK_METHOD(void, update_read_stats, (ssize_t nbytes), (override)); + MOCK_METHOD(void, update_write_stats, (ssize_t nbytes), (override)); MOCK_METHOD(ssize_t, _io_impl, (hipFile::IoType type, std::shared_ptr, std::shared_ptr, size_t, hoff_t, hoff_t), From 62db64a50e4fd13c243fcbf1a1d283f6a3b3af52 Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Wed, 18 Mar 2026 11:31:07 -0600 Subject: [PATCH 07/11] Fastpath: Create integration test for fallback mechanism --- test/amd_detail/fastpath.cpp | 73 ++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/test/amd_detail/fastpath.cpp b/test/amd_detail/fastpath.cpp index 258f29d0..6559e46b 100644 --- a/test/amd_detail/fastpath.cpp +++ b/test/amd_detail/fastpath.cpp @@ -3,6 +3,7 @@ * SPDX-License-Identifier: MIT */ +#include "backend/fallback.h" #include "backend/fastpath.h" #include "hip.h" #include "hipfile.h" @@ -12,10 +13,12 @@ #include "mbuffer.h" #include "mfile.h" #include "mhip.h" +#include "msys.h" #include #include #include +#include #include #include #include @@ -25,6 +28,7 @@ #include #include #include +#include using namespace hipFile; using namespace testing; @@ -555,4 +559,73 @@ TEST_P(FastpathIoParam, IoSizeIsTruncatedToMaxRWCount) INSTANTIATE_TEST_SUITE_P(FastpathTest, FastpathIoParam, Values(IoType::Read, IoType::Write)); +struct FastpathIoParamWithFallback : public FastpathTestBase, + public TestWithParam> { + inline IoType _get_param_io_type() const + { + return std::get<0>(GetParam()); + } + + inline const std::exception_ptr _get_param_exc_ptr() const + { + return std::get<1>(GetParam()); + } +}; + +// The Fastpath can throw a few different kinds of derived std::runtime_errors. +TEST_P(FastpathIoParamWithFallback, IntegrationRunWithFallback) +{ + StrictMock mhip; + StrictMock msys; + + auto fallback_backend = std::make_shared>(); + auto fastpath_backend = std::make_shared>(); + fastpath_backend->register_fallback_backend(fallback_backend); + + const int DEFAULT_BUFFERED_FD = DEFAULT_UNBUFFERED_FD.value() + 1; + + // Called by both Fastpath and Fallback + EXPECT_CALL(*mbuffer, getBuffer).WillRepeatedly(Return(DEFAULT_BUFFER_ADDR)); + EXPECT_CALL(*mbuffer, getLength).Times(2).WillRepeatedly(Return(DEFAULT_BUFFER_LENGTH)); + // Called only by Fastpath + EXPECT_CALL(*mfile, getUnbufferedFd).WillOnce(Return(DEFAULT_UNBUFFERED_FD)); + // Called only by Fallback + EXPECT_CALL(*mbuffer, getType).WillOnce(Return(hipMemoryTypeDevice)); + EXPECT_CALL(*mfile, getBufferedFd).WillRepeatedly(Return(DEFAULT_BUFFERED_FD)); + EXPECT_CALL(mhip, hipMemcpy).WillRepeatedly(Return()); + EXPECT_CALL(msys, mmap).WillOnce(Return(reinterpret_cast(0x12345678))); + EXPECT_CALL(msys, munmap).WillOnce(Return()); + switch (_get_param_io_type()) { + case IoType::Read: + // Called by Fastpath + EXPECT_CALL(mhip, hipAmdFileRead).WillOnce(Rethrow(_get_param_exc_ptr())); + // Called by Fallback + EXPECT_CALL(msys, pread).WillRepeatedly(ReturnArg<2>()); + break; + case IoType::Write: + // Called by Fastpath + EXPECT_CALL(mhip, hipAmdFileWrite).WillOnce(Rethrow(_get_param_exc_ptr())); + // Called by Fallback + EXPECT_CALL(mhip, hipStreamSynchronize).WillRepeatedly(Return()); + EXPECT_CALL(msys, fdatasync).WillRepeatedly(Return()); + EXPECT_CALL(msys, pwrite).WillRepeatedly(ReturnArg<2>()); + break; + default: + FAIL() << "Invalid IoType"; + } + + ssize_t num_bytes = fastpath_backend->io(_get_param_io_type(), mfile, mbuffer, DEFAULT_IO_SIZE, 0, 0); + ASSERT_EQ(num_bytes, DEFAULT_IO_SIZE); +} + +// Using std::exception_ptr is more straightforward here than storing a pointer +// to a derived std::exception type, which would require careful handling when +// setting expectations. Note that Throw() does not accept std::exception_ptr, +// but the public (though undocumented) Rethrow() action does support it. +INSTANTIATE_TEST_SUITE_P( + FastpathTest, FastpathIoParamWithFallback, + Combine(Values(IoType::Read, IoType::Write), + Values(std::make_exception_ptr(Hip::RuntimeError(hipErrorNoDevice)), + std::make_exception_ptr(std::system_error(make_error_code(errc::no_such_device)))))); + HIPFILE_WARN_NO_GLOBAL_CTOR_ON From c8a987d18c967d71671bf08c83b390481d4b8079 Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Wed, 18 Mar 2026 12:45:30 -0600 Subject: [PATCH 08/11] Fastpath: Add integration test for tracking what exception gets thrown. --- test/amd_detail/fastpath.cpp | 48 ++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/test/amd_detail/fastpath.cpp b/test/amd_detail/fastpath.cpp index 6559e46b..ec3a324a 100644 --- a/test/amd_detail/fastpath.cpp +++ b/test/amd_detail/fastpath.cpp @@ -618,6 +618,54 @@ TEST_P(FastpathIoParamWithFallback, IntegrationRunWithFallback) ASSERT_EQ(num_bytes, DEFAULT_IO_SIZE); } +// If the fallback backend rejects the IO, the original exception from +// Fastpath should be raised. +TEST_P(FastpathIoParamWithFallback, IntegrationFallbackRejectsIO) +{ + StrictMock mhip; + StrictMock msys; + + auto fallback_backend = std::make_shared>(); + auto fastpath_backend = std::make_shared>(); + fastpath_backend->register_fallback_backend(fallback_backend); + + // Called only by Fastpath + EXPECT_CALL(*mbuffer, getBuffer).WillOnce(Return(DEFAULT_BUFFER_ADDR)); + EXPECT_CALL(*mbuffer, getLength).WillOnce(Return(DEFAULT_BUFFER_LENGTH)); + EXPECT_CALL(*mfile, getUnbufferedFd).WillOnce(Return(DEFAULT_UNBUFFERED_FD)); + // Called only by Fallback - should fail the score() check. + EXPECT_CALL(*mbuffer, getType).WillOnce(Return(hipMemoryTypeHost)); + switch (_get_param_io_type()) { + case IoType::Read: + // Called by Fastpath + EXPECT_CALL(mhip, hipAmdFileRead).WillOnce(Rethrow(_get_param_exc_ptr())); + break; + case IoType::Write: + // Called by Fastpath + EXPECT_CALL(mhip, hipAmdFileWrite).WillOnce(Rethrow(_get_param_exc_ptr())); + break; + default: + FAIL() << "Invalid IoType"; + } + + // Have to rethrow the exception_ptr to be able to access the exception + try { + std::rethrow_exception(_get_param_exc_ptr()); + } + catch (const std::exception &expected_exc) { + // Can't use EXPECT_THROW due to the thrown exception type only being known at runtime + try { + fastpath_backend->io(_get_param_io_type(), mfile, mbuffer, DEFAULT_IO_SIZE, 0, 0); + } + catch (const std::exception &actual_exc) { + ASSERT_EQ(&expected_exc, &actual_exc); + } + catch (...) { + FAIL() << "io() threw something other than a std::exception"; + } + } +} + // Using std::exception_ptr is more straightforward here than storing a pointer // to a derived std::exception type, which would require careful handling when // setting expectations. Note that Throw() does not accept std::exception_ptr, From 71e168f31235375e58a77b15c30cbc36a010bc60 Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Thu, 19 Mar 2026 14:27:47 -0600 Subject: [PATCH 09/11] Backend: Remove comment about io() returning a negative integer A negative integer represents a system error. Internally, all of the backends wrap this error code into an exception which gets propagated up to the hipFile API layer. Removing the comment that io() may return a negative error code. Technically this is not enforced as we leave the return type ssize_t, but this is to avoid type-casting elsewhere. --- src/amd_detail/backend.cpp | 6 ++++++ src/amd_detail/backend.h | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/amd_detail/backend.cpp b/src/amd_detail/backend.cpp index f5531fa7..61d54391 100644 --- a/src/amd_detail/backend.cpp +++ b/src/amd_detail/backend.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include using namespace hipFile; @@ -41,6 +42,11 @@ BackendWithFallback::io(IoType type, std::shared_ptr file, std::shared_pt ssize_t nbytes{0}; try { nbytes = _io_impl(type, file, buffer, size, file_offset, buffer_offset); + if (nbytes < 0) { + // Typically we should not reach this point. But in case we do, throw + // an exception to use the fallback backend. + throw std::system_error(-static_cast(nbytes), std::generic_category()); + } } catch (...) { std::exception_ptr e_ptr = std::current_exception(); diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index 60717131..7edb5d83 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -51,7 +51,7 @@ struct Backend { /// @param file_offset Offset from the start of the file /// @param buffer_offset Offset from the start of the buffer /// - /// @return Number of bytes transferred, negative on error + /// @return Number of bytes transferred /// /// @throws Hip::RuntimeError Sys::RuntimeError virtual ssize_t io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, @@ -79,7 +79,7 @@ struct Backend { /// @param file_offset Offset from the start of the file /// @param buffer_offset Offset from the start of the buffer /// - /// @return Number of bytes transferred, negative on error + /// @return Number of bytes transferred /// /// @throws Hip::RuntimeError Sys::RuntimeError virtual ssize_t _io_impl(IoType type, std::shared_ptr file, std::shared_ptr buffer, From 3ddd9b09ad14bea68dde146cdbb58decdad76cb8 Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Wed, 18 Mar 2026 15:06:23 -0600 Subject: [PATCH 10/11] Address copilot suggestions --- src/amd_detail/backend.cpp | 3 +-- src/amd_detail/backend.h | 3 ++- src/amd_detail/backend/fallback.cpp | 15 +++++++++++++-- test/amd_detail/backend.cpp | 1 - test/amd_detail/fastpath.cpp | 9 ++++++++- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/amd_detail/backend.cpp b/src/amd_detail/backend.cpp index 61d54391..c9fc5725 100644 --- a/src/amd_detail/backend.cpp +++ b/src/amd_detail/backend.cpp @@ -74,8 +74,7 @@ BackendWithFallback::io(IoType type, std::shared_ptr file, std::shared_pt bool BackendWithFallback::is_fallback_eligible(std::exception_ptr e_ptr, ssize_t nbytes, std::shared_ptr file, std::shared_ptr buffer, - size_t size, hoff_t file_offset, - hoff_t buffer_offset) const + size_t size, hoff_t file_offset, hoff_t buffer_offset) const { (void)e_ptr; (void)nbytes; diff --git a/src/amd_detail/backend.h b/src/amd_detail/backend.h index 7edb5d83..cfc0d289 100644 --- a/src/amd_detail/backend.h +++ b/src/amd_detail/backend.h @@ -12,6 +12,7 @@ #include "sys.h" #include +#include #include #include #include @@ -94,7 +95,7 @@ struct BackendWithFallback : public Backend { /// @brief Check if a failed IO operation can be re-issued to the fallback Backend. /// - /// @param e_ptr Pointer to the thrown Exception by the failed IO + /// @param e_ptr exception_ptr to the thrown exception from the failed IO /// @param nbytes Return value from `_io_impl`, or 0 if an exception was thrown. /// @param file File to read from or write to /// @param buffer Buffer to write to or read from diff --git a/src/amd_detail/backend/fallback.cpp b/src/amd_detail/backend/fallback.cpp index 89bce478..2e33e05a 100644 --- a/src/amd_detail/backend/fallback.cpp +++ b/src/amd_detail/backend/fallback.cpp @@ -54,14 +54,25 @@ ssize_t Fallback::io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset) { - return _io_impl(type, file, buffer, size, file_offset, buffer_offset, DefaultChunkSize); + return io(type, file, buffer, size, file_offset, buffer_offset, DefaultChunkSize); } ssize_t Fallback::io(IoType type, std::shared_ptr file, std::shared_ptr buffer, size_t size, hoff_t file_offset, hoff_t buffer_offset, size_t chunk_size) { - return _io_impl(type, file, buffer, size, file_offset, buffer_offset, chunk_size); + ssize_t nbytes = _io_impl(type, file, buffer, size, file_offset, buffer_offset, chunk_size); + switch (type) { + case (IoType::Read): + update_read_stats(nbytes); + break; + case (IoType::Write): + update_write_stats(nbytes); + break; + default: + break; + } + return nbytes; } ssize_t diff --git a/test/amd_detail/backend.cpp b/test/amd_detail/backend.cpp index c167d36c..d0c00d0f 100644 --- a/test/amd_detail/backend.cpp +++ b/test/amd_detail/backend.cpp @@ -17,7 +17,6 @@ using namespace hipFile; using ::testing::AnyNumber; -using ::testing::Invoke; using ::testing::Return; using ::testing::StrictMock; using ::testing::Throw; diff --git a/test/amd_detail/fastpath.cpp b/test/amd_detail/fastpath.cpp index ec3a324a..f0d4c061 100644 --- a/test/amd_detail/fastpath.cpp +++ b/test/amd_detail/fastpath.cpp @@ -649,6 +649,8 @@ TEST_P(FastpathIoParamWithFallback, IntegrationFallbackRejectsIO) } // Have to rethrow the exception_ptr to be able to access the exception + // This looks ugly, but is better than the alternative of trying to preserve the + // exception type when setting Throw(*std::shared_ptr>). try { std::rethrow_exception(_get_param_exc_ptr()); } @@ -656,9 +658,14 @@ TEST_P(FastpathIoParamWithFallback, IntegrationFallbackRejectsIO) // Can't use EXPECT_THROW due to the thrown exception type only being known at runtime try { fastpath_backend->io(_get_param_io_type(), mfile, mbuffer, DEFAULT_IO_SIZE, 0, 0); + FAIL() << "io() was expected to throw, but it returned normally"; } catch (const std::exception &actual_exc) { - ASSERT_EQ(&expected_exc, &actual_exc); + // Verify that the propagated exception has the same dynamic type and message + // as the one stored in the original std::exception_ptr, without relying on + // pointer identity of the underlying exception object. + ASSERT_EQ(typeid(expected_exc), typeid(actual_exc)); + ASSERT_STREQ(expected_exc.what(), actual_exc.what()); } catch (...) { FAIL() << "io() threw something other than a std::exception"; From 89a9a9ddb8e48eeded09ea86a6a5b896b75bbc30 Mon Sep 17 00:00:00 2001 From: Riley Dixon Date: Fri, 20 Mar 2026 09:39:56 -0600 Subject: [PATCH 11/11] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f1119aa2..60624355 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * The CMake namespace was changed from `roc::` to `hip::` * `AIS_BUILD_EXAMPLES` has been renamed to `AIS_INSTALL_EXAMPLES` * `AIS_USE_SANITIZERS` now also enables the following sanitizers: integer, float-divide-by-zero, local-bounds, vptr, nullability (in addition to address, leak, and undefined). Sanitizers should also now emit usable stack trace info. +* The AIS optimized IO path will automatically fallback to the POSIX IO path if a failure occurs and the compatability mode has not been disabled. ### Removed * The rocFile library has been completely removed and the code is now a part of hipFile.