Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
df5ccf5
Fix build on s390x when training is disabled
AlekseiNikiforovIBM Feb 17, 2026
45d7e7c
Fix byteswapping raw data stored in bigger data types
AlekseiNikiforovIBM Feb 18, 2026
b372cf0
Add byteswapping in all overloads of SetRawDataInTensorProto
AlekseiNikiforovIBM Feb 19, 2026
29374ef
Fix SparseTensorConversionTests.TestConstantNodeConversion test on s390x
AlekseiNikiforovIBM Feb 18, 2026
ce9d15b
Fix byte order in SparsifyGeneric function
AlekseiNikiforovIBM Feb 19, 2026
152eda2
Fix test SparseTensorConversionTests.TestDenseToSparseConversion on s…
AlekseiNikiforovIBM Feb 19, 2026
6c1c9d1
Remove excessive byteswapping in Graph::Graph
AlekseiNikiforovIBM Feb 19, 2026
ce495f8
Remove big-endian-specific exceptions
AlekseiNikiforovIBM Feb 20, 2026
eda4d90
Byteswap dimensions obtained from flatbuffer structures
AlekseiNikiforovIBM Feb 20, 2026
6e5ce60
Add byteswapping in CopyTensorDataToByteSpan function
AlekseiNikiforovIBM Feb 23, 2026
c513bf3
Add byteswap after calling set_raw_data in various functions in Megat…
AlekseiNikiforovIBM Feb 23, 2026
c307fbf
Replace most TensorProto::set_raw_data calls with SetRawDataInTensorP…
AlekseiNikiforovIBM Feb 24, 2026
e3aa9c3
Fix unpacking raw data in tests
AlekseiNikiforovIBM Feb 24, 2026
11d437b
Apply lint fixes
AlekseiNikiforovIBM Feb 24, 2026
f940d67
Byteswap external memory data too when saving ort model
AlekseiNikiforovIBM Mar 5, 2026
3ace82c
Byteswap data when saving it to external file
AlekseiNikiforovIBM Mar 5, 2026
bd93518
Don't do excessive byteswapping in UnpackTensorWithExternalDataImpl f…
AlekseiNikiforovIBM Mar 6, 2026
b8042be
Write test file in little endian in SparseTensorConversionTests.TestC…
AlekseiNikiforovIBM Mar 6, 2026
c66921b
Move byteswapping to saving tensor to file
AlekseiNikiforovIBM Mar 6, 2026
1580f21
Fix byteswapping in TensorProtoWithExternalDataToTensorProto
AlekseiNikiforovIBM Mar 17, 2026
2cdaf92
Add more data types in GetElementSizeInTensorProto function
AlekseiNikiforovIBM Mar 17, 2026
18c5c3a
Fix byte order in SaveOrtTensorOrtFormat
AlekseiNikiforovIBM Mar 19, 2026
93eae14
Split utils::kTensorProtoMemoryAddressTag
AlekseiNikiforovIBM Mar 20, 2026
a5fd873
Replace WriteLittleEndian with ReadLittleEndian in GetExtDataFromTens…
AlekseiNikiforovIBM Mar 24, 2026
41d0d19
Move common unpacking code in SaveOrtTensorOrtFormat into a lambda
AlekseiNikiforovIBM Mar 24, 2026
38983f1
Add guards against onnxruntime::utils::GetElementSizeOfTensor returni…
AlekseiNikiforovIBM Mar 25, 2026
6e6e743
Add missing headers
AlekseiNikiforovIBM Mar 26, 2026
692ebf7
Add missing casts when creating gls::span
AlekseiNikiforovIBM Mar 27, 2026
28bbd3d
Add missing header file and type cast
AlekseiNikiforovIBM Mar 30, 2026
b84ba98
Add missing namespace when using endian::native and endian::little
AlekseiNikiforovIBM Mar 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,14 @@ if(NOT IOS)

list(REMOVE_ITEM onnx_test_runner_common_srcs ${onnx_test_runner_src_dir}/main.cc)

# If training is disabled, endian_utils is still needed by the tests
if (NOT onnxruntime_ENABLE_TRAINING)
list(APPEND onnx_test_runner_common_srcs
${ONNXRUNTIME_ROOT}/core/framework/endian_utils.cc
${ONNXRUNTIME_ROOT}/core/framework/endian_utils.h
)
endif ()

onnxruntime_add_static_library(onnx_test_runner_common ${onnx_test_runner_common_srcs})
if(MSVC)
target_compile_options(onnx_test_runner_common PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/core/framework/endian_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,5 +83,11 @@ common::Status ReadLittleEndian(size_t element_size,
return detail::CopyLittleEndian(element_size, source_bytes, destination_bytes);
}

// Writes element data to a little-endian destination buffer.
// Forwards to detail::CopyLittleEndian, mirroring ReadLittleEndian above:
// the copy is symmetric, swapping the bytes of each element_size-byte
// element only when the native byte order is not little-endian.
common::Status WriteLittleEndian(size_t element_size,
gsl::span<const unsigned char> source_bytes,
gsl::span<unsigned char> destination_bytes) {
return detail::CopyLittleEndian(element_size, source_bytes, destination_bytes);
}

} // namespace utils
} // namespace onnxruntime
9 changes: 8 additions & 1 deletion onnxruntime/core/framework/endian_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,14 +76,21 @@ common::Status ReadLittleEndian(gsl::span<const unsigned char> source_bytes, gsl
return ReadLittleEndian(sizeof(T), source_bytes, destination_bytes);
}

/**
* Writes to a little-endian destination.
*/
common::Status WriteLittleEndian(size_t element_size,
gsl::span<const unsigned char> source_bytes,
gsl::span<unsigned char> destination_bytes);

/**
* Writes to a little-endian destination.
*/
template <typename T>
common::Status WriteLittleEndian(gsl::span<const T> source, gsl::span<unsigned char> destination_bytes) {
static_assert(std::is_trivially_copyable<T>::value, "T must be trivially copyable");
const auto source_bytes = gsl::make_span(reinterpret_cast<const unsigned char*>(source.data()), source.size_bytes());
return detail::CopyLittleEndian(sizeof(T), source_bytes, destination_bytes);
return WriteLittleEndian(sizeof(T), source_bytes, destination_bytes);
}

} // namespace utils
Expand Down
181 changes: 133 additions & 48 deletions onnxruntime/core/framework/tensorprotoutils.cc

Large diffs are not rendered by default.

28 changes: 25 additions & 3 deletions onnxruntime/core/framework/tensorprotoutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ Status GetExternalDataInfo(const ONNX_NAMESPACE::TensorProto& tensor_proto,
*/
void ConvertRawDataInTensorProto(ONNX_NAMESPACE::TensorProto& tensor_proto);

/**
* This function is used to get element size of tensor data.
*
 * For complex types, it returns the size of a single component element of the complex value.
*
* It will be used mostly to convert data on big endian systems
* after unpacking data.
* @param tensor_data_type tensor data type to get element size from
*/
size_t GetElementSizeOfTensor(ONNX_NAMESPACE::TensorProto_DataType tensor_data_type);

/**
* Wrapper function for set_raw_data.
* First calls the set_raw_data and then calls ConvertRawDataInTensorProto
Expand Down Expand Up @@ -156,7 +167,7 @@ common::Status CreateTensorFromTensorProto(const Env& env, const std::filesystem

/// The threshold for small tensors. If the size of the tensor is LE to this value,
/// The data will stay in the TensorProto. Otherwise, the data will be moved to a Tensor instance
/// and TensorProto will contain a kTensorProtoMemoryAddressTag reference as a result of
/// and TensorProto will contain a kTensorProtoNativeEndianMemoryAddressTag reference as a result of
/// TensorToTensorProto() below. This is because shape inferencing code in onnx for
/// like Reshape parses weights data and it needs to be in the TensorProto.
/// The value of 127 was chosen empirically to be the smallest value that is required
Expand All @@ -177,7 +188,7 @@ constexpr const size_t kMaxEmbeddedInitializerSizeInBytes = size_t{2} * 1024 * 1
* @param[in] tensor the Tensor whose data and shape will be used to create the TensorProto.
* @param[in] tensor_proto_name the name of the TensorProto.
* @param[in] use_tensor_buffer the tensor proto is set to use external location, with
* 'location' set to onnxruntime::utils::kTensorProtoMemoryAddressTag
* 'location' set to onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag
* 'offset' set to tensor's memory location, and 'length' set to tensor's
* memory size. The caller is responsible to maintain the lifetime of
* the allocated memory buffer. Use with caution.
Expand Down Expand Up @@ -215,8 +226,19 @@ common::Status ValidateEmbeddedTensorProtoDataSizeAndShape(const ONNX_NAMESPACE:
Special marker used to indicate an existing memory buffer contains the TensorProto external data.
If the 'location' field of the external data info is set to this marker, the 'offset' field should contain the
address of the memory containing the data.

This marker is used when data is always in little endian format.
*/
constexpr const ORTCHAR_T* kTensorProtoLittleEndianMemoryAddressTag = ORT_TSTR("*/_ORT_MEM_ADDR_/*");

/**
Special marker used to indicate an existing memory buffer contains the TensorProto external data.
If the 'location' field of the external data info is set to this marker, the 'offset' field should contain the
address of the memory containing the data.

This marker is used when data is in native endian format, i.e. big endian on big endian systems.
*/
constexpr const ORTCHAR_T* kTensorProtoMemoryAddressTag = ORT_TSTR("*/_ORT_MEM_ADDR_/*");
constexpr const ORTCHAR_T* kTensorProtoNativeEndianMemoryAddressTag = ORT_TSTR("*/_ORT_NATIVE_ENDIAN_MEM_ADDR_/*");

/// <summary>
/// Creates a OrtValue with a tensor on top of the external data.
Expand Down
25 changes: 14 additions & 11 deletions onnxruntime/core/graph/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1242,15 +1242,6 @@ Graph::Graph(const Model& owning_model,

const gsl::not_null<TensorProto*> tensor{graph_proto_->add_initializer()};
ORT_THROW_IF_ERROR(utils::ConstantNodeProtoToTensorProto(node, model_path, *tensor));
if constexpr (endian::native != endian::little) {
const AttributeProto& attrib = node.attribute(0);
if (attrib.type() == AttributeProto_AttributeType_SPARSE_TENSOR) {
const TensorProto& sparse_values = node.attribute(0).sparse_tensor().values();
if ((!(sparse_values.has_raw_data())) && utils::HasRawData(*tensor)) {
onnxruntime::utils::ConvertRawDataInTensorProto(*tensor);
}
}
}

// Ensure initializers are also graph inputs.
if (ir_version_ < 4) {
Expand Down Expand Up @@ -4901,6 +4892,18 @@ Status Graph::AddExternalInitializersToGraphProtoImpl(
std::vector<uint8_t> raw_data;
ORT_RETURN_IF_ERROR(utils::UnpackInitializerData(initializer, model_path, raw_data));
size_t tensor_bytes_size = raw_data.size();

// Convert the data to little endian before saving it to the file
if constexpr (endian::native != endian::little) {
size_t element_size = onnxruntime::utils::GetElementSizeOfTensor(static_cast<ONNX_NAMESPACE::TensorProto_DataType>(initializer.data_type()));

if (element_size > 1) {
onnxruntime::utils::SwapByteOrderInplace(
element_size,
gsl::make_span(reinterpret_cast<std::byte*>(raw_data.data()), tensor_bytes_size));
}
}

if (model_saving_options.force_embed_external_ini ||
tensor_bytes_size < model_saving_options.initializer_size_threshold) {
*output_proto = initializer;
Expand Down Expand Up @@ -6655,13 +6658,13 @@ Status Graph::LoadFromModelEditorApiModel(const OrtGraph& api_graph, bool updati
const void* data_offset = t.DataRaw(); // address of memory not offset into file
auto offset = narrow<ExternalDataInfo::OFFSET_TYPE>(reinterpret_cast<intptr_t>(data_offset));

ExternalDataInfo::SetExternalLocationToProto(onnxruntime::utils::kTensorProtoMemoryAddressTag,
ExternalDataInfo::SetExternalLocationToProto(onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag,
offset, t.SizeInBytes(), tensor_proto);

// add OrtValue to ortvalue_initializers_ to keep it alive and to store the deleter if provided.
ortvalue_initializers_.emplace(name, std::move(v));
} else {
tensor_proto.set_raw_data(t.DataRaw(), t.SizeInBytes());
onnxruntime::utils::SetRawDataInTensorProto(tensor_proto, t.DataRaw(), t.SizeInBytes());
}

TypeProto type_proto{utils::TypeProtoFromTensorProto(tensor_proto)};
Expand Down
78 changes: 63 additions & 15 deletions onnxruntime/core/graph/graph_flatbuffers_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,19 @@
string_data = builder.CreateVectorOfStrings(string_data_vec);
} else {
std::vector<uint8_t> unpacked_tensor;
// We cannot convert this in place, because the session may be used
// after the model was saved in ORT format. If the session continues to be used,
// its in-memory initializers would be left with the wrong endianness
ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor));

// We cannot convert the data before unpacking it, because
// external data is not converted by the ConvertRawDataInTensorProto function.
// Instead, convert the data after unpacking it.
if constexpr (endian::native != endian::little) {
auto be_copy{initializer};
onnxruntime::utils::ConvertRawDataInTensorProto(be_copy);
ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(be_copy, model_path, unpacked_tensor));
} else {
ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor));
size_t element_size = onnxruntime::utils::GetElementSizeOfTensor(static_cast<ONNX_NAMESPACE::TensorProto_DataType>(initializer.data_type()));

if (element_size > 1) {
onnxruntime::utils::SwapByteOrderInplace(
element_size,
gsl::make_span(reinterpret_cast<std::byte*>(unpacked_tensor.data()), unpacked_tensor.size()));
}
}

if (external_writer && unpacked_tensor.size() >= kMinimumSizeForExternalData) {
Expand Down Expand Up @@ -316,7 +320,7 @@
// high bit, but that should be unlikely in a scenario where we care about memory usage enough to use this path.
auto offset = narrow<ExternalDataInfo::OFFSET_TYPE>(reinterpret_cast<intptr_t>(data_offset));

ExternalDataInfo::SetExternalLocationToProto(onnxruntime::utils::kTensorProtoMemoryAddressTag,
ExternalDataInfo::SetExternalLocationToProto(onnxruntime::utils::kTensorProtoLittleEndianMemoryAddressTag,
offset, fbs_raw_data->size(), initializer);

} else {
Expand Down Expand Up @@ -473,9 +477,31 @@
// To avoid issues with vtable offsets, raw_data fbs::vector must be constructed before the TensorBuilder begins
// building the tensor. See flatbuffer_builder.h's NotNested() function for more details.
flatbuffers::Offset<flatbuffers::Vector<uint8_t>> raw_data;

auto unpack_tensor_data_be = [&ort_tensor](std::vector<uint8_t>& unpacked_tensor_data) -> Status {
unpacked_tensor_data.resize(ort_tensor.SizeInBytes());

size_t element_size = onnxruntime::utils::GetElementSizeOfTensor(static_cast<ONNX_NAMESPACE::TensorProto_DataType>(ort_tensor.GetElementType()));
auto src_span = gsl::make_span(reinterpret_cast<const unsigned char*>(ort_tensor.DataRaw()), ort_tensor.SizeInBytes());
auto dst_span = gsl::make_span(reinterpret_cast<unsigned char*>(unpacked_tensor_data.data()), unpacked_tensor_data.size());

// If element size is unknown, set it to 1 to disable byteswapping
if (element_size < 1) element_size = 1;

return onnxruntime::utils::WriteLittleEndian(element_size, src_span, dst_span);
};

if (!external_data_writer) {
raw_data = builder.CreateVector(static_cast<const uint8_t*>(ort_tensor.DataRaw()),
ort_tensor.SizeInBytes());
if constexpr (endian::native != endian::little) {
std::vector<uint8_t> unpacked_tensor;

ORT_RETURN_IF_ERROR(unpack_tensor_data_be(unpacked_tensor));

raw_data = builder.CreateVector(unpacked_tensor.data(), unpacked_tensor.size());
} else {
raw_data = builder.CreateVector(static_cast<const uint8_t*>(ort_tensor.DataRaw()),
ort_tensor.SizeInBytes());
}
}

fbs::TensorBuilder tb(builder);
Expand All @@ -485,8 +511,17 @@
tb.add_data_type(static_cast<fbs::TensorDataType>(ort_tensor.GetElementType()));
if (external_data_writer) {
uint64_t offset = 0;
gsl::span<const uint8_t> ort_tensor_data_span(static_cast<const uint8_t*>(ort_tensor.DataRaw()), ort_tensor.SizeInBytes());
ORT_RETURN_IF_ERROR(external_data_writer(ort_tensor.GetElementType(), ort_tensor_data_span, offset));
if constexpr (endian::native != endian::little) {
std::vector<uint8_t> unpacked_tensor;

ORT_RETURN_IF_ERROR(unpack_tensor_data_be(unpacked_tensor));

gsl::span<const uint8_t> ort_tensor_data_span(static_cast<const uint8_t*>(unpacked_tensor.data()), unpacked_tensor.size());
ORT_RETURN_IF_ERROR(external_data_writer(ort_tensor.GetElementType(), ort_tensor_data_span, offset));
} else {
gsl::span<const uint8_t> ort_tensor_data_span(static_cast<const uint8_t*>(ort_tensor.DataRaw()), ort_tensor.SizeInBytes());
ORT_RETURN_IF_ERROR(external_data_writer(ort_tensor.GetElementType(), ort_tensor_data_span, offset));
}
int64_t external_data_offset = onnxruntime::narrow<int64_t>(offset);
tb.add_external_data_offset(external_data_offset);
} else {
Expand Down Expand Up @@ -546,8 +581,21 @@
const DataTypeImpl* tensor_dtype = DataTypeImpl::TensorTypeFromONNXEnum(
tensor_data_type)
->GetElementType();
ort_tensor = onnxruntime::Tensor(
tensor_dtype, TensorShape(tensor_dims->data(), tensor_dims->size()), allocator);

if constexpr (endian::native != endian::little) {
std::vector<typename std::remove_reference_t<decltype(*tensor_dims)>::return_type> byteswapped_data;

Check warning on line 586 in onnxruntime/core/graph/graph_flatbuffers_utils.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Add #include <vector> for vector<> [build/include_what_you_use] [4] Raw Output: onnxruntime/core/graph/graph_flatbuffers_utils.cc:586: Add #include <vector> for vector<> [build/include_what_you_use] [4]
byteswapped_data.resize(tensor_dims->size());

for (size_t i = 0; i < tensor_dims->size(); ++i) {
byteswapped_data[i] = tensor_dims->Get(i);
}

ort_tensor = onnxruntime::Tensor(
tensor_dtype, TensorShape(byteswapped_data.data(), byteswapped_data.size()), allocator);
} else {
ort_tensor = onnxruntime::Tensor(
tensor_dtype, TensorShape(tensor_dims->data(), tensor_dims->size()), allocator);
}

if (fbs_tensor.raw_data() && fbs_tensor.raw_data()->size() == 0U) {
// Empty tensor. Nothing to unpack.
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/graph/graph_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,8 @@ bool CheckInMemoryDataMatch(const ONNX_NAMESPACE::TensorProto& tensor_proto, con
// Retrieve external data using ExternalData structure
std::unique_ptr<ExternalDataInfo> external_data;
ORT_THROW_IF_ERROR(ExternalDataInfo::Create(tensor_proto.external_data(), external_data));
return (external_data->GetRelPath().compare(utils::kTensorProtoMemoryAddressTag) == 0) &&
return ((external_data->GetRelPath().compare(utils::kTensorProtoLittleEndianMemoryAddressTag) == 0) ||
(external_data->GetRelPath().compare(utils::kTensorProtoNativeEndianMemoryAddressTag) == 0)) &&
(tensor.DataRaw() == reinterpret_cast<const void*>(external_data->GetOffset()));
}
return false;
Expand Down
20 changes: 10 additions & 10 deletions onnxruntime/core/optimizer/qdq_transformer/where_dummy_dq.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,29 +63,29 @@ Status WhereDummyDq::InsertDummyDQ(Node& node, Graph& graph, bool& modified, con
case ONNX_NAMESPACE::TensorProto_DataType_INT8: {
int8_t zp = 0;
int8_t dummy_data = 1;
dummy_zp_proto.set_raw_data(&zp, 1);
dummy_data_proto.set_raw_data(&dummy_data, 1);
utils::SetRawDataInTensorProto(dummy_zp_proto, &zp, 1);
utils::SetRawDataInTensorProto(dummy_data_proto, &dummy_data, 1);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_UINT8: {
uint8_t zp = 0;
uint8_t dummy_data = 1;
dummy_zp_proto.set_raw_data(&zp, 1);
dummy_data_proto.set_raw_data(&dummy_data, 1);
utils::SetRawDataInTensorProto(dummy_zp_proto, &zp, 1);
utils::SetRawDataInTensorProto(dummy_data_proto, &dummy_data, 1);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_INT16: {
int16_t zp = 0;
int16_t dummy_data = 1;
dummy_zp_proto.set_raw_data(&zp, 2);
dummy_data_proto.set_raw_data(&dummy_data, 2);
utils::SetRawDataInTensorProto(dummy_zp_proto, &zp, 2);
utils::SetRawDataInTensorProto(dummy_data_proto, &dummy_data, 2);
break;
}
case ONNX_NAMESPACE::TensorProto_DataType_UINT16: {
uint16_t zp = 0;
uint16_t dummy_data = 1;
dummy_zp_proto.set_raw_data(&zp, 2);
dummy_data_proto.set_raw_data(&dummy_data, 2);
utils::SetRawDataInTensorProto(dummy_zp_proto, &zp, 2);
utils::SetRawDataInTensorProto(dummy_data_proto, &dummy_data, 2);
break;
}
default:
Expand All @@ -110,7 +110,7 @@ Status WhereDummyDq::InsertDummyDQ(Node& node, Graph& graph, bool& modified, con
switch (initializer.data_type()) {
case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: {
float* where_const_scalar = initializer.data<float>();
dummy_scale_proto.set_raw_data(where_const_scalar, sizeof(float));
utils::SetRawDataInTensorProto(dummy_scale_proto, where_const_scalar, sizeof(float));
break;
}
default:
Expand Down Expand Up @@ -166,4 +166,4 @@ Status WhereDummyDq::ApplyImpl(Graph& graph, bool& modified, int graph_level, co

return Status::OK();
}
} // namespace onnxruntime
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
#include "DmlGraphFusionHelper.h"
#include "DmlRuntimeFusedGraphKernel.h"

#include "core/common/endian.h"
#include "core/framework/endian_utils.h"

using namespace Windows::AI::MachineLearning::Adapter;

namespace Dml
Expand Down Expand Up @@ -121,7 +124,31 @@ namespace DmlGraphFusionHelper
onnxruntime::FileOffsetType fileOffset;
SafeInt<size_t> safeTensorByteSize;
THROW_IF_NOT_OK(onnxruntime::utils::GetExternalDataInfo(*initializer, graph.ModelPath(), /*out*/ externalFilePath, /*out*/ fileOffset, /*out*/ safeTensorByteSize));
if (externalFilePath == onnxruntime::utils::kTensorProtoMemoryAddressTag)
if (externalFilePath == onnxruntime::utils::kTensorProtoLittleEndianMemoryAddressTag)
{
if constexpr (onnxruntime::endian::native != onnxruntime::endian::little)
{
unpackedTensor.reset(new std::byte[safeTensorByteSize]);

auto src = gsl::make_span<const unsigned char>(reinterpret_cast<const unsigned char*>(fileOffset), safeTensorByteSize);
auto dst = gsl::make_span<unsigned char>(reinterpret_cast<unsigned char*>(unpackedTensor.get()), safeTensorByteSize);
size_t element_size = onnxruntime::utils::GetElementSizeOfTensor(static_cast<ONNX_NAMESPACE::TensorProto_DataType>(initializer->data_type()));

// If element size is unknown, set it to 1 to disable byteswapping
if (element_size < 1) element_size = 1;

THROW_IF_NOT_OK(onnxruntime::utils::ReadLittleEndian(element_size, src, dst));

tensorPtr = unpackedTensor.get();
tensorByteSize = safeTensorByteSize;
}
else
{
tensorPtr = reinterpret_cast<std::byte*>(fileOffset);
tensorByteSize = safeTensorByteSize;
}
}
else if (externalFilePath == onnxruntime::utils::kTensorProtoNativeEndianMemoryAddressTag)
{
tensorPtr = reinterpret_cast<std::byte*>(fileOffset);
tensorByteSize = safeTensorByteSize;
Expand Down
Loading
Loading