diff --git a/CMakeLists.txt b/CMakeLists.txt index 8841931bfe..be6b83f810 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,6 +53,14 @@ option(SIMPLNX_DOWNLOAD_TEST_FILES "Download the test files" ON) # ------------------------------------------------------------------------------ option(SIMPLNX_WRITE_TEST_OUTPUT "Write unit test output files" OFF) +# ------------------------------------------------------------------------------ +# Controls which algorithm paths are exercised by dual-dispatch unit tests. +# 0 (Both) - tests run with forceOoc=false AND forceOoc=true (default) +# 1 (OocOnly) - tests run with forceOoc=true only (use for OOC builds) +# 2 (InCoreOnly) - tests run with forceOoc=false only (quick validation) +# ------------------------------------------------------------------------------ +set(SIMPLNX_TEST_ALGORITHM_PATH "0" CACHE STRING "Algorithm paths to test: 0=Both, 1=OocOnly, 2=InCoreOnly") + # ------------------------------------------------------------------------------ # Is the SimplnxCore Plugin enabled [DEFAULT=ON] # ------------------------------------------------------------------------------ @@ -255,6 +263,7 @@ if(SIMPLNX_ENABLE_MULTICORE) target_link_libraries(simplnx PUBLIC TBB::tbb) endif() + target_link_libraries(simplnx PUBLIC fmt::fmt @@ -458,6 +467,7 @@ set(SIMPLNX_HDRS ${SIMPLNX_SOURCE_DIR}/DataStructure/DynamicListArray.hpp ${SIMPLNX_SOURCE_DIR}/DataStructure/EmptyDataStore.hpp ${SIMPLNX_SOURCE_DIR}/DataStructure/EmptyListStore.hpp + ${SIMPLNX_SOURCE_DIR}/DataStructure/EmptyStringStore.hpp ${SIMPLNX_SOURCE_DIR}/DataStructure/IArray.hpp ${SIMPLNX_SOURCE_DIR}/DataStructure/IDataArray.hpp ${SIMPLNX_SOURCE_DIR}/DataStructure/IDataStore.hpp @@ -539,6 +549,7 @@ set(SIMPLNX_HDRS ${SIMPLNX_SOURCE_DIR}/Utilities/DataGroupUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/DataObjectUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/DataStoreUtilities.hpp + ${SIMPLNX_SOURCE_DIR}/Utilities/AlgorithmDispatch.hpp 
${SIMPLNX_SOURCE_DIR}/Utilities/FilePathGenerator.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/ColorTableUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/FileUtilities.hpp diff --git a/cmake/Plugin.cmake b/cmake/Plugin.cmake index 43e4691566..0f11bd11f4 100644 --- a/cmake/Plugin.cmake +++ b/cmake/Plugin.cmake @@ -383,6 +383,7 @@ function(create_simplnx_plugin_unit_test) target_compile_definitions(${UNIT_TEST_TARGET} PRIVATE SIMPLNX_BUILD_DIR="$" + SIMPLNX_TEST_ALGORITHM_PATH=${SIMPLNX_TEST_ALGORITHM_PATH} ) target_compile_options(${UNIT_TEST_TARGET} diff --git a/src/Plugins/ITKImageProcessing/src/ITKImageProcessing/Common/ITKArrayHelper.hpp b/src/Plugins/ITKImageProcessing/src/ITKImageProcessing/Common/ITKArrayHelper.hpp index 827a7f7812..d582d0cb9a 100644 --- a/src/Plugins/ITKImageProcessing/src/ITKImageProcessing/Common/ITKArrayHelper.hpp +++ b/src/Plugins/ITKImageProcessing/src/ITKImageProcessing/Common/ITKArrayHelper.hpp @@ -855,7 +855,7 @@ Result DataCheck(const DataStructure& dataStructure, const DataPa const auto& inputArray = dataStructure.getDataRefAs(inputArrayPath); const auto& inputDataStore = inputArray.getIDataStoreRef(); - if(!inputArray.getDataFormat().empty()) + if(inputArray.getStoreType() == IDataStore::StoreType::OutOfCore) { return MakeErrorResult(Constants::k_OutOfCoreDataNotSupported, fmt::format("Input Array '{}' utilizes out-of-core data. This is not supported within ITK filters.", inputArrayPath.toString())); @@ -877,7 +877,7 @@ Result> Execute(DataStr using ResultT = detail::ITKFilterFunctorResult_t; - if(!inputArray.getDataFormat().empty()) + if(inputArray.getStoreType() == IDataStore::StoreType::OutOfCore) { return MakeErrorResult(Constants::k_OutOfCoreDataNotSupported, fmt::format("Input Array '{}' utilizes out-of-core data. 
This is not supported within ITK filters.", inputArrayPath.toString())); } diff --git a/src/Plugins/ITKImageProcessing/test/ITKTestBase.cpp b/src/Plugins/ITKImageProcessing/test/ITKTestBase.cpp index 2773ad109a..fe394b26d1 100644 --- a/src/Plugins/ITKImageProcessing/test/ITKTestBase.cpp +++ b/src/Plugins/ITKImageProcessing/test/ITKTestBase.cpp @@ -30,7 +30,7 @@ std::string ComputeMD5HashTyped(const IDataArray& outputDataArray) usize arraySize = dataArray.getSize(); MD5 md5; - if(outputDataArray.getDataFormat().empty()) + if(outputDataArray.getIDataStoreRef().getStoreType() != IDataStore::StoreType::OutOfCore) { const T* dataPtr = dataArray.template getIDataStoreRefAs>().data(); md5.update(reinterpret_cast(dataPtr), arraySize * sizeof(T)); @@ -135,47 +135,7 @@ namespace ITKTestBase bool IsArrayInMemory(DataStructure& dataStructure, const DataPath& outputDataPath) { const auto& outputDataArray = dataStructure.getDataRefAs(outputDataPath); - DataType outputDataType = outputDataArray.getDataType(); - - switch(outputDataType) - { - case DataType::float32: { - return dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::float64: { - return dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::int8: { - return dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::uint8: { - return dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::int16: { - return dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::uint16: { - return dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::int32: { - return dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::uint32: { - return dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::int64: { - return dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::uint64: { - return 
dynamic_cast&>(outputDataArray).getDataFormat().empty(); - } - case DataType::boolean: { - [[fallthrough]]; - } - default: { - return {}; - } - } + return outputDataArray.getIDataStoreRef().getStoreType() != IDataStore::StoreType::OutOfCore; } //------------------------------------------------------------------------------ std::string ComputeMd5Hash(DataStructure& dataStructure, const DataPath& outputDataPath) diff --git a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/ComputeFZQuaternionsFilter.cpp b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/ComputeFZQuaternionsFilter.cpp index a77f6e6b2f..d7d1d4603d 100644 --- a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/ComputeFZQuaternionsFilter.cpp +++ b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/ComputeFZQuaternionsFilter.cpp @@ -122,8 +122,8 @@ IFilter::PreflightResult ComputeFZQuaternionsFilter::preflightImpl(const DataStr nx::core::Result resultOutputActions; - auto createArrayAction = std::make_unique(nx::core::DataType::float32, quatArray.getDataStore()->getTupleShape(), quatArray.getDataStore()->getComponentShape(), - pFZQuatsArrayPathValue, CreateArrayAction::k_DefaultDataFormat, "0.0"); + auto createArrayAction = + std::make_unique(nx::core::DataType::float32, quatArray.getDataStore()->getTupleShape(), quatArray.getDataStore()->getComponentShape(), pFZQuatsArrayPathValue, "", "0.0"); resultOutputActions.value().appendAction(std::move(createArrayAction)); // Return both the resultOutputActions and the preflightUpdatedValues via std::move() diff --git a/src/Plugins/OrientationAnalysis/test/ComputeIPFColorsTest.cpp b/src/Plugins/OrientationAnalysis/test/ComputeIPFColorsTest.cpp index 444828ec87..8e4e937797 100644 --- a/src/Plugins/OrientationAnalysis/test/ComputeIPFColorsTest.cpp +++ b/src/Plugins/OrientationAnalysis/test/ComputeIPFColorsTest.cpp @@ -75,9 +75,7 @@ TEST_CASE("OrientationAnalysis::ComputeIPFColors", 
"[OrientationAnalysis][Comput // This test file was produced by SIMPL/DREAM3D. our results should match theirs auto exemplarFilePath = fs::path(fmt::format("{}/so3_cubic_high_ipf_001.dream3d", unit_test::k_TestFilesDir)); REQUIRE(fs::exists(exemplarFilePath)); - auto result = DREAM3D::ImportDataStructureFromFile(exemplarFilePath, false); - REQUIRE(result.valid()); - dataStructure = result.value(); + dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); } // Instantiate the filter, a DataStructure object and an Arguments Object @@ -113,7 +111,7 @@ TEST_CASE("OrientationAnalysis::ComputeIPFColors", "[OrientationAnalysis][Comput SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); { // Write out the DataStructure for later viewing/debugging - auto fileWriter = nx::core::HDF5::FileIO::WriteFile(std::filesystem::path(fmt::format("{}/ComputeIPFColors_Test.dream3d", unit_test::k_BinaryTestOutputDir))); + auto fileWriter = nx::core::HDF5::FileIO::WriteFile(fs::path(fmt::format("{}/ComputeIPFColors_Test.dream3d", unit_test::k_BinaryTestOutputDir))); auto resultH5 = HDF5::DataStructureWriter::WriteFile(dataStructure, fileWriter); SIMPLNX_RESULT_REQUIRE_VALID(resultH5); } diff --git a/src/Plugins/OrientationAnalysis/test/RodriguesConvertorTest.cpp b/src/Plugins/OrientationAnalysis/test/RodriguesConvertorTest.cpp index f3f3002789..ee2f1243d6 100644 --- a/src/Plugins/OrientationAnalysis/test/RodriguesConvertorTest.cpp +++ b/src/Plugins/OrientationAnalysis/test/RodriguesConvertorTest.cpp @@ -44,6 +44,10 @@ TEST_CASE("OrientationAnalysis::RodriguesConvertorFilter", "[OrientationAnalysis (*exemplarData)[9] = 0.573462F; (*exemplarData)[10] = 0.655386F; (*exemplarData)[11] = 12.2066F; + (*exemplarData)[12] = 0.517892F; + (*exemplarData)[13] = 0.575435F; + (*exemplarData)[14] = 0.632979F; + (*exemplarData)[15] = 17.37815F; { // Instantiate the filter, a DataStructure object and an Arguments Object const RodriguesConvertorFilter filter; diff --git 
a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.cpp index dc364fca6b..84861d6480 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ComputeArrayStatistics.cpp @@ -1,6 +1,7 @@ #include "ComputeArrayStatistics.hpp" #include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/IDataStore.hpp" #include "simplnx/Utilities/DataArrayUtilities.hpp" #include "simplnx/Utilities/FilterUtilities.hpp" #include "simplnx/Utilities/HistogramUtilities.hpp" @@ -31,7 +32,7 @@ bool CheckArraysInMemory(const nx::core::IParallelAlgorithm::AlgorithmArrays& ar continue; } - if(!arrayPtr->getIDataStoreRef().getDataFormat().empty()) + if(arrayPtr->getIDataStoreRef().getStoreType() == nx::core::IDataStore::StoreType::OutOfCore) { return false; } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp index 5cd59ae953..7cf31a305a 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp @@ -300,115 +300,93 @@ const std::atomic_bool& FillBadData::getCancel() const // @param provisionalLabels Map from voxel index to assigned provisional label // @param dims Image dimensions [X, Y, Z] // ============================================================================= -void FillBadData::phaseOneCCL(Int32AbstractDataStore& featureIdsStore, ChunkAwareUnionFind& unionFind, std::unordered_map& provisionalLabels, const std::array& dims) +Result<> FillBadData::phaseOneCCL(Int32AbstractDataStore& featureIdsStore, ChunkAwareUnionFind& unionFind, std::unordered_map& provisionalLabels, const std::array& dims) { - // Use negative labels 
for bad data regions to distinguish from positive feature IDs int64 nextLabel = -1; + const usize slabSize = static_cast(dims[0]) * static_cast(dims[1]); - const uint64 numChunks = featureIdsStore.getNumberOfChunks(); + // Two slab buffers: current Z-slab and previous Z-slab for -Z neighbor checks + std::vector curSlab(slabSize); + std::vector prevSlab(slabSize); - // Process each chunk sequentially (load, process, unload) - for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + for(int64 z = 0; z < dims[2]; z++) { - // Load the current chunk into memory - featureIdsStore.loadChunk(chunkIdx); - - // Get chunk bounds (INCLUSIVE ranges in [Z, Y, X] order) - const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); - const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); + const usize slabStart = static_cast(z) * slabSize; + auto readResult = featureIdsStore.copyIntoBuffer(slabStart, nonstd::span(curSlab.data(), slabSize)); + if(readResult.invalid()) + { + return MergeResults(readResult, + MakeErrorResult(-71500, fmt::format("FillBadData phase 1 (connected component labeling): failed to read Z-slab {} (start index {}, size {}) from feature IDs store.", z, + slabStart, slabSize))); + } - // Process voxels in this chunk using scanline algorithm - // Iterate in Z-Y-X order (slowest to fastest) to maintain scanline consistency - // Note: chunk bounds are INCLUSIVE and in [Z, Y, X] order (slowest to fastest) - for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) + for(int64 y = 0; y < dims[1]; y++) { - for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) + for(int64 x = 0; x < dims[0]; x++) { - for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) + const usize localIdx = static_cast(y) * static_cast(dims[0]) + static_cast(x); + if(curSlab[localIdx] != 0) { - // Calculate linear index for current voxel - const usize index = z * dims[0] * dims[1] + y * dims[0] + x; - - // Only process bad 
data voxels (FeatureId == 0) - // Skip valid feature voxels (FeatureId > 0) - if(featureIdsStore[index] != 0) - { - continue; - } + continue; + } - // Check already-processed neighbors (scanline order: -Z, -Y, -X) - // We only check "backward" neighbors because "forward" neighbors - // haven't been processed yet in the scanline order - std::vector neighborLabels; + const usize globalIdx = slabStart + localIdx; + std::vector neighborLabels; - // Check -X neighbor - if(x > 0) + // Check -X neighbor (same slab) + if(x > 0 && curSlab[localIdx - 1] == 0) + { + const usize nIdx = globalIdx - 1; + if(provisionalLabels.contains(nIdx)) { - const usize neighborIdx = index - 1; - if(provisionalLabels.contains(neighborIdx) && featureIdsStore[neighborIdx] == 0) - { - neighborLabels.push_back(provisionalLabels[neighborIdx]); - } + neighborLabels.push_back(provisionalLabels[nIdx]); } - - // Check -Y neighbor - if(y > 0) + } + // Check -Y neighbor (same slab) + if(y > 0 && curSlab[localIdx - dims[0]] == 0) + { + const usize nIdx = globalIdx - dims[0]; + if(provisionalLabels.contains(nIdx)) { - const usize neighborIdx = index - dims[0]; - if(provisionalLabels.contains(neighborIdx) && featureIdsStore[neighborIdx] == 0) - { - neighborLabels.push_back(provisionalLabels[neighborIdx]); - } + neighborLabels.push_back(provisionalLabels[nIdx]); } - - // Check -Z neighbor - if(z > 0) + } + // Check -Z neighbor (previous slab) + if(z > 0 && prevSlab[localIdx] == 0) + { + const usize nIdx = globalIdx - slabSize; + if(provisionalLabels.contains(nIdx)) { - const usize neighborIdx = index - dims[0] * dims[1]; - if(provisionalLabels.contains(neighborIdx) && featureIdsStore[neighborIdx] == 0) - { - neighborLabels.push_back(provisionalLabels[neighborIdx]); - } + neighborLabels.push_back(provisionalLabels[nIdx]); } + } - // Assign label based on neighbors - int64 assignedLabel; - if(neighborLabels.empty()) - { - // No labeled neighbors found - this is a new connected component - // Assign a new 
negative label and initialize in union-find - assignedLabel = nextLabel--; - unionFind.find(assignedLabel); // Initialize in union-find (creates entry) - } - else + int64 assignedLabel = 0; + if(neighborLabels.empty()) + { + assignedLabel = nextLabel--; + unionFind.find(assignedLabel); + } + else + { + assignedLabel = neighborLabels[0]; + for(usize i = 1; i < neighborLabels.size(); i++) { - // One or more labeled neighbors found - join their equivalence class - // Use the first neighbor's label as the representative - assignedLabel = neighborLabels[0]; - - // If multiple neighbors have different labels, unite them - // This handles the case where different regions merge at this voxel - for(usize i = 1; i < neighborLabels.size(); i++) + if(neighborLabels[i] != assignedLabel) { - if(neighborLabels[i] != assignedLabel) - { - unionFind.unite(assignedLabel, neighborLabels[i]); - } + unionFind.unite(assignedLabel, neighborLabels[i]); } } - - // Store the assigned label for this voxel - provisionalLabels[index] = assignedLabel; - - // Increment the size count for this label (will be accumulated to root in flatten()) - unionFind.addSize(assignedLabel, 1); } + + provisionalLabels[globalIdx] = assignedLabel; + unionFind.addSize(assignedLabel, 1); } } - } - // Flush to ensure all chunks are written back to storage - featureIdsStore.flush(); + std::swap(prevSlab, curSlab); + } + return {}; } // ============================================================================= @@ -450,15 +428,10 @@ void FillBadData::phaseTwoGlobalResolution(ChunkAwareUnionFind& unionFind, std:: // @param unionFind Union-Find structure with resolved equivalences (from Phase 2) // @param maxPhase Maximum existing phase value (for new phase assignment) // ============================================================================= -void FillBadData::phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, const std::unordered_map& provisionalLabels, - const 
std::unordered_set& smallRegions, ChunkAwareUnionFind& unionFind, usize maxPhase) const +Result<> FillBadData::phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, const std::unordered_map& provisionalLabels, + const std::unordered_set& /*smallRegions*/, ChunkAwareUnionFind& unionFind, usize maxPhase) const { - const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); - const SizeVec3 udims = selectedImageGeom.getDimensions(); - const uint64 numChunks = featureIdsStore.getNumberOfChunks(); - - // Collect all unique root labels and their sizes - // After flatten(), all labels point to roots and sizes are accumulated + // Classify regions by size std::unordered_map rootSizes; for(const auto& [index, label] : provisionalLabels) { @@ -469,7 +442,6 @@ void FillBadData::phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, } } - // Classify regions as small (need filling) or large (keep or assign to a new phase) std::unordered_set localSmallRegions; for(const auto& [root, size] : rootSizes) { @@ -479,57 +451,60 @@ void FillBadData::phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, } } - // Process each chunk to relabel voxels based on region classification - for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) - { - // Load chunk into memory - featureIdsStore.loadChunk(chunkIdx); + // Process slab-by-slab, reading and writing back via bulk I/O + const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); + const SizeVec3 udims = selectedImageGeom.getDimensions(); + const usize slabSize = udims[0] * udims[1]; + std::vector slab(slabSize); - // Get chunk bounds (INCLUSIVE ranges in [Z, Y, X] order) - const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); - const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); + for(usize z = 0; z < udims[2]; z++) + { + const usize slabStart = z * slabSize; + 
auto readResult = featureIdsStore.copyIntoBuffer(slabStart, nonstd::span(slab.data(), slabSize)); + if(readResult.invalid()) + { + return MergeResults(readResult, MakeErrorResult(-71501, fmt::format("FillBadData phase 3 (region classification): failed to read Z-slab {} (start index {}, size {}) from feature IDs store.", z, + slabStart, slabSize))); + } - // Iterate through all voxels in this chunk - // Note: chunk bounds are INCLUSIVE and in [Z, Y, X] order (slowest to fastest) - for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) + bool slabModified = false; + for(usize localIdx = 0; localIdx < slabSize; localIdx++) { - for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) + const usize globalIdx = slabStart + localIdx; + auto labelIter = provisionalLabels.find(globalIdx); + if(labelIter == provisionalLabels.end()) { - for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) - { - const usize index = z * udims[0] * udims[1] + y * udims[0] + x; - - // Check if this voxel was labeled as bad data in Phase 1 - auto labelIter = provisionalLabels.find(index); - if(labelIter != provisionalLabels.end()) - { - // Find the root label for this voxel's connected component - int64 root = unionFind.find(labelIter->second); + continue; + } - if(localSmallRegions.contains(root)) - { - // Small region - mark with -1 for filling in Phase 4 - featureIdsStore[index] = -1; - } - else - { - // Large region - keep as bad data (0) or assign to a new phase - featureIdsStore[index] = 0; - - // Optionally assign large bad data regions to a new phase - if(m_InputValues->storeAsNewPhase && cellPhasesPtr != nullptr) - { - (*cellPhasesPtr)[index] = static_cast(maxPhase) + 1; - } - } - } + int64 root = unionFind.find(labelIter->second); + if(localSmallRegions.contains(root)) + { + slab[localIdx] = -1; + } + else + { + slab[localIdx] = 0; + if(m_InputValues->storeAsNewPhase && cellPhasesPtr != nullptr) + { + (*cellPhasesPtr)[globalIdx] = static_cast(maxPhase) + 
1; } } + slabModified = true; } - } - // Write all chunks back to storage - featureIdsStore.flush(); + if(slabModified) + { + auto writeResult = featureIdsStore.copyFromBuffer(slabStart, nonstd::span(slab.data(), slabSize)); + if(writeResult.invalid()) + { + return MergeResults(writeResult, + MakeErrorResult(-71502, fmt::format("FillBadData phase 3 (region classification): failed to write Z-slab {} (start index {}, size {}) back to feature IDs store.", z, + slabStart, slabSize))); + } + } + } + return {}; } // ============================================================================= @@ -695,12 +670,10 @@ void FillBadData::phaseFourIterativeFill(Int32AbstractDataStore& featureIdsStore // ============================================================================= Result<> FillBadData::operator()() const { - // Get feature IDs array and image geometry auto& featureIdsStore = m_DataStructure.getDataAs(m_InputValues->featureIdsArrayPath)->getDataStoreRef(); const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); const SizeVec3 udims = selectedImageGeom.getDimensions(); - // Convert dimensions to signed integers for offset calculations std::array dims = { static_cast(udims[0]), static_cast(udims[1]), @@ -709,15 +682,11 @@ Result<> FillBadData::operator()() const const usize totalPoints = featureIdsStore.getNumberOfTuples(); - // Get cell phases array if we need to assign large regions to a new phase Int32Array* cellPhasesPtr = nullptr; usize maxPhase = 0; - if(m_InputValues->storeAsNewPhase) { cellPhasesPtr = m_DataStructure.getDataAs(m_InputValues->cellPhasesArrayPath); - - // Find the maximum existing phase value for(usize i = 0; i < totalPoints; i++) { if((*cellPhasesPtr)[i] > maxPhase) @@ -727,35 +696,50 @@ Result<> FillBadData::operator()() const } } - // Count the number of existing features for array sizing usize numFeatures = 0; - for(usize i = 0; i < totalPoints; i++) { - int32 featureName = featureIdsStore[i]; - 
if(featureName > numFeatures) + const usize bufSize = 65536; + std::vector buf(bufSize); + for(usize offset = 0; offset < totalPoints; offset += bufSize) { - numFeatures = featureName; + const usize count = std::min(bufSize, totalPoints - offset); + auto readResult = featureIdsStore.copyIntoBuffer(offset, nonstd::span(buf.data(), count)); + if(readResult.invalid()) + { + return MergeResults(readResult, + MakeErrorResult(-71503, fmt::format("FillBadData: failed to scan feature IDs store for maximum feature id (chunk [{}, {}) of {}).", offset, offset + count, totalPoints))); + } + for(usize i = 0; i < count; i++) + { + if(buf[i] > static_cast(numFeatures)) + { + numFeatures = buf[i]; + } + } } } - // Initialize data structures for chunk-aware connected component labeling - ChunkAwareUnionFind unionFind; // Tracks label equivalences and sizes - std::unordered_map provisionalLabels; // Maps voxel index to provisional label - std::unordered_set smallRegions; // Set of small region roots (unused currently) + ChunkAwareUnionFind unionFind; + std::unordered_map provisionalLabels; + std::unordered_set smallRegions; - // Phase 1: Chunk-Sequential Connected Component Labeling m_MessageHandler({IFilter::Message::Type::Info, "Phase 1/4: Labeling connected components..."}); - phaseOneCCL(featureIdsStore, unionFind, provisionalLabels, dims); + auto phaseOneResult = phaseOneCCL(featureIdsStore, unionFind, provisionalLabels, dims); + if(phaseOneResult.invalid()) + { + return phaseOneResult; + } - // Phase 2: Global Resolution of equivalences m_MessageHandler({IFilter::Message::Type::Info, "Phase 2/4: Resolving region equivalences..."}); phaseTwoGlobalResolution(unionFind, smallRegions); - // Phase 3: Relabeling based on region size classification m_MessageHandler({IFilter::Message::Type::Info, "Phase 3/4: Classifying region sizes..."}); - phaseThreeRelabeling(featureIdsStore, cellPhasesPtr, provisionalLabels, smallRegions, unionFind, maxPhase); + auto phaseThreeResult = 
phaseThreeRelabeling(featureIdsStore, cellPhasesPtr, provisionalLabels, smallRegions, unionFind, maxPhase); + if(phaseThreeResult.invalid()) + { + return phaseThreeResult; + } - // Phase 4: Iterative morphological fill m_MessageHandler({IFilter::Message::Type::Info, "Phase 4/4: Filling small defects..."}); phaseFourIterativeFill(featureIdsStore, dims, numFeatures); diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp index 1e994f2948..e40989040e 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp @@ -108,8 +108,9 @@ class SIMPLNXCORE_EXPORT FillBadData * @param unionFind Union-find structure for tracking equivalences * @param provisionalLabels Map from voxel index to provisional label * @param dims Image geometry dimensions + * @return Result<> invalid if a bulk read from the feature IDs store fails. */ - static void phaseOneCCL(Int32AbstractDataStore& featureIdsStore, ChunkAwareUnionFind& unionFind, std::unordered_map& provisionalLabels, const std::array& dims); + static Result<> phaseOneCCL(Int32AbstractDataStore& featureIdsStore, ChunkAwareUnionFind& unionFind, std::unordered_map& provisionalLabels, const std::array& dims); /** * @brief Phase 2: Global resolution of equivalences and region classification @@ -126,9 +127,10 @@ class SIMPLNXCORE_EXPORT FillBadData * @param smallRegions Set of labels for small regions * @param unionFind Union-find for looking up equivalences * @param maxPhase Maximum phase value (for new phase assignment) + * @return Result<> invalid if a bulk read or write against the feature IDs store fails. 
*/ - void phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, const std::unordered_map& provisionalLabels, - const std::unordered_set& smallRegions, ChunkAwareUnionFind& unionFind, size_t maxPhase) const; + Result<> phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, const std::unordered_map& provisionalLabels, + const std::unordered_set& smallRegions, ChunkAwareUnionFind& unionFind, size_t maxPhase) const; /** * @brief Phase 4: Iterative morphological fill diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/QuickSurfaceMesh.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/QuickSurfaceMesh.cpp index e0016d3d54..10ffc4c42f 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/QuickSurfaceMesh.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/QuickSurfaceMesh.cpp @@ -1608,11 +1608,6 @@ void QuickSurfaceMesh::generateTripleLines() // Cycle through again assigning coordinates to each node and assigning node numbers and feature labels to each triangle ParallelData3DAlgorithm algorithm; algorithm.setRange(Range3D(xP - 1, yP - 1, zP - 1)); - if(featureIds.getChunkShape().has_value()) - { - const auto chunkShape = featureIds.getChunkShape().value(); - algorithm.setChunkSize(Range3D(chunkShape[0], chunkShape[1], chunkShape[2])); - } algorithm.setParallelizationEnabled(false); algorithm.execute(GenerateTripleLinesImpl(imageGeom, featureIds, vertexMap, edgeMap)); diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/WriteVtkRectilinearGrid.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/WriteVtkRectilinearGrid.cpp index c2862ad4ab..454566da91 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/WriteVtkRectilinearGrid.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/WriteVtkRectilinearGrid.cpp @@ -69,8 +69,13 @@ Result<> WriteVtkRectilinearGrid::operator()() 
for(const DataPath& arrayPath : m_InputValues->SelectedDataArrayPaths) { - ExecuteDataFunction(WriteVtkDataArrayFunctor{}, m_DataStructure.getDataAs(arrayPath)->getDataType(), outputFile, m_InputValues->WriteBinaryFile, m_DataStructure, arrayPath, - m_MessageHandler); + auto writeArrayResult = ExecuteDataFunction(WriteVtkDataArrayFunctor{}, m_DataStructure.getDataAs(arrayPath)->getDataType(), outputFile, m_InputValues->WriteBinaryFile, + m_DataStructure, arrayPath, m_MessageHandler); + if(writeArrayResult.invalid()) + { + fclose(outputFile); + return MergeResults(writeArrayResult, MakeErrorResult(-2091, fmt::format("Error writing data array '{}' to VTK file '{}'", arrayPath.toString(), m_InputValues->OutputFile.string()))); + } } fclose(outputFile); diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeFeatureNeighborsFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeFeatureNeighborsFilter.cpp index bc22762949..b518d3d59d 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeFeatureNeighborsFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeFeatureNeighborsFilter.cpp @@ -158,7 +158,7 @@ IFilter::PreflightResult ComputeFeatureNeighborsFilter::preflightImpl(const Data // Create the SurfaceFeatures Output Data Array in the Feature Attribute Matrix if(storeSurfaceFeatures) { - auto action = std::make_unique(DataType::boolean, tupleShape, cDims, surfaceFeaturesPath, CreateArrayAction::k_DefaultDataFormat, "false"); + auto action = std::make_unique(DataType::boolean, tupleShape, cDims, surfaceFeaturesPath, "", "false"); actions.appendAction(std::move(action)); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMeansFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMeansFilter.cpp index 91932895f9..5bea16d31f 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMeansFilter.cpp +++ 
b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMeansFilter.cpp @@ -137,8 +137,8 @@ IFilter::PreflightResult ComputeKMeansFilter::preflightImpl(const DataStructure& } { - auto createAction = std::make_unique(DataType::int32, clusterArray->getTupleShape(), std::vector{1}, pSelectedArrayPathValue.replaceName(pFeatureIdsArrayNameValue), - CreateArrayAction::k_DefaultDataFormat, "0"); + auto createAction = + std::make_unique(DataType::int32, clusterArray->getTupleShape(), std::vector{1}, pSelectedArrayPathValue.replaceName(pFeatureIdsArrayNameValue), "", "0"); resultOutputActions.value().appendAction(std::move(createAction)); } @@ -146,7 +146,7 @@ IFilter::PreflightResult ComputeKMeansFilter::preflightImpl(const DataStructure& { DataPath tempPath = DataPath({k_MaskName}); { - auto createAction = std::make_unique(DataType::boolean, clusterArray->getTupleShape(), std::vector{1}, tempPath, CreateArrayAction::k_DefaultDataFormat, "true"); + auto createAction = std::make_unique(DataType::boolean, clusterArray->getTupleShape(), std::vector{1}, tempPath, "", "true"); resultOutputActions.value().appendAction(std::move(createAction)); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMedoidsFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMedoidsFilter.cpp index 7051340eb9..556944f6c8 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMedoidsFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeKMedoidsFilter.cpp @@ -132,8 +132,8 @@ IFilter::PreflightResult ComputeKMedoidsFilter::preflightImpl(const DataStructur } { - auto createAction = std::make_unique(DataType::int32, clusterArray->getTupleShape(), std::vector{1}, pSelectedArrayPathValue.replaceName(pFeatureIdsArrayNameValue), - CreateArrayAction::k_DefaultDataFormat, "0"); + auto createAction = + std::make_unique(DataType::int32, clusterArray->getTupleShape(), std::vector{1}, pSelectedArrayPathValue.replaceName(pFeatureIdsArrayNameValue), 
"", "0"); resultOutputActions.value().appendAction(std::move(createAction)); } @@ -141,7 +141,7 @@ IFilter::PreflightResult ComputeKMedoidsFilter::preflightImpl(const DataStructur { DataPath tempPath = DataPath({k_MaskName}); { - auto createAction = std::make_unique(DataType::boolean, clusterArray->getTupleShape(), std::vector{1}, tempPath, CreateArrayAction::k_DefaultDataFormat, "true"); + auto createAction = std::make_unique(DataType::boolean, clusterArray->getTupleShape(), std::vector{1}, tempPath, "", "true"); resultOutputActions.value().appendAction(std::move(createAction)); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeSurfaceFeaturesFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeSurfaceFeaturesFilter.cpp index 5a07499910..f9808f0c63 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeSurfaceFeaturesFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeSurfaceFeaturesFilter.cpp @@ -109,8 +109,8 @@ IFilter::PreflightResult ComputeSurfaceFeaturesFilter::preflightImpl(const DataS tupleDims = surfaceFeaturesParent->getShape(); } - auto createSurfaceFeaturesAction = std::make_unique( - DataType::uint8, tupleDims, std::vector{1}, pCellFeaturesAttributeMatrixPathValue.createChildPath(pSurfaceFeaturesArrayNameValue), CreateArrayAction::k_DefaultDataFormat, "0"); + auto createSurfaceFeaturesAction = + std::make_unique(DataType::uint8, tupleDims, std::vector{1}, pCellFeaturesAttributeMatrixPathValue.createChildPath(pSurfaceFeaturesArrayNameValue), "", "0"); resultOutputActions.value().appendAction(std::move(createSurfaceFeaturesAction)); return {std::move(resultOutputActions), std::move(preflightUpdatedValues)}; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeVectorColorsFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeVectorColorsFilter.cpp index 38e6ae243e..44451da0e2 100644 --- 
a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeVectorColorsFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ComputeVectorColorsFilter.cpp @@ -109,7 +109,7 @@ IFilter::PreflightResult ComputeVectorColorsFilter::preflightImpl(const DataStru if(!pUseGoodVoxelsValue) { - auto action = std::make_unique(DataType::boolean, vectorsTupShape, std::vector{1}, k_MaskArrayPath, CreateArrayAction::k_DefaultDataFormat, "true"); + auto action = std::make_unique(DataType::boolean, vectorsTupShape, std::vector{1}, k_MaskArrayPath, "", "true"); resultOutputActions.value().appendAction(std::move(action)); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateDataArrayAdvancedFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateDataArrayAdvancedFilter.cpp index a2890660b8..4b94916513 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateDataArrayAdvancedFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateDataArrayAdvancedFilter.cpp @@ -161,7 +161,6 @@ IFilter::PreflightResult CreateDataArrayAdvancedFilter::preflightImpl(const Data auto compDimsData = filterArgs.value(k_CompDims_Key); auto dataArrayPath = filterArgs.value(k_DataPath_Key); auto tableData = filterArgs.value(k_TupleDims_Key); - auto dataFormat = filterArgs.value(k_DataFormat_Key); auto initFillValue = filterArgs.value(k_InitValue_Key); auto initIncFillValue = filterArgs.value(k_StartingFillValue_Key); auto stepValue = filterArgs.value(k_StepValue_Key); @@ -221,6 +220,8 @@ IFilter::PreflightResult CreateDataArrayAdvancedFilter::preflightImpl(const Data usize numTuples = std::accumulate(tupleDims.begin(), tupleDims.end(), static_cast(1), std::multiplies<>()); + auto dataFormat = filterArgs.value(k_DataFormat_Key); + auto arrayDataType = ConvertNumericTypeToDataType(numericType); auto action = std::make_unique(ConvertNumericTypeToDataType(numericType), tupleDims, compDims, dataArrayPath, dataFormat); diff --git 
a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateDataArrayFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateDataArrayFilter.cpp index aec0be53ad..6d29ddb265 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateDataArrayFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateDataArrayFilter.cpp @@ -110,8 +110,6 @@ IFilter::PreflightResult CreateDataArrayFilter::preflightImpl(const DataStructur auto dataArrayPath = filterArgs.value(k_DataPath_Key); auto initValue = filterArgs.value(k_InitializationValue_Key); auto tableData = filterArgs.value(k_TupleDims_Key); - auto dataFormat = filterArgs.value(k_DataFormat_Key); - nx::core::Result resultOutputActions; if(initValue.empty()) @@ -156,6 +154,8 @@ IFilter::PreflightResult CreateDataArrayFilter::preflightImpl(const DataStructur } } + auto dataFormat = filterArgs.value(k_DataFormat_Key); + // Sanity check that init value can be converted safely to the final numeric type integrated into action auto action = std::make_unique(ConvertNumericTypeToDataType(numericType), tupleDims, compDims, dataArrayPath, dataFormat, initValue); diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateFeatureArrayFromElementArrayFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateFeatureArrayFromElementArrayFilter.cpp index 81e9b6d415..ae061a3d82 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateFeatureArrayFromElementArrayFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/CreateFeatureArrayFromElementArrayFilter.cpp @@ -104,8 +104,8 @@ IFilter::PreflightResult CreateFeatureArrayFromElementArrayFilter::preflightImpl { DataType dataType = selectedCellArray.getDataType(); - auto createArrayAction = std::make_unique(dataType, amTupleShape, selectedCellArrayStore.getComponentShape(), - pCellFeatureAttributeMatrixPathValue.createChildPath(pCreatedArrayNameValue), CreateArrayAction::k_DefaultDataFormat, "0"); + auto createArrayAction = + 
std::make_unique(dataType, amTupleShape, selectedCellArrayStore.getComponentShape(), pCellFeatureAttributeMatrixPathValue.createChildPath(pCreatedArrayNameValue), "", "0"); resultOutputActions.value().appendAction(std::move(createArrayAction)); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.cpp index 8615e53289..c378d5a743 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/DBSCANFilter.cpp @@ -160,8 +160,8 @@ IFilter::PreflightResult DBSCANFilter::preflightImpl(const DataStructure& dataSt } { - auto createAction = std::make_unique(DataType::int32, clusterArray->getTupleShape(), std::vector{1}, pSelectedArrayPathValue.replaceName(pFeatureIdsArrayNameValue), - CreateArrayAction::k_DefaultDataFormat, "0"); + auto createAction = + std::make_unique(DataType::int32, clusterArray->getTupleShape(), std::vector{1}, pSelectedArrayPathValue.replaceName(pFeatureIdsArrayNameValue), "", "0"); resultOutputActions.value().appendAction(std::move(createAction)); } @@ -169,7 +169,7 @@ IFilter::PreflightResult DBSCANFilter::preflightImpl(const DataStructure& dataSt { DataPath tempPath = DataPath({k_MaskName}); { - auto createAction = std::make_unique(DataType::boolean, clusterArray->getTupleShape(), std::vector{1}, tempPath, CreateArrayAction::k_DefaultDataFormat, "true"); + auto createAction = std::make_unique(DataType::boolean, clusterArray->getTupleShape(), std::vector{1}, tempPath, "", "true"); resultOutputActions.value().appendAction(std::move(createAction)); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/MapPointCloudToRegularGridFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/MapPointCloudToRegularGridFilter.cpp index bb57e5683c..6dfab6932a 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/MapPointCloudToRegularGridFilter.cpp +++ 
b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/MapPointCloudToRegularGridFilter.cpp @@ -351,7 +351,7 @@ IFilter::PreflightResult MapPointCloudToRegularGridFilter::preflightImpl(const D { DataPath tempPath = DataPath({k_MaskName}); { - auto createAction = std::make_unique(DataType::boolean, vertexData->getShape(), std::vector{1}, tempPath, CreateArrayAction::k_DefaultDataFormat, "true"); + auto createAction = std::make_unique(DataType::boolean, vertexData->getShape(), std::vector{1}, tempPath, "", "true"); actions.appendAction(std::move(createAction)); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ReadDREAM3DFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ReadDREAM3DFilter.cpp index 664698b6aa..2017736cb3 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ReadDREAM3DFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ReadDREAM3DFilter.cpp @@ -6,7 +6,6 @@ #include "simplnx/Parameters/StringParameter.hpp" #include "simplnx/Pipeline/Pipeline.hpp" #include "simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp" -#include "simplnx/Utilities/Parsing/HDF5/IO/FileIO.hpp" #include "simplnx/Utilities/SIMPLConversion.hpp" @@ -16,7 +15,6 @@ namespace { constexpr nx::core::int32 k_NoImportPathError = -1; -constexpr nx::core::int32 k_FailedOpenFileIOError = -25; constexpr nx::core::int32 k_UnsupportedPathImportPolicyError = -51; } // namespace @@ -82,16 +80,10 @@ IFilter::PreflightResult ReadDREAM3DFilter::preflightImpl(const DataStructure& d { return {MakeErrorResult(k_NoImportPathError, "Import file path not provided.")}; } - auto fileReader = nx::core::HDF5::FileIO::ReadFile(importData.FilePath); - if(!fileReader.isValid()) - { - return {MakeErrorResult(k_FailedOpenFileIOError, fmt::format("Failed to open the HDF5 file at the specified path: '{}'", importData.FilePath.string()))}; - } - Result result; OutputActions& actions = result.value(); - Result dataStructureResult = DREAM3D::ImportDataStructureFromFile(fileReader, true); 
+ Result dataStructureResult = DREAM3D::LoadDataStructureMetadata(importData.FilePath); if(dataStructureResult.invalid()) { return {ConvertResultTo(ConvertResult(std::move(dataStructureResult)), {})}; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ReadTextDataArrayFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ReadTextDataArrayFilter.cpp index 296236ab5c..b81408efa8 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ReadTextDataArrayFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ReadTextDataArrayFilter.cpp @@ -99,8 +99,6 @@ IFilter::PreflightResult ReadTextDataArrayFilter::preflightImpl(const DataStruct auto useDims = filterArgs.value(k_AdvancedOptions_Key); auto tableData = filterArgs.value(k_NTuples_Key); - auto dataFormat = filterArgs.value(k_DataFormat_Key); - nx::core::Result resultOutputActions; ShapeType tupleDims = {}; @@ -139,6 +137,8 @@ IFilter::PreflightResult ReadTextDataArrayFilter::preflightImpl(const DataStruct } } + auto dataFormat = filterArgs.value(k_DataFormat_Key); + auto action = std::make_unique(ConvertNumericTypeToDataType(numericType), tupleDims, std::vector{nComp}, arrayPath, dataFormat); resultOutputActions.value().appendAction(std::move(action)); diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ScalarSegmentFeaturesFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ScalarSegmentFeaturesFilter.cpp index 636ca22ccf..f461b2245f 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ScalarSegmentFeaturesFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/ScalarSegmentFeaturesFilter.cpp @@ -137,11 +137,11 @@ IFilter::PreflightResult ScalarSegmentFeaturesFilter::preflightImpl(const DataSt } // Create the Cell Level FeatureIds array - auto createFeatureIdsAction = std::make_unique(DataType::int32, cellTupleDims, std::vector{1}, featureIdsPath, createdArrayFormat, "0"); + auto createFeatureIdsAction = std::make_unique(DataType::int32, cellTupleDims, 
std::vector{1}, featureIdsPath, "", "0"); // Create the Feature Attribute Matrix auto createFeatureGroupAction = std::make_unique(cellFeaturesPath, std::vector{1}); - auto createActiveAction = std::make_unique(DataType::uint8, std::vector{1}, std::vector{1}, activeArrayPath, createdArrayFormat, "1"); + auto createActiveAction = std::make_unique(DataType::uint8, std::vector{1}, std::vector{1}, activeArrayPath, "", "1"); nx::core::Result resultOutputActions; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/SilhouetteFilter.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/SilhouetteFilter.cpp index 96a439ef57..1774efbb8b 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/SilhouetteFilter.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/SilhouetteFilter.cpp @@ -120,7 +120,7 @@ IFilter::PreflightResult SilhouetteFilter::preflightImpl(const DataStructure& da { DataPath tempPath = DataPath({k_MaskName}); { - auto createAction = std::make_unique(DataType::boolean, clusterArray->getTupleShape(), std::vector{1}, tempPath, CreateArrayAction::k_DefaultDataFormat, "true"); + auto createAction = std::make_unique(DataType::boolean, clusterArray->getTupleShape(), std::vector{1}, tempPath, "", "true"); resultOutputActions.value().appendAction(std::move(createAction)); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/utils/VtkUtilities.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/utils/VtkUtilities.hpp index 6270f4c036..9d96dfb7ae 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/utils/VtkUtilities.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/utils/VtkUtilities.hpp @@ -134,14 +134,17 @@ std::string TypeForPrimitive(const IFilter::MessageHandler& messageHandler) return ""; } +// ----------------------------------------------------------------------------- +// Functor for writing a DataArray to a VTK legacy file via FILE* I/O. +// Supports both binary (big-endian) and ASCII output modes. 
// ----------------------------------------------------------------------------- struct WriteVtkDataArrayFunctor { template - void operator()(FILE* outputFile, bool binary, DataStructure& dataStructure, const DataPath& arrayPath, const IFilter::MessageHandler& messageHandler) + Result<> operator()(FILE* outputFile, bool binary, DataStructure& dataStructure, const DataPath& arrayPath, const IFilter::MessageHandler& messageHandler) { auto* dataArray = dataStructure.getDataAs>(arrayPath); - auto& dataStore = dataArray->template getIDataStoreRefAs>(); + auto& dataStore = dataArray->getDataStoreRef(); messageHandler(IFilter::Message::Type::Info, fmt::format("Writing Cell Data {}", arrayPath.getTargetName())); @@ -161,16 +164,62 @@ struct WriteVtkDataArrayFunctor fprintf(outputFile, "LOOKUP_TABLE default\n"); if(binary) { - if constexpr(endian::little == endian::native) + // --------------------------------------------------------------- + // Chunked binary write pattern for OOC compatibility + // --------------------------------------------------------------- + // The original code used dataArray->data() for a single fwrite, + // which requires the entire array to be resident in memory. This + // fails for OOC stores where data lives on disk and data() is + // not available. + // + // Instead, we read 4096 elements at a time into a local buffer + // via copyIntoBuffer (the OOC-compatible bulk read API), perform + // an in-place byte swap to big-endian (VTK legacy binary format + // requires big-endian), and fwrite the buffer. This keeps memory + // usage constant regardless of array size. + // + // For bool arrays, copyIntoBuffer is not available (bool is not + // a supported span type), so we use per-element getValue() and + // convert to uint8 (0 or 1). 
+ // --------------------------------------------------------------- + constexpr usize k_ChunkSize = 4096; + for(usize offset = 0; offset < totalElements; offset += k_ChunkSize) { - dataArray->byteSwapElements(); + usize count = std::min(k_ChunkSize, totalElements - offset); + if constexpr(std::is_same_v) + { + // Bool special case: convert to uint8 via per-element access. + std::vector buf(count); + for(usize i = 0; i < count; i++) + { + buf[i] = dataStore.getValue(offset + i) ? 1 : 0; + } + fwrite(buf.data(), sizeof(uint8), count, outputFile); + } + else + { + // General case: bulk read into buffer, byte-swap, then write. + std::vector buf(count); + auto copyResult = dataStore.copyIntoBuffer(offset, nonstd::span(buf.data(), count)); + if(copyResult.invalid()) + { + return MakeErrorResult(-2090, fmt::format("Failed to read chunk [{}, {}) from data array '{}' while writing VTK file: {}", offset, offset + count, arrayPath.toString(), + copyResult.errors().empty() ? "unknown error" : copyResult.errors()[0].message)); + } + if constexpr(endian::little == endian::native) + { + // VTK legacy binary requires big-endian. Swap in the local + // buffer rather than mutating the DataStore, which would be + // slow for OOC stores and would modify shared data. 
+ for(usize i = 0; i < count; i++) + { + buf[i] = nx::core::byteswap(buf[i]); + } + } + fwrite(buf.data(), sizeof(T), count, outputFile); + } } - fwrite(dataStore.data(), sizeof(T), totalElements, outputFile); fprintf(outputFile, "\n"); - if constexpr(endian::little == endian::native) - { - dataArray->byteSwapElements(); - } } else { @@ -184,7 +233,7 @@ struct WriteVtkDataArrayFunctor } if(useIntCast) { - buffer.append(fmt::format(" {:d}", static_cast(dataStore[i]))); + buffer.append(fmt::format(" {:d}", static_cast(dataStore.getValue(i)))); } else if constexpr(std::is_floating_point_v) { @@ -206,6 +255,7 @@ struct WriteVtkDataArrayFunctor buffer.append("\n"); fprintf(outputFile, "%s", buffer.c_str()); } + return {}; } }; diff --git a/src/Plugins/SimplnxCore/test/DREAM3DFileTest.cpp b/src/Plugins/SimplnxCore/test/DREAM3DFileTest.cpp index 80d01581bc..0bafd8e8b9 100644 --- a/src/Plugins/SimplnxCore/test/DREAM3DFileTest.cpp +++ b/src/Plugins/SimplnxCore/test/DREAM3DFileTest.cpp @@ -65,7 +65,7 @@ const FilterHandle k_ImportD3DHandle(Uuid::FromString("0dbd31c7-19e0-4077-83ef-f fs::path GetDataDir(const Application& app) { - return std::filesystem::path(unit_test::k_BinaryTestOutputDir.view()); + return fs::path(unit_test::k_BinaryTestOutputDir.view()); } fs::path GetIODataPath() @@ -144,9 +144,8 @@ DataStructure CreateTestDataStructure() ShapeType tupleShape = {10}; auto* attributeMatrix = AttributeMatrix::Create(dataStructure, DataNames::k_AttributeMatrixName, tupleShape, group1->getId()); - Result<> arrayCreationResults = - ArrayCreationUtilities::CreateArray(dataStructure, tupleShape, std::vector{1}, DataPath({DataNames::k_Group1Name, DataNames::k_AttributeMatrixName, DataNames::k_Array2Name}), - IDataAction::Mode::Execute, ArrayCreationUtilities::k_DefaultDataFormat, "1"); + Result<> arrayCreationResults = ArrayCreationUtilities::CreateArray( + dataStructure, tupleShape, std::vector{1}, DataPath({DataNames::k_Group1Name, DataNames::k_AttributeMatrixName, 
DataNames::k_Array2Name}), IDataAction::Mode::Execute, "1"); return dataStructure; } @@ -283,10 +282,13 @@ TEST_CASE("DREAM3DFileTest:DREAM3D File IO Test", "[WriteDREAM3DFilter]") // Read .dream3d file { auto fileReader = HDF5::FileIO::ReadFile(GetIODataPath()); - auto fileResult = DREAM3D::ReadFile(fileReader); - SIMPLNX_RESULT_REQUIRE_VALID(fileResult); + auto pipelineResult = DREAM3D::ImportPipelineFromFile(fileReader); + SIMPLNX_RESULT_REQUIRE_VALID(pipelineResult); + auto pipeline = std::move(pipelineResult.value()); - auto [pipeline, dataStructure] = fileResult.value(); + auto dsResult = DREAM3D::LoadDataStructure(GetIODataPath()); + SIMPLNX_RESULT_REQUIRE_VALID(dsResult); + DataStructure dataStructure = std::move(dsResult.value()); // Test reading the DataStructure REQUIRE(dataStructure.getData(DataPath({DataNames::k_Group1Name})) != nullptr); diff --git a/src/Plugins/SimplnxCore/test/RotateSampleRefFrameTest.cpp b/src/Plugins/SimplnxCore/test/RotateSampleRefFrameTest.cpp index e65d150cc4..340ce9db5f 100644 --- a/src/Plugins/SimplnxCore/test/RotateSampleRefFrameTest.cpp +++ b/src/Plugins/SimplnxCore/test/RotateSampleRefFrameTest.cpp @@ -70,11 +70,7 @@ TEST_CASE("SimplnxCore::RotateSampleRefFrame", "[Core][RotateSampleRefFrameFilte const DataPath k_OriginalGeomPath({"Original"}); - Result dataStructureResult = - DREAM3D::ImportDataStructureFromFile(fs::path(fmt::format("{}/Rotate_Sample_Ref_Frame_Test_v3/Rotate_Sample_Ref_Frame_Test_v3.dream3d", nx::core::unit_test::k_TestFilesDir)), false); - SIMPLNX_RESULT_REQUIRE_VALID(dataStructureResult); - - DataStructure dataStructure = std::move(dataStructureResult.value()); + DataStructure dataStructure = UnitTest::LoadDataStructure(fs::path(fmt::format("{}/Rotate_Sample_Ref_Frame_Test_v3/Rotate_Sample_Ref_Frame_Test_v3.dream3d", nx::core::unit_test::k_TestFilesDir))); const auto* originalImageGeom = dataStructure.getDataAs(k_OriginalGeomPath); REQUIRE(originalImageGeom != nullptr); diff --git 
a/src/Plugins/SimplnxCore/wrapping/python/simplnxpy.cpp b/src/Plugins/SimplnxCore/wrapping/python/simplnxpy.cpp index 239252013b..f7c8d27a1d 100644 --- a/src/Plugins/SimplnxCore/wrapping/python/simplnxpy.cpp +++ b/src/Plugins/SimplnxCore/wrapping/python/simplnxpy.cpp @@ -1174,8 +1174,8 @@ PYBIND11_MODULE(simplnx, mod) copyDataObjectAction.def(py::init>(), "path"_a, "new_path"_a, "all_created_paths"_a); auto createArrayAction = SIMPLNX_PY_BIND_CLASS_VARIADIC(mod, CreateArrayAction, IDataCreationAction); - createArrayAction.def(py::init&, const std::vector&, const DataPath&, std::string>(), "type"_a, "t_dims"_a, "c_dims"_a, "path"_a, - "data_format"_a = std::string("")); + createArrayAction.def(py::init&, const std::vector&, const DataPath&, std::string, std::string>(), "type"_a, "t_dims"_a, "c_dims"_a, "path"_a, + "data_format"_a = std::string(""), "fill_value"_a = std::string("")); auto createAttributeMatrixAction = SIMPLNX_PY_BIND_CLASS_VARIADIC(mod, CreateAttributeMatrixAction, IDataCreationAction); createAttributeMatrixAction.def(py::init(), "path"_a, "shape"_a); diff --git a/src/simplnx/Core/Application.cpp b/src/simplnx/Core/Application.cpp index ebd49eab85..04300f2569 100644 --- a/src/simplnx/Core/Application.cpp +++ b/src/simplnx/Core/Application.cpp @@ -338,9 +338,9 @@ JsonPipelineBuilder* Application::getPipelineBuilder() const return nullptr; } -std::shared_ptr Application::getIOCollection() const +DataIOCollection& Application::getIOCollection() const { - return m_DataIOCollection; + return *m_DataIOCollection; } std::shared_ptr Application::getIOManager(const std::string& formatName) const @@ -390,7 +390,12 @@ Result<> Application::loadPlugin(const std::filesystem::path& path, bool verbose for(const auto& pluginIO : plugin->getDataIOManagers()) { - m_DataIOCollection->addIOManager(pluginIO); + auto addManagerResult = m_DataIOCollection->addIOManager(pluginIO); + if(addManagerResult.invalid()) + { + return MakeErrorResult( + -34, fmt::format("Failed to 
register data I/O manager from plugin '{}': {}", plugin->getName(), addManagerResult.errors().empty() ? "unknown error" : addManagerResult.errors()[0].message)); + } } return {}; @@ -414,3 +419,8 @@ std::vector Application::getDataStoreFormats() const { return m_DataIOCollection->getFormatNames(); } + +std::vector> Application::getDataStoreFormatDisplayNames() const +{ + return m_DataIOCollection->getFormatDisplayNames(); +} diff --git a/src/simplnx/Core/Application.hpp b/src/simplnx/Core/Application.hpp index 7f9d727b0f..a505f31916 100644 --- a/src/simplnx/Core/Application.hpp +++ b/src/simplnx/Core/Application.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include namespace nx::core @@ -121,9 +122,14 @@ class SIMPLNX_EXPORT Application /** * @brief Returns the collection of data I/O managers. - * @return Shared pointer to the DataIOCollection + * + * The returned reference is non-owning; the DataIOCollection is owned by + * the Application singleton and lives for the entire process lifetime. + * Callers should not attempt to extend its lifetime. + * + * @return Reference to the DataIOCollection owned by the Application. */ - std::shared_ptr getIOCollection() const; + DataIOCollection& getIOCollection() const; /** * @brief Returns the I/O manager for the specified format. @@ -190,6 +196,17 @@ class SIMPLNX_EXPORT Application */ std::vector getDataStoreFormats() const; + /** + * @brief Returns all known format display names as (formatName, displayName) pairs. + * + * Delegates to DataIOCollection::getFormatDisplayNames(). The list always + * includes ("", "Automatic") and (k_InMemoryFormat, "In Memory"), plus any + * plugin-registered entries. 
+ * + * @return Vector of (formatName, displayName) pairs + */ + std::vector> getDataStoreFormatDisplayNames() const; + protected: /** * @brief Constructs an Application using default values and replaces the diff --git a/src/simplnx/Core/Preferences.cpp b/src/simplnx/Core/Preferences.cpp index aa12ef5776..bc5ff09511 100644 --- a/src/simplnx/Core/Preferences.cpp +++ b/src/simplnx/Core/Preferences.cpp @@ -22,10 +22,10 @@ namespace nx::core namespace { constexpr int64 k_LargeDataSize = 1073741824; // 1 GB -constexpr StringLiteral k_LargeDataFormat = ""; constexpr StringLiteral k_Plugin_Key = "plugins"; constexpr StringLiteral k_DefaultFileName = "preferences.json"; constexpr int64 k_ReducedDataStructureSize = 3221225472; // 3 GB +constexpr uint32 k_OocRangeScanTimeoutSecondsDefault = 30; constexpr int32 k_FailedToCreateDirectory_Code = -585; constexpr int32 k_FileDoesNotExist_Code = -586; @@ -77,7 +77,6 @@ void Preferences::setDefaultValues() m_DefaultValues[k_Plugin_Key] = nlohmann::json::object(); m_DefaultValues[k_LargeDataSize_Key] = k_LargeDataSize; - m_DefaultValues[k_PreferredLargeDataFormat_Key] = k_LargeDataFormat; { // Set a default value for out-of-core temp directory. 
@@ -92,6 +91,8 @@ void Preferences::setDefaultValues() #else m_DefaultValues[k_ForceOocData_Key] = false; #endif + + m_DefaultValues[k_OocRangeScanTimeoutSeconds_Key] = k_OocRangeScanTimeoutSecondsDefault; } std::string Preferences::defaultLargeDataFormat() const @@ -107,11 +108,29 @@ void Preferences::setDefaultLargeDataFormat(std::string dataFormat) std::string Preferences::largeDataFormat() const { - return valueAs(k_PreferredLargeDataFormat_Key); + auto formatJson = value(k_PreferredLargeDataFormat_Key); + if(formatJson.is_null() || !formatJson.is_string()) + { + return {}; + } + return formatJson.get(); } void Preferences::setLargeDataFormat(std::string dataFormat) { - m_Values[k_PreferredLargeDataFormat_Key] = dataFormat; + if(dataFormat.empty()) + { + // Remove the key so the default (set by plugins) can take effect. + // An empty string means "not configured", not "in-core". To explicitly + // request in-core storage, pass k_InMemoryFormat instead. This distinction + // matters because the SimplnxOoc plugin sets a default OOC format on startup, + // and erasing the user value lets that default take effect. + m_Values.erase(k_PreferredLargeDataFormat_Key); + } + else + { + m_Values[k_PreferredLargeDataFormat_Key] = dataFormat; + } + // Recompute the cached m_UseOoc flag after any format change checkUseOoc(); } @@ -255,6 +274,24 @@ Result<> Preferences::loadFromFile(const std::filesystem::path& filepath) m_Values = parsedResult; + // Migrate legacy format strings from saved preferences files that were + // written before the OOC architecture was finalized. Two legacy values + // need cleanup: + // - Empty string (""): Old "not configured" state. Removing the key + // lets the plugin-supplied default (e.g., "HDF5-OOC") take effect. + // - "In-Memory": Old explicit in-core sentinel. Replaced by + // k_InMemoryFormat ("Simplnx-Default-In-Memory"). Removing the key + // avoids confusion with the new sentinel value. 
+ if(m_Values.contains(k_PreferredLargeDataFormat_Key) && m_Values[k_PreferredLargeDataFormat_Key].is_string()) + { + const std::string savedFormat = m_Values[k_PreferredLargeDataFormat_Key].get(); + if(savedFormat.empty() || savedFormat == "In-Memory") + { + m_Values.erase(k_PreferredLargeDataFormat_Key); + } + } + + // Recompute derived state from the loaded (and possibly migrated) values checkUseOoc(); updateMemoryDefaults(); return {}; @@ -262,7 +299,21 @@ Result<> Preferences::loadFromFile(const std::filesystem::path& filepath) void Preferences::checkUseOoc() { - m_UseOoc = !value(k_PreferredLargeDataFormat_Key).get().empty(); + // Resolve the format from user values first, then default values (via value()) + auto formatJson = value(k_PreferredLargeDataFormat_Key); + + // If no format is configured (null/non-string), OOC is not active + if(formatJson.is_null() || !formatJson.is_string()) + { + m_UseOoc = false; + return; + } + + // OOC is active when the format is a non-empty string that is NOT the + // explicit in-memory sentinel. This means a plugin (e.g., SimplnxOoc) + // has registered a real OOC format like "HDF5-OOC". + const std::string format = formatJson.get(); + m_UseOoc = !format.empty() && format != k_InMemoryFormat; } bool Preferences::useOocData() const @@ -290,9 +341,15 @@ void Preferences::setForceOocData(bool forceOoc) void Preferences::updateMemoryDefaults() { + // Reserve headroom equal to 2x the single-array large-data threshold. + // This leaves room for the OS, the application, and at least one large + // array being constructed while the DataStructure holds existing data. 
const uint64 minimumRemaining = 2 * defaultValueAs(k_LargeDataSize_Key); const uint64 totalMemory = Memory::GetTotalMemory(); uint64 targetValue = totalMemory - minimumRemaining; + + // On low-memory systems where the reservation exceeds total RAM, + // fall back to using half of total RAM as the threshold if(minimumRemaining >= totalMemory) { targetValue = totalMemory / 2; @@ -320,4 +377,31 @@ void Preferences::setOocTempDirectory(const std::string& path) plugin->setOocTempDirectory(path); } } + +uint32 Preferences::oocRangeScanTimeoutSeconds() const +{ + return value(k_OocRangeScanTimeoutSeconds_Key).get(); +} + +void Preferences::setOocRangeScanTimeoutSeconds(uint32 seconds) +{ + setValue(k_OocRangeScanTimeoutSeconds_Key, seconds); +} + +uint64 Preferences::oocMemoryBudgetBytes() const +{ + // Hard-coded fallback of 8 GB. This conservative default is used when: + // 1. The user has never saved an explicit budget preference, AND + // 2. The SimplnxOoc plugin has not yet called setOocMemoryBudgetBytes() + // with its computed default (50% of system RAM). + // Using m_Values.value() (not the value() member) reads directly from + // user-set values with the fallback, bypassing the default-value layer. 
+ static constexpr uint64 k_DefaultBudget = 8ULL * 1024 * 1024 * 1024; + return m_Values.value(k_OocMemoryBudgetBytes_Key, k_DefaultBudget); +} + +void Preferences::setOocMemoryBudgetBytes(uint64 bytes) +{ + m_Values[k_OocMemoryBudgetBytes_Key] = bytes; +} } // namespace nx::core diff --git a/src/simplnx/Core/Preferences.hpp b/src/simplnx/Core/Preferences.hpp index cec687aef0..a1022ff446 100644 --- a/src/simplnx/Core/Preferences.hpp +++ b/src/simplnx/Core/Preferences.hpp @@ -24,11 +24,49 @@ class SIMPLNX_EXPORT Preferences friend class AbstractPlugin; public: - static inline constexpr StringLiteral k_LargeDataSize_Key = "large_data_size"; // bytes - static inline constexpr StringLiteral k_PreferredLargeDataFormat_Key = "large_data_format"; // string - static inline constexpr StringLiteral k_LargeDataStructureSize_Key = "large_datastructure_size"; // bytes - static inline constexpr StringLiteral k_ForceOocData_Key = "force_ooc_data"; // boolean - static inline constexpr nx::core::StringLiteral k_OoCTempDirectory_ID = "ooc_temp_directory"; // Out-of-Core temp directory + /// @name Preference Keys + /// JSON keys used to store and retrieve preference values. These keys appear + /// in the serialized preferences.json file and are used internally by the + /// getter/setter methods below. + /// @{ + + /// Byte-size threshold above which a single DataArray is considered "large" + /// and may be written to an OOC-capable format instead of in-memory storage. + static inline constexpr StringLiteral k_LargeDataSize_Key = "large_data_size"; + + /// Name of the preferred storage format for large DataArrays (e.g., "HDF5-OOC"). + /// An empty string means "not yet configured by the user or plugin". + static inline constexpr StringLiteral k_PreferredLargeDataFormat_Key = "large_data_format"; + + /// Sentinel value for k_PreferredLargeDataFormat_Key that explicitly requests + /// in-memory storage. 
This is distinct from an empty string, which means + /// "not configured" and falls back to plugin-supplied defaults. + static inline constexpr StringLiteral k_InMemoryFormat = "Simplnx-Default-In-Memory"; + + /// Byte-size threshold for the entire DataStructure. When total memory usage + /// approaches this value, the application may switch to OOC storage for new arrays. + /// The default is computed dynamically by updateMemoryDefaults() based on system RAM. + static inline constexpr StringLiteral k_LargeDataStructureSize_Key = "large_datastructure_size"; + + /// Boolean flag that, when true, forces all new DataArrays to use OOC storage + /// regardless of their size. Only takes effect when an OOC format is active. + static inline constexpr StringLiteral k_ForceOocData_Key = "force_ooc_data"; + + /// Filesystem path to the directory where OOC temporary files (chunk stores, + /// backing HDF5 files) are created during filter execution. + static inline constexpr nx::core::StringLiteral k_OoCTempDirectory_ID = "ooc_temp_directory"; + + /// Timeout in seconds for the background thread that scans OOC DataArrays to + /// compute their value ranges (min/max). If the scan does not complete within + /// this window, the range is reported as unknown. Default: 30 seconds. + static inline constexpr StringLiteral k_OocRangeScanTimeoutSeconds_Key = "ooc_range_scan_timeout_seconds"; + + /// Total memory budget in bytes shared across all OOC caching subsystems + /// (chunk cache, stride cache, partition cache). The OOC memory budget manager + /// distributes this budget via global LRU eviction. Default: 8 GB. + static inline constexpr StringLiteral k_OocMemoryBudgetBytes_Key = "ooc_memory_budget_bytes"; + + /// @} /** * @brief Returns the default file path for storing preferences based on the application name. 
@@ -227,7 +265,15 @@ class SIMPLNX_EXPORT Preferences void setForceOocData(bool forceOoc); /** - * @brief Updates memory-related default values based on system capabilities. + * @brief Recomputes the default value for k_LargeDataStructureSize_Key based + * on the current system's total physical RAM. + * + * The target value is (totalRAM - 2 * k_LargeDataSize), which reserves + * headroom for the OS and the application itself. If the reservation would + * exceed total RAM (e.g., on a low-memory system), the fallback is totalRAM / 2. + * + * Called automatically during construction, after loadFromFile(), and after + * clear(). Can also be called explicitly after changing k_LargeDataSize_Key. */ void updateMemoryDefaults(); @@ -249,6 +295,58 @@ class SIMPLNX_EXPORT Preferences */ void setOocTempDirectory(const std::string& path); + /** + * @brief Gets the timeout for the background OOC range scan. + * + * The range scan runs on a background thread after an OOC DataArray is loaded, + * computing min/max values by reading through all chunks sequentially. If the + * scan does not complete within this timeout, the range is reported as unknown + * and the UI shows "N/A" for the array's value range. + * + * @return Timeout in seconds (default 30) + */ + uint32 oocRangeScanTimeoutSeconds() const; + + /** + * @brief Sets the timeout for the background OOC range scan. + * @param seconds Timeout value in seconds. A value of 0 effectively disables + * the range scan by expiring it immediately. + */ + void setOocRangeScanTimeoutSeconds(uint32 seconds); + + /** + * @brief Gets the total memory budget for all OOC caching subsystems. + * + * The OOC memory budget manager distributes this budget across the chunk + * cache, stride cache, and partition cache using global LRU eviction. When + * the combined memory usage of all caches exceeds this budget, the least + * recently used entries are evicted to make room for new data. 
+ * + * The default value (8 GB) is a conservative fallback used when no plugin + * has configured a more appropriate value. On startup, the SimplnxOoc plugin + * calls OocMemoryBudgetManager::defaultBudgetBytes() (50% of system RAM) and + * sets that as the budget, unless the user has already saved an explicit + * preference via the UI. + * + * @note This reads from m_Values (user-set) directly, NOT from m_DefaultValues, + * because the default is hard-coded as a compile-time constant. + * + * @return Budget in bytes (default 8 GB if not explicitly set) + */ + uint64 oocMemoryBudgetBytes() const; + + /** + * @brief Sets the total memory budget for all OOC caching subsystems. + * + * The new budget takes effect immediately for subsequent cache eviction + * decisions. Existing cached data that exceeds the new budget will be + * evicted lazily as new cache entries are requested. + * + * @param bytes Budget in bytes. Must be > 0; passing 0 would effectively + * disable caching. + */ + void setOocMemoryBudgetBytes(uint64 bytes); + protected: /** * @brief Initializes all default preference values for the application. @@ -264,7 +362,13 @@ class SIMPLNX_EXPORT Preferences void addDefaultValues(std::string pluginName, std::string valueName, const nlohmann::json& value); /** - * @brief Checks and updates whether out-of-core mode should be used based on current settings. + * @brief Recomputes the cached m_UseOoc flag based on the current value of + * k_PreferredLargeDataFormat_Key. + * + * OOC mode is considered active when the resolved format string is non-empty + * and is not the sentinel value k_InMemoryFormat. This method is called after + * any operation that could change the format: construction, loadFromFile(), + * setLargeDataFormat(), and setDefaultLargeDataFormat(). 
*/ void checkUseOoc(); diff --git a/src/simplnx/DataStructure/AbstractDataStore.hpp b/src/simplnx/DataStructure/AbstractDataStore.hpp index 2996fdce12..78a5140210 100644 --- a/src/simplnx/DataStructure/AbstractDataStore.hpp +++ b/src/simplnx/DataStructure/AbstractDataStore.hpp @@ -410,6 +410,64 @@ class AbstractDataStore : public IDataStore */ virtual void setValue(usize index, value_type value) = 0; + /** + * @brief Copies a contiguous range of values from this data store into the + * provided caller-owned buffer. + * + * This is the primary bulk-read API for algorithms that need to process data + * in contiguous blocks. It replaces the earlier chunk-based API and provides + * a single uniform interface that works identically for both in-memory and + * out-of-core (OOC) data stores: + * + * - **In-memory (DataStore):** Performs a direct std::copy from the backing + * array into the buffer. This is essentially zero-overhead. + * - **Out-of-core (OOC stores):** The OOC subclass translates the flat + * element range into the appropriate chunk reads from the backing HDF5 + * file, coalescing I/O where possible. The caller does not need to know + * the chunk layout. + * - **Empty (EmptyDataStore):** Returns an invalid Result<> because no data + * exists. + * + * The number of elements to copy is determined by `buffer.size()`. The caller + * is responsible for ensuring the buffer is large enough and that the range + * `[startIndex, startIndex + buffer.size())` does not exceed `getSize()`. + * + * @param startIndex The starting flat element index to read from + * @param buffer A span to receive the copied values; its size determines how + * many elements are read + * @return Result<> valid on success; invalid with an error message if the + * requested range exceeds the store's size or the store has no data. 
+ */ + virtual Result<> copyIntoBuffer(usize startIndex, nonstd::span buffer) const = 0; + + /** + * @brief Copies values from the provided caller-owned buffer into a + * contiguous range of this data store. + * + * This is the primary bulk-write API, the write-side counterpart of + * copyIntoBuffer(). It provides a single uniform interface for both + * in-memory and out-of-core (OOC) data stores: + * + * - **In-memory (DataStore):** Performs a direct std::copy from the buffer + * into the backing array. + * - **Out-of-core (OOC stores):** The OOC subclass translates the flat + * element range into the appropriate chunk writes to the backing HDF5 + * file. + * - **Empty (EmptyDataStore):** Returns an invalid Result<> because no data + * exists. + * + * The number of elements to copy is determined by `buffer.size()`. The caller + * is responsible for ensuring the range `[startIndex, startIndex + buffer.size())` + * does not exceed `getSize()`. + * + * @param startIndex The starting flat element index to write to + * @param buffer A span containing the values to copy into the store; its + * size determines how many elements are written + * @return Result<> valid on success; invalid with an error message if the + * requested range exceeds the store's size or the store has no data. + */ + virtual Result<> copyFromBuffer(usize startIndex, nonstd::span buffer) = 0; + /** * @brief Returns the value found at the specified index of the DataStore. * This cannot be used to edit the value found at the specified index. @@ -804,132 +862,6 @@ class AbstractDataStore : public IDataStore return getValue(index); } - std::optional getChunkShape() const override - { - return {}; - } - - /** - * @brief Returns the data for a particular data chunk. Returns an empty span if the data is not chunked. 
- * @param chunkPosition - * @return chunk data as span - */ - virtual std::vector getChunkValues(const ShapeType& chunkPosition) const - { - return {}; - } - - /** - * @brief Returns the number of chunks used to store the data. - * @return uint64 - */ - virtual uint64 getNumberOfChunks() const = 0; - // { - // return 1; - // } - - /** - * @brief Returns the number of elements in the specified chunk index. - * @param flatChunkIndex - * @return - */ - virtual uint64 getChunkSize(uint64 flatChunkIndex) const - { - if(flatChunkIndex >= getNumberOfChunks()) - { - return 0; - } - return size(); - } - - /** - * @brief Returns the Smallest N-Dimensional tuple position included in the - * specified chunk. - * @param flatChunkIndex - * @return ShapeType - */ - virtual ShapeType getChunkLowerBounds(uint64 flatChunkIndex) const = 0; - - /** - * @brief Returns the largest N-Dimensional tuple position included in the - * specified chunk. - * @param flatChunkIndex - * @return ShapeType - */ - virtual ShapeType getChunkUpperBounds(uint64 flatChunkIndex) const = 0; - - /** - * @brief Returns the tuple shape for the specified chunk. - * Returns an empty vector if the chunk is out of bounds. - * @param flatChunkIndex - * @return std::vector chunk tuple shape - */ - virtual ShapeType getChunkTupleShape(uint64 flatChunkIndex) const - { - if(flatChunkIndex >= getNumberOfChunks()) - { - return ShapeType(); - } - auto lowerBounds = getChunkLowerBounds(flatChunkIndex); - auto upperBounds = getChunkUpperBounds(flatChunkIndex); - - const usize tupleCount = lowerBounds.size(); - ShapeType chunkTupleShape(tupleCount); - for(usize i = 0; i < tupleCount; i++) - { - chunkTupleShape[i] = upperBounds[i] - lowerBounds[i] + 1; - } - return chunkTupleShape; - } - - /** - * @brief Returns a vector containing the tuple extents for a specified chunk. - * The returned values are formatted as [min, max] in the order of the tuple - * dimensions. 
For instance, a single chunk with tuple dimensions {X, Y, Z} - * will result in an extent of [0, X-1, 0, Y-1, 0, Z-1]. - * Returns an empty vector if the chunk requested is beyond the scope of the - * available chunks. - * @param flatChunkIndex - * @return std::vector extents - */ - std::vector getChunkExtents(uint64 flatChunkIndex) const - { - if(flatChunkIndex >= getNumberOfChunks()) - { - return std::vector(); - } - - usize tupleDims = getTupleShape().size(); - std::vector extents(tupleDims * 2); - - auto upperBounds = getChunkUpperBounds(flatChunkIndex); - auto lowerBounds = getChunkLowerBounds(flatChunkIndex); - - for(usize i = 0; i < tupleDims; i++) - { - extents[i * 2] = lowerBounds[i]; - extents[i * 2 + 1] = upperBounds[i]; - } - - return extents; - } - - /** - * @brief Makes sure the target chunk is loaded in memory. - * This method does nothing for in-memory DataStores. - * @param flatChunkIndex - */ - virtual void loadChunk(uint64 flatChunkIndex) - { - } - - /** - * @brief Creates and returns an in-memory AbstractDataStore from a copy of the data - * from the specified chunk. - * @param flatChunkIndex - */ - virtual std::unique_ptr> convertChunkToDataStore(uint64 flatChunkIndex) const = 0; - /** * @brief Flushes the data store to its respective target. * In-memory DataStores are not affected. diff --git a/src/simplnx/DataStructure/AbstractStringStore.hpp b/src/simplnx/DataStructure/AbstractStringStore.hpp index eb2b3760c8..1af0cc2e48 100644 --- a/src/simplnx/DataStructure/AbstractStringStore.hpp +++ b/src/simplnx/DataStructure/AbstractStringStore.hpp @@ -2,6 +2,7 @@ #include "simplnx/Common/Aliases.hpp" #include "simplnx/Common/Types.hpp" +#include "simplnx/simplnx_export.hpp" #include #include @@ -9,7 +10,23 @@ namespace nx::core { -class AbstractStringStore +/** + * @class AbstractStringStore + * @brief Abstract base class for string storage backends used by StringArray. 
+ * + * AbstractStringStore defines the interface for storing and accessing an + * ordered collection of strings, organized by tuple shape. Concrete + * subclasses include: + * + * - **StringStore** -- The real, in-memory store that owns a + * `std::vector` and supports full read/write access. + * - **EmptyStringStore** -- A metadata-only placeholder that records + * tuple shape but holds no data. All data access methods throw. + * + * The isPlaceholder() virtual method allows callers to distinguish + * between these two cases without dynamic_cast. + */ +class SIMPLNX_EXPORT AbstractStringStore { public: using value_type = std::string; @@ -323,7 +340,12 @@ class AbstractStringStore using iterator = Iterator; using const_iterator = ConstIterator; - ~AbstractStringStore() = default; + /** + * @brief Virtual destructor. Ensures correct cleanup when deleting through + * a base class pointer, which is the normal ownership pattern since + * StringArray holds an AbstractStringStore via std::unique_ptr. + */ + virtual ~AbstractStringStore() = default; /** * @brief Creates a deep copy of this AbstractStringStore. @@ -343,6 +365,23 @@ class AbstractStringStore */ virtual bool empty() const = 0; + /** + * @brief Checks whether this store is a metadata-only placeholder that + * holds no real string data. + * + * This method exists so that import/backfill logic can identify which + * StringArray objects in a DataStructure still need their data loaded + * without resorting to dynamic_cast. The two concrete subclasses return + * fixed values: + * + * - **StringStore::isPlaceholder()** returns `false` (data is present). + * - **EmptyStringStore::isPlaceholder()** returns `true` (no data; + * accessing elements will throw). + * + * @return true if this store is a placeholder with no accessible data + */ + virtual bool isPlaceholder() const = 0; + /** * @brief Returns the number of tuples in the StringStore. 
* @return usize diff --git a/src/simplnx/DataStructure/DataStore.hpp b/src/simplnx/DataStructure/DataStore.hpp index cb0e5cb5c2..e3b6dd9357 100644 --- a/src/simplnx/DataStructure/DataStore.hpp +++ b/src/simplnx/DataStructure/DataStore.hpp @@ -11,10 +11,12 @@ #include #include #include +#include #include #include #include #include +#include #include namespace nx::core @@ -58,8 +60,8 @@ class DataStore : public AbstractDataStore : parent_type() , m_ComponentShape(componentShape) , m_TupleShape(tupleShape) - , m_NumComponents(std::accumulate(m_ComponentShape.cbegin(), m_ComponentShape.cend(), static_cast(1), std::multiplies<>())) - , m_NumTuples(std::accumulate(m_TupleShape.cbegin(), m_TupleShape.cend(), static_cast(1), std::multiplies<>())) + , m_NumComponents(std::accumulate(m_ComponentShape.cbegin(), m_ComponentShape.cend(), static_cast(1), std::multiplies<>())) + , m_NumTuples(std::accumulate(m_TupleShape.cbegin(), m_TupleShape.cend(), static_cast(1), std::multiplies<>())) , m_InitValue(initValue) { resizeTuples(m_TupleShape); @@ -80,8 +82,8 @@ class DataStore : public AbstractDataStore , m_ComponentShape(std::move(componentShape)) , m_TupleShape(std::move(tupleShape)) , m_Data(std::move(buffer)) - , m_NumComponents(std::accumulate(m_ComponentShape.cbegin(), m_ComponentShape.cend(), static_cast(1), std::multiplies<>())) - , m_NumTuples(std::accumulate(m_TupleShape.cbegin(), m_TupleShape.cend(), static_cast(1), std::multiplies<>())) + , m_NumComponents(std::accumulate(m_ComponentShape.cbegin(), m_ComponentShape.cend(), static_cast(1), std::multiplies<>())) + , m_NumTuples(std::accumulate(m_TupleShape.cbegin(), m_TupleShape.cend(), static_cast(1), std::multiplies<>())) { // Because no init value is passed into the constructor, we will use a "mudflap" style value that is easy to debug. 
m_InitValue = GetMudflap(); @@ -208,6 +210,25 @@ class DataStore : public AbstractDataStore return IDataStore::StoreType::InMemory; } + /** + * @brief Returns recovery metadata for an in-memory store. + * + * In-memory DataStores have no backing file or external state, so the + * recovery file's HDF5 dataset for this array contains all the data + * needed to reconstruct the store. No extra key-value attributes are + * required, so this returns an empty map. + * + * Out-of-core store subclasses override this to return the file path, + * dataset path, chunk shape, etc. needed to reattach to their backing + * storage after a crash. + * + * @return std::map Empty map. + */ + std::map getRecoveryMetadata() const override + { + return {}; + } + /** * @brief This method copies a value to the member variable m_InitValue */ @@ -240,7 +261,7 @@ class DataStore : public AbstractDataStore auto oldSize = this->getSize(); // Calculate the total number of values in the new array m_TupleShape = tupleShape; - m_NumTuples = std::accumulate(m_TupleShape.cbegin(), m_TupleShape.cend(), static_cast(1), std::multiplies<>()); + m_NumTuples = std::accumulate(m_TupleShape.cbegin(), m_TupleShape.cend(), static_cast(1), std::multiplies<>()); usize newSize = getNumberOfComponents() * m_NumTuples; @@ -299,6 +320,60 @@ class DataStore : public AbstractDataStore m_Data.get()[index] = value; } + /** + * @brief Copies a contiguous range of values from this in-memory data store + * into the caller-provided buffer. For the in-memory DataStore this is a + * simple bounds-checked std::copy from the raw backing array. + * + * @param startIndex The starting flat element index to read from + * @param buffer A span to receive the copied values; its size determines how + * many elements are read + * @return Result<> valid on success; invalid if `[startIndex, startIndex + buffer.size())` + * exceeds getSize(). 
+   */
+  Result<> copyIntoBuffer(usize startIndex, nonstd::span buffer) const override
+  {
+    const usize count = buffer.size();
+
+    // Bounds check in subtraction form: `startIndex + count` is unsigned and can wrap around, which would let an out-of-range request pass
+    if(startIndex > this->getSize() || count > this->getSize() - startIndex)
+    {
+      return MakeErrorResult(-6020, fmt::format("DataStore bulk read failed: requested range [{}, {}) exceeds store size ({}). Requested {} elements starting at index {}.", startIndex,
+                                                startIndex + count, this->getSize(), count, startIndex));
+    }
+
+    // Direct memory copy from the contiguous backing array into the caller's buffer
+    std::copy(m_Data.get() + startIndex, m_Data.get() + startIndex + count, buffer.data());
+    return {};
+  }
+
+  /**
+   * @brief Copies values from the caller-provided buffer into a contiguous
+   * range of this in-memory data store. For the in-memory DataStore this is
+   * a simple bounds-checked std::copy into the raw backing array.
+   *
+   * @param startIndex The starting flat element index to write to
+   * @param buffer A span containing the values to write; its size determines
+   * how many elements are written
+   * @return Result<> valid on success; invalid if `[startIndex, startIndex + buffer.size())`
+   * exceeds getSize().
+   */
+  Result<> copyFromBuffer(usize startIndex, nonstd::span buffer) override
+  {
+    const usize count = buffer.size();
+
+    // Bounds check in subtraction form: `startIndex + count` is unsigned and can wrap around, which would let an out-of-range request pass
+    if(startIndex > this->getSize() || count > this->getSize() - startIndex)
+    {
+      return MakeErrorResult(-6021, fmt::format("DataStore bulk write failed: requested range [{}, {}) exceeds store size ({}). Requested {} elements starting at index {}.", startIndex,
+                                                startIndex + count, this->getSize(), count, startIndex));
+    }
+
+    // Direct memory copy from the caller's buffer into the contiguous backing array
+    std::copy(buffer.begin(), buffer.end(), m_Data.get() + startIndex);
+    return {};
+  }
+
   /**
    * @brief Returns the value found at the specified index of sthe DataStore.
* This cannot be used to edit the value found at the specified index. @@ -579,80 +654,12 @@ class DataStore : public AbstractDataStore return dataset.writeSpan(dims, span); } - /** - * @brief Creates and returns an in-memory AbstractDataStore from a copy of the data - * from the specified chunk. - * @param flatChunkIndex - */ - std::unique_ptr> convertChunkToDataStore(uint64 flatChunkIndex) const override - { - if(flatChunkIndex >= this->getNumberOfChunks()) - { - return nullptr; - } - - std::unique_ptr dataWrapper = std::make_unique_for_overwrite(this->getSize()); - std::copy(this->begin(), this->end(), dataWrapper.get()); - - return std::make_unique>(std::move(dataWrapper), this->getTupleShape(), this->getComponentShape()); - } - - /** - * @brief Returns the number of chunks used to store the data. - * For in-memory DataStore, this is always 1. - * @return uint64 The number of chunks (always 1 for in-memory storage) - */ - uint64 getNumberOfChunks() const override - { - return 1; - } - - /** - * @brief Returns the Smallest N-Dimensional tuple position included in the - * specified chunk. - * @param flatChunkIndex - * @return ShapeType - */ - ShapeType getChunkLowerBounds(uint64 flatChunkIndex) const override - { - if(flatChunkIndex >= getNumberOfChunks()) - { - return ShapeType(); - } - usize tupleDims = getTupleShape().size(); - - ShapeType lowerBounds(tupleDims); - std::fill(lowerBounds.begin(), lowerBounds.end(), 0); - return lowerBounds; - } - - /** - * @brief Returns the largest N-Dimensional tuple position included in the - * specified chunk. 
- * @param flatChunkIndex - * @return ShapeType - */ - ShapeType getChunkUpperBounds(uint64 flatChunkIndex) const override - { - if(flatChunkIndex >= getNumberOfChunks()) - { - return ShapeType(); - } - - ShapeType upperBounds(getTupleShape()); - for(auto& value : upperBounds) - { - value -= 1; - } - return upperBounds; - } - private: ShapeType m_ComponentShape; ShapeType m_TupleShape; std::unique_ptr m_Data = nullptr; - size_t m_NumComponents = {0}; - size_t m_NumTuples = {0}; + usize m_NumComponents = {0}; + usize m_NumTuples = {0}; std::optional m_InitValue; }; diff --git a/src/simplnx/DataStructure/EmptyDataStore.hpp b/src/simplnx/DataStructure/EmptyDataStore.hpp index dd2cb44a16..eb8c5c3d9a 100644 --- a/src/simplnx/DataStructure/EmptyDataStore.hpp +++ b/src/simplnx/DataStructure/EmptyDataStore.hpp @@ -4,8 +4,10 @@ #include +#include #include #include +#include #include namespace nx::core @@ -38,8 +40,8 @@ class EmptyDataStore : public AbstractDataStore EmptyDataStore(const ShapeType& tupleShape, const ShapeType& componentShape, std::string dataFormat = "") : m_ComponentShape(componentShape) , m_TupleShape(tupleShape) - , m_NumComponents(std::accumulate(m_ComponentShape.cbegin(), m_ComponentShape.cend(), static_cast(1), std::multiplies<>())) - , m_NumTuples(std::accumulate(m_TupleShape.cbegin(), m_TupleShape.cend(), static_cast(1), std::multiplies<>())) + , m_NumComponents(std::accumulate(m_ComponentShape.cbegin(), m_ComponentShape.cend(), static_cast(1), std::multiplies<>())) + , m_NumTuples(std::accumulate(m_TupleShape.cbegin(), m_TupleShape.cend(), static_cast(1), std::multiplies<>())) , m_DataFormat(dataFormat) { } @@ -85,7 +87,7 @@ class EmptyDataStore : public AbstractDataStore * @brief Returns the target tuple getSize. 
* @return usize */ - size_t getNumberOfComponents() const override + usize getNumberOfComponents() const override { return m_NumComponents; } @@ -109,17 +111,45 @@ class EmptyDataStore : public AbstractDataStore } /** - * @brief Returns the store type e.g. in memory, out of core, etc. - * @return StoreType + * @brief Returns StoreType::Empty because this store is a metadata-only + * placeholder. The dataFormat() string records the intended storage + * strategy (e.g., "" for in-memory, or a named OOC format) so the + * framework knows what real store to create when execution begins. + * @return StoreType::Empty */ IDataStore::StoreType getStoreType() const override { - return m_DataFormat.empty() ? IDataStore::StoreType::Empty : IDataStore::StoreType::EmptyOutOfCore; + return IDataStore::StoreType::Empty; } /** - * @brief Checks and returns if the created data store should be in memory or handled out of core. - * @return bool + * @brief Throws — EmptyDataStore is a metadata-only placeholder. + * + * EmptyDataStore holds no data and no backing file, so it has no + * recovery metadata to report. Calling getRecoveryMetadata() on an + * EmptyDataStore is a programming error: the caller is treating a + * placeholder as if it were a real store. The real store that + * replaces this placeholder during execution is the one responsible + * for providing recovery metadata. + * + * Throws std::runtime_error to fail fast, matching the behavior of + * the other data-access methods on this class. + */ + std::map getRecoveryMetadata() const override + { + throw std::runtime_error("EmptyDataStore::getRecoveryMetadata: cannot query recovery metadata on a placeholder store"); + } + + /** + * @brief Returns the data format string that was specified at construction. + * + * This string indicates the intended storage strategy for the real data + * store that will replace this EmptyDataStore after preflight: + * - An empty string ("") means the data will be stored in-memory (DataStore). 
+ * - A non-empty string names an out-of-core format (e.g., an OOC store + * implementation) that should be used for execution. + * + * @return std::string The data format identifier */ std::string dataFormat() const { @@ -158,6 +188,36 @@ class EmptyDataStore : public AbstractDataStore throw std::runtime_error("EmptyDataStore::setValue() is not implemented"); } + /** + * @brief Always returns an invalid Result because EmptyDataStore holds no + * data. EmptyDataStore is a metadata-only placeholder used during preflight; + * bulk data access is not supported. The store must be replaced with a real + * DataStore or OOC store before any data I/O is attempted. + * @param startIndex Unused + * @param buffer Unused + * @return Invalid Result<> — always. + */ + Result<> copyIntoBuffer(usize startIndex, nonstd::span buffer) const override + { + return MakeErrorResult(-6022, "EmptyDataStore bulk read is not supported: EmptyDataStore is a metadata-only placeholder used during preflight and must be replaced with a real DataStore or " + "out-of-core store before bulk I/O is attempted."); + } + + /** + * @brief Always returns an invalid Result because EmptyDataStore holds no + * data. EmptyDataStore is a metadata-only placeholder used during preflight; + * bulk data access is not supported. The store must be replaced with a real + * DataStore or OOC store before any data I/O is attempted. + * @param startIndex Unused + * @param buffer Unused + * @return Invalid Result<> — always. + */ + Result<> copyFromBuffer(usize startIndex, nonstd::span buffer) override + { + return MakeErrorResult(-6023, "EmptyDataStore bulk write is not supported: EmptyDataStore is a metadata-only placeholder used during preflight and must be replaced with a real DataStore or " + "out-of-core store before bulk I/O is attempted."); + } + /** * @brief Throws an exception because this should never be called. The * EmptyDataStore class contains no data other than its target getSize. 
@@ -347,50 +407,11 @@ class EmptyDataStore : public AbstractDataStore return MakeErrorResult(-42350, "Cannot write data from an EmptyDataStore"); } - /** - * @brief Creates and returns an in-memory AbstractDataStore from a copy of the data - * from the specified chunk. - * @param flatChunkIndex - */ - std::unique_ptr> convertChunkToDataStore(uint64 flatChunkIndex) const override - { - return nullptr; - } - - /** - * @brief Returns empty bounds because EmptyDataStore has no chunks. - * @param flatChunkIndex The chunk index (unused) - * @return ShapeType Empty shape vector - */ - ShapeType getChunkLowerBounds(uint64 flatChunkIndex) const override - { - return {}; - } - - /** - * @brief Returns empty bounds because EmptyDataStore has no chunks. - * @param flatChunkIndex The chunk index (unused) - * @return ShapeType Empty shape vector - */ - ShapeType getChunkUpperBounds(uint64 flatChunkIndex) const override - { - return {}; - } - - /** - * @brief Returns the number of chunks in the EmptyDataStore. - * @return uint64 Always returns 0 because EmptyDataStore has no data - */ - uint64 getNumberOfChunks() const override - { - return 0; - } - private: ShapeType m_ComponentShape; ShapeType m_TupleShape; - size_t m_NumComponents = {0}; - size_t m_NumTuples = {0}; + usize m_NumComponents = {0}; + usize m_NumTuples = {0}; std::string m_DataFormat = ""; }; } // namespace nx::core diff --git a/src/simplnx/DataStructure/EmptyStringStore.hpp b/src/simplnx/DataStructure/EmptyStringStore.hpp new file mode 100644 index 0000000000..14ffcbc4df --- /dev/null +++ b/src/simplnx/DataStructure/EmptyStringStore.hpp @@ -0,0 +1,207 @@ +#pragma once + +#include "AbstractStringStore.hpp" + +#include +#include +#include + +namespace nx::core +{ +/** + * @class EmptyStringStore + * @brief A metadata-only placeholder for AbstractStringStore, analogous to + * EmptyDataStore for numeric arrays. 
+ * + * EmptyStringStore records tuple shape (number and layout of strings) but + * holds no actual string data. It exists because: + * + * 1. **Preflight-style imports:** When loading a .dream3d file's + * DataStructure in metadata-only mode (e.g., for file inspection or + * pipeline validation), StringArray objects need a store that reports + * correct tuple counts without allocating or reading string data. + * + * 2. **Out-of-core workflows:** When the OOC import path builds the + * DataStructure skeleton, string arrays are initially populated with + * EmptyStringStore instances. A subsequent backfill step replaces each + * EmptyStringStore with a real StringStore that contains the loaded + * data. + * + * All data access methods (operator[], at, getValue, setValue, operator=) + * throw std::runtime_error to fail fast if code accidentally tries to read + * or write string data before the backfill step has run. + * + * @see StringStore The concrete store that holds real string data. + * @see EmptyDataStore The equivalent placeholder for numeric DataArrays. + */ +class SIMPLNX_EXPORT EmptyStringStore : public AbstractStringStore +{ +public: + /** + * @brief Default constructor. + */ + EmptyStringStore() = default; + + /** + * @brief Constructs an EmptyStringStore with the specified tuple shape. + * @param tupleShape The shape of the tuple dimensions + */ + EmptyStringStore(const ShapeType& tupleShape) + : AbstractStringStore() + , m_TupleShape(tupleShape) + , m_NumTuples(std::accumulate(tupleShape.cbegin(), tupleShape.cend(), static_cast(1), std::multiplies<>())) + { + } + + /** + * @brief Copy constructor. + * @param rhs The EmptyStringStore to copy from + */ + EmptyStringStore(const EmptyStringStore& rhs) = default; + + /** + * @brief Move constructor. 
+ * @param rhs The EmptyStringStore to move from + */ + EmptyStringStore(EmptyStringStore&& rhs) = default; + + ~EmptyStringStore() override = default; + + /** + * @brief Creates a deep copy of this EmptyStringStore. + * @return std::unique_ptr Unique pointer to the deep copy + */ + std::unique_ptr deepCopy() const override + { + return std::make_unique(*this); + } + + /** + * @brief Returns the total number of strings in the store (equal to the number of tuples). + * @return usize The number of strings + */ + usize size() const override + { + return m_NumTuples; + } + + /** + * @brief Returns whether the string store is empty. + * @return bool True if the store has no strings, false otherwise + */ + bool empty() const override + { + return m_NumTuples == 0; + } + + /** + * @brief Returns the number of tuples in the EmptyStringStore. + * @return usize + */ + usize getNumberOfTuples() const override + { + return m_NumTuples; + } + + /** + * @brief Returns the dimensions of the Tuples + * @return + */ + const ShapeType& getTupleShape() const override + { + return m_TupleShape; + } + + /** + * @brief Resizes the string store to the specified tuple shape. + * @param tupleShape The new shape of the tuple dimensions + */ + void resizeTuples(const ShapeType& tupleShape) override + { + m_TupleShape = tupleShape; + m_NumTuples = std::accumulate(m_TupleShape.cbegin(), m_TupleShape.cend(), static_cast(1), std::multiplies<>()); + } + + /** + * @brief Returns true because EmptyStringStore is a metadata-only + * placeholder that holds no actual string data. + * + * Code that needs to distinguish between a real StringStore (which has + * accessible data) and an EmptyStringStore (which will throw on access) + * should call isPlaceholder() rather than using dynamic_cast. This is + * used by the backfill/import logic to identify which stores still need + * their data loaded. + * + * @return true Always returns true for EmptyStringStore. 
+ */ + bool isPlaceholder() const override + { + return true; + } + + /** + * @brief Throws an error because EmptyStringStore has no data. + * @param index The index (unused) + * @throw std::runtime_error Always throws because EmptyStringStore has no data + */ + reference operator[](usize index) override + { + throw std::runtime_error("EmptyStringStore::operator[] called on placeholder store - data not loaded yet"); + } + + /** + * @brief Throws an error because EmptyStringStore has no data. + * @param index The index (unused) + * @throw std::runtime_error Always throws because EmptyStringStore has no data + */ + const_reference operator[](usize index) const override + { + throw std::runtime_error("EmptyStringStore::operator[] called on placeholder store - data not loaded yet"); + } + + /** + * @brief Throws an error because EmptyStringStore has no data. + * @param index The index (unused) + * @throw std::runtime_error Always throws because EmptyStringStore has no data + */ + const_reference at(usize index) const override + { + throw std::runtime_error("EmptyStringStore::at() called on placeholder store - data not loaded yet"); + } + + /** + * @brief Throws an error because EmptyStringStore has no data. + * @param index The index (unused) + * @throw std::runtime_error Always throws because EmptyStringStore has no data + */ + const_reference getValue(usize index) const override + { + throw std::runtime_error("EmptyStringStore::getValue() called on placeholder store - data not loaded yet"); + } + + /** + * @brief Throws an error because EmptyStringStore has no data. 
+ * @param index The index (unused) + * @param value The value to set (unused) + * @throw std::runtime_error Always throws because EmptyStringStore has no data + */ + void setValue(usize index, const value_type& value) override + { + throw std::runtime_error("EmptyStringStore::setValue() called on placeholder store - data not loaded yet"); + } + + /** + * @brief Throws an error because EmptyStringStore cannot accept data assignments. + * @param values Vector of strings to assign (unused) + * @throw std::runtime_error Always throws because EmptyStringStore has no data + */ + AbstractStringStore& operator=(const std::vector& values) override + { + throw std::runtime_error("EmptyStringStore::operator= called on placeholder store - data not loaded yet"); + } + +private: + ShapeType m_TupleShape; + usize m_NumTuples = 0; +}; +} // namespace nx::core diff --git a/src/simplnx/DataStructure/IDataStore.hpp b/src/simplnx/DataStructure/IDataStore.hpp index 4f644f0227..dcc250cad7 100644 --- a/src/simplnx/DataStructure/IDataStore.hpp +++ b/src/simplnx/DataStructure/IDataStore.hpp @@ -6,8 +6,8 @@ #include #include +#include #include -#include #include #include @@ -23,12 +23,35 @@ namespace nx::core class SIMPLNX_EXPORT IDataStore { public: + /** + * @brief Identifies how a data store manages its backing storage. + * + * Algorithms and I/O routines use this enum to determine whether data is + * immediately accessible in RAM or must be fetched from disk, and to + * distinguish real stores from preflight-only placeholders. + * + * - **InMemory** -- The store's data lives in a heap-allocated array that is + * always resident in RAM (DataStore). Element access via getValue/setValue + * and the bulk copyIntoBuffer/copyFromBuffer API are both cheap memory + * copies. + * + * - **OutOfCore** -- The store's data lives on disk in a chunked HDF5 dataset. 
+ * Element access goes through chunk caching; the bulk copyIntoBuffer/ + * copyFromBuffer API translates flat ranges into efficient multi-chunk I/O. + * An earlier "EmptyOutOfCore" value was removed because the Empty type + * already covers placeholder semantics regardless of the eventual storage + * strategy. + * + * - **Empty** -- A metadata-only placeholder used during preflight + * (EmptyDataStore). Records tuple/component shape but holds no data. + * All data access methods throw. After preflight the Empty store is + * replaced with an InMemory or OutOfCore store before execution begins. + */ enum class StoreType : int32 { - InMemory = 0, - OutOfCore, - Empty, - EmptyOutOfCore + InMemory = 0, ///< Data is fully resident in a heap-allocated array (DataStore) + OutOfCore, ///< Data lives on disk in a chunked HDF5 dataset + Empty ///< Metadata-only placeholder used during preflight (EmptyDataStore) }; virtual ~IDataStore() = default; @@ -56,13 +79,6 @@ class SIMPLNX_EXPORT IDataStore */ virtual const ShapeType& getComponentShape() const = 0; - /** - * @brief Returns the chunk shape if the DataStore is separated into chunks. - * If the DataStore does not have chunks, this method returns a null optional. - * @return optional Shapetype - */ - virtual std::optional getChunkShape() const = 0; - /** * @brief Returns the number of values stored within the DataStore. * @return usize @@ -117,6 +133,33 @@ class SIMPLNX_EXPORT IDataStore return ""; } + /** + * @brief Returns store-specific metadata needed for crash recovery. + * + * When the pipeline runner writes a recovery (.dream3d) file at + * the end of pipeline execution, it calls this method on every + * data store to capture whatever information is needed to reconnect + * the store to its data after a crash or unexpected termination. + * + * **In-memory stores (DataStore)** return an empty map because their + * data is written directly into the recovery file's HDF5 datasets; + * no extra metadata is required. 
+ * + * **Out-of-core stores** return key-value pairs describing their + * backing file path, HDF5 dataset path, chunk shape, and any other + * parameters needed to reconstruct the OOC store from the file on + * disk. + * + * Each key-value pair is written as an HDF5 string attribute on the + * array's dataset inside the recovery file. The recovery loader + * reads these attributes to reconstruct the appropriate store + * subclass without loading the data into RAM. + * + * @return std::map Key-value pairs of + * recovery metadata. Empty for in-memory stores. + */ + virtual std::map getRecoveryMetadata() const = 0; + /** * @brief Returns the size of the stored type of the data store. * @return usize diff --git a/src/simplnx/DataStructure/IO/Generic/CoreDataIOManager.cpp b/src/simplnx/DataStructure/IO/Generic/CoreDataIOManager.cpp index 4b92eff22a..f56fbd493d 100644 --- a/src/simplnx/DataStructure/IO/Generic/CoreDataIOManager.cpp +++ b/src/simplnx/DataStructure/IO/Generic/CoreDataIOManager.cpp @@ -1,5 +1,6 @@ #include "CoreDataIOManager.hpp" +#include "simplnx/Core/Preferences.hpp" #include "simplnx/DataStructure/DataStore.hpp" #include "simplnx/DataStructure/ListStore.hpp" @@ -17,7 +18,11 @@ CoreDataIOManager::~CoreDataIOManager() noexcept = default; std::string CoreDataIOManager::formatName() const { - return ""; + // The core in-memory manager uses the reserved k_InMemoryFormat constant so + // that "in-memory" is distinct from "unset" (empty string). Callers that want + // explicit in-memory storage should pass k_InMemoryFormat; callers that pass + // "" are signaling "unset/auto — let the resolver decide". 
+ return std::string(Preferences::k_InMemoryFormat); } void CoreDataIOManager::addCoreFactories() diff --git a/src/simplnx/DataStructure/IO/Generic/DataIOCollection.cpp b/src/simplnx/DataStructure/IO/Generic/DataIOCollection.cpp index da9de2fc6c..a2fe2a8a59 100644 --- a/src/simplnx/DataStructure/IO/Generic/DataIOCollection.cpp +++ b/src/simplnx/DataStructure/IO/Generic/DataIOCollection.cpp @@ -1,27 +1,52 @@ #include "DataIOCollection.hpp" -#include "simplnx/Core/Application.hpp" +#include "simplnx/Core/Preferences.hpp" +#include "simplnx/DataStructure/AbstractStringStore.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" #include "simplnx/DataStructure/IO/Generic/CoreDataIOManager.hpp" #include "simplnx/DataStructure/IO/Generic/IDataIOManager.hpp" #include "simplnx/DataStructure/IO/HDF5/DataIOManager.hpp" +#include "simplnx/DataStructure/IO/HDF5/DataStructureWriter.hpp" +#include "simplnx/Utilities/Parsing/HDF5/IO/FileIO.hpp" + +namespace fs = std::filesystem; namespace nx::core { DataIOCollection::DataIOCollection() { - addIOManager(std::make_shared()); - addIOManager(std::make_shared()); + // Register the built-in CoreDataIOManager directly into the map. The core + // manager's formatName() is k_InMemoryFormat, which is a reserved name that + // addIOManager() rejects for plugin registrations — so we bypass that + // validation here by writing to m_ManagerMap directly. This is the only + // place in the codebase permitted to register a manager under the reserved + // in-memory format name. + auto coreManager = std::make_shared(); + m_ManagerMap[coreManager->formatName()] = coreManager; + + // HDF5 format name is not reserved, so this cannot fail. 
+ (void)addIOManager(std::make_shared()); } DataIOCollection::~DataIOCollection() noexcept = default; -void DataIOCollection::addIOManager(std::shared_ptr manager) +Result<> DataIOCollection::addIOManager(std::shared_ptr manager) { if(manager == nullptr) { - return; + return MakeErrorResult(-6010, "Cannot register a null IDataIOManager"); + } + + const std::string& name = manager->formatName(); + // k_InMemoryFormat is reserved for the built-in CoreDataIOManager, which is + // registered directly by the constructor. Any other attempt to register + // under this name (e.g., from a plugin) is rejected. + if(name == Preferences::k_InMemoryFormat) + { + return MakeErrorResult(-6011, fmt::format("Cannot register an I/O manager with the reserved format name '{}'", std::string(Preferences::k_InMemoryFormat))); } - m_ManagerMap[manager->formatName()] = manager; + m_ManagerMap[name] = manager; + return {}; } std::shared_ptr DataIOCollection::getManager(const std::string& formatName) const @@ -55,8 +80,11 @@ std::unique_ptr DataIOCollection::createDataStore(const std::string& } } - nx::core::Generic::CoreDataIOManager coreManager; - return coreManager.dataStoreCreationFnc(coreManager.formatName())(dataType, tupleShape, componentShape, {}); + // Fallback: no registered manager claimed @p type, so default to in-memory + // storage. The built-in CoreDataIOManager is always registered under + // k_InMemoryFormat by our constructor, so .at() is guaranteed to succeed. 
+ const auto& coreManager = m_ManagerMap.at(std::string(Preferences::k_InMemoryFormat)); + return coreManager->dataStoreCreationFnc(coreManager->formatName())(dataType, tupleShape, componentShape, {}); } std::unique_ptr DataIOCollection::createListStore(const std::string& type, DataType dataType, const ShapeType& tupleShape) const @@ -69,23 +97,188 @@ std::unique_ptr DataIOCollection::createListStore(const std::string& } } - nx::core::Generic::CoreDataIOManager coreManager; - return coreManager.listStoreCreationFnc(coreManager.formatName())(dataType, tupleShape); + // Fallback: see createDataStore for rationale. Core manager is always present. + const auto& coreManager = m_ManagerMap.at(std::string(Preferences::k_InMemoryFormat)); + return coreManager->listStoreCreationFnc(coreManager->formatName())(dataType, tupleShape); +} + +// --------------------------------------------------------------------------- +// Read-only reference DataStore creation (OOC DataArray support) +// --------------------------------------------------------------------------- + +bool DataIOCollection::hasReadOnlyRefCreationFnc(const std::string& type) const +{ + // Search all registered IO managers for one that can create OOC DataStores + // for the requested format name. + for(const auto& [ioType, ioManager] : m_ManagerMap) + { + if(ioManager->hasReadOnlyRefCreationFnc(type)) + { + return true; + } + } + return false; +} + +std::unique_ptr DataIOCollection::createReadOnlyRefStore(const std::string& type, DataType numericType, const fs::path& filePath, const std::string& datasetPath, + const ShapeType& tupleShape, const ShapeType& componentShape, const ShapeType& chunkShape) +{ + // Find the first IO manager that has a factory for the requested format and + // delegate the actual store construction to it. The factory will create a + // chunk-backed read-only store pointing at the HDF5 dataset. 
+ for(const auto& [ioType, ioManager] : m_ManagerMap) + { + if(ioManager->hasReadOnlyRefCreationFnc(type)) + { + auto fnc = ioManager->readOnlyRefCreationFnc(type); + return fnc(numericType, filePath, datasetPath, tupleShape, componentShape, chunkShape); + } + } + // No IO manager supports this format -- return nullptr so the caller can + // fall back to a different strategy (or report an error). + return nullptr; +} + +// --------------------------------------------------------------------------- +// Read-only reference ListStore creation (OOC NeighborList support) +// --------------------------------------------------------------------------- + +bool DataIOCollection::hasReadOnlyRefListCreationFnc(const std::string& type) const +{ + // Search all registered IO managers for one that can create OOC ListStores + // for the requested format name. + for(const auto& [ioType, ioManager] : m_ManagerMap) + { + if(ioManager->hasListStoreRefCreationFnc(type)) + { + return true; + } + } + return false; +} + +std::unique_ptr DataIOCollection::createReadOnlyRefListStore(const std::string& type, DataType numericType, const fs::path& filePath, const std::string& datasetPath, + const ShapeType& tupleShape, const ShapeType& chunkShape) +{ + // Find the first IO manager that has a ListStore reference factory for the + // requested format and delegate construction. + for(const auto& [ioType, ioManager] : m_ManagerMap) + { + if(ioManager->hasListStoreRefCreationFnc(type)) + { + auto fnc = ioManager->listStoreRefCreationFnc(type); + return fnc(numericType, filePath, datasetPath, tupleShape, chunkShape); + } + } + // No IO manager supports this format for list stores. 
+ return nullptr; +} + +// --------------------------------------------------------------------------- +// StringStore creation +// --------------------------------------------------------------------------- + +bool DataIOCollection::hasStringStoreCreationFnc(const std::string& type) const +{ + // Search all registered IO managers for one that provides a StringStore + // factory for the requested format name. + for(const auto& [ioType, ioManager] : m_ManagerMap) + { + if(ioManager->hasStringStoreCreationFnc(type)) + { + return true; + } + } + return false; +} + +std::unique_ptr DataIOCollection::createStringStore(const std::string& type, const ShapeType& tupleShape) +{ + // Find the first IO manager that has a StringStore factory for the requested + // format and delegate construction. + for(const auto& [ioType, ioManager] : m_ManagerMap) + { + if(ioManager->hasStringStoreCreationFnc(type)) + { + auto fnc = ioManager->stringStoreCreationFnc(type); + return fnc(tupleShape); + } + } + // No IO manager supports this format for string stores. + return nullptr; +} + +// --------------------------------------------------------------------------- +// Post-pipeline store finalization +// --------------------------------------------------------------------------- + +void DataIOCollection::finalizeStores(DataStructure& dataStructure) +{ + // Iterate over every registered IO manager and invoke its finalize callback + // if one exists. Each callback transitions its own stores from write mode to + // read-only mode (e.g., closing HDF5 write handles, opening read handles). 
+ for(const auto& [ioType, ioManager] : m_ManagerMap) + { + if(ioManager->hasFinalizeStoresFnc(ioType)) + { + ioManager->finalizeStoresFnc(ioType)(dataStructure); + } + } +} + +// --------------------------------------------------------------------------- +// Format resolver hook (decides in-memory vs OOC for new arrays) +// --------------------------------------------------------------------------- + +void DataIOCollection::setFormatResolver(FormatResolverFnc resolver) +{ + // Store the callback. An empty std::function effectively disables the resolver. + m_FormatResolver = std::move(resolver); } -void DataIOCollection::checkStoreDataFormat(uint64 dataSize, std::string& dataFormat) const +bool DataIOCollection::hasFormatResolver() const { - if(!dataFormat.empty()) + return static_cast(m_FormatResolver); +} + +std::string DataIOCollection::resolveFormat(const DataStructure& dataStructure, const DataPath& arrayPath, DataType numericType, uint64 dataSizeBytes) const +{ + // If no resolver is installed, return "" to signal the default in-memory format. + if(!m_FormatResolver) { - return; + return ""; } - const Preferences* preferences = Application::GetOrCreateInstance()->getPreferences(); - const uint64 largeDataSize = preferences->valueAs(Preferences::k_LargeDataSize_Key); - const std::string largeDataFormat = preferences->valueAs(Preferences::k_PreferredLargeDataFormat_Key); - if(dataSize > largeDataSize && hasDataStoreCreationFunction(largeDataFormat)) + // Delegate to the plugin-provided resolver. It returns a format name like + // "HDF5-OOC" for large arrays, or "" to keep the default in-memory store. 
+ return m_FormatResolver(dataStructure, arrayPath, numericType, dataSizeBytes); +} + +// --------------------------------------------------------------------------- +// Data store import handler hook (loads all data stores after .dream3d import) +// --------------------------------------------------------------------------- + +void DataIOCollection::setDataStoreImportHandler(DataStoreImportHandlerFnc handler) +{ + // Store the callback. An empty std::function effectively disables the handler. + m_DataStoreImportHandler = std::move(handler); +} + +bool DataIOCollection::hasDataStoreImportHandler() const +{ + return static_cast(m_DataStoreImportHandler); +} + +Result<> DataIOCollection::runDataStoreImportHandler(DataStructure& dataStructure, const std::vector& paths, const nx::core::HDF5::FileIO& fileReader, EagerLoadFnc eagerLoad) +{ + // If no handler is installed, this is the in-memory code path: placeholders + // will be replaced elsewhere (or eagerly loaded). Return success. + if(!m_DataStoreImportHandler) { - dataFormat = largeDataFormat; + return {}; } + // Delegate to the plugin-provided handler, which loads all data stores — + // in-core, OOC, or recovered — for the imported paths. 
+ return m_DataStoreImportHandler(dataStructure, paths, fileReader, std::move(eagerLoad)); } std::vector DataIOCollection::getFormatNames() const @@ -102,6 +295,114 @@ std::vector DataIOCollection::getFormatNames() const return keyNames; } +void DataIOCollection::registerFormatDisplayName(const std::string& formatName, const std::string& displayName) +{ + m_FormatDisplayNames[formatName] = displayName; +} + +std::vector> DataIOCollection::getFormatDisplayNames() const +{ + std::vector> result; + // Always include the two built-in entries first + result.emplace_back("", "Automatic"); + result.emplace_back(std::string(Preferences::k_InMemoryFormat), "In Memory"); + // Append any plugin-registered display names + for(const auto& [formatName, displayName] : m_FormatDisplayNames) + { + result.emplace_back(formatName, displayName); + } + return result; +} + +std::string DataIOCollection::generateManagerListString() const +{ + // Build one row per registered manager. Each row pairs the display name + // with a comma-separated list of store-type capabilities. We collect first, + // then format the output as a padded table so columns align. + struct Row + { + std::string displayName; + std::string capabilityList; + }; + std::vector rows; + rows.reserve(m_ManagerMap.size()); + + usize maxNameWidth = 0; + for(const auto& [managerKey, manager] : m_ManagerMap) + { + // The manager's own formatName() is the key under which it registers its + // factories (convention enforced across all known managers). Query with + // that key to determine what this manager can create. + const std::string fn = manager->formatName(); + + // Resolve a friendly display name. The core in-memory manager has a fixed + // label; plugin-registered managers may have a registered display name; + // otherwise fall back to the raw format identifier. 
+ std::string displayName; + if(fn == Preferences::k_InMemoryFormat) + { + displayName = "In Memory"; + } + else + { + auto it = m_FormatDisplayNames.find(fn); + displayName = (it != m_FormatDisplayNames.end()) ? it->second : fn; + } + + // Collect the capability labels in a stable order so rows read consistently. + std::vector capabilities; + if(manager->hasDataStoreCreationFnc(fn)) + { + capabilities.emplace_back("DataStore"); + } + if(manager->hasListStoreCreationFnc(fn)) + { + capabilities.emplace_back("ListStore"); + } + if(manager->hasStringStoreCreationFnc(fn)) + { + capabilities.emplace_back("StringStore"); + } + if(manager->hasReadOnlyRefCreationFnc(fn)) + { + capabilities.emplace_back("ReadOnlyRef(DataStore)"); + } + if(manager->hasListStoreRefCreationFnc(fn)) + { + capabilities.emplace_back("ReadOnlyRef(ListStore)"); + } + + std::string capList; + for(usize i = 0; i < capabilities.size(); ++i) + { + if(i > 0) + { + capList += ", "; + } + capList += capabilities[i]; + } + if(capList.empty()) + { + capList = "(no factories registered)"; + } + + if(displayName.size() > maxNameWidth) + { + maxNameWidth = displayName.size(); + } + rows.push_back({std::move(displayName), std::move(capList)}); + } + + // Assemble the padded output. The header line comes first with no leading + // newline; each row is then appended on its own line beneath it.
+ std::string result = "Registered IO managers and their capabilities:"; + for(const auto& row : rows) + { + result += fmt::format("\n {:<{}} : {}", row.displayName, maxNameWidth, row.capabilityList); + } + return result; +} + DataIOCollection::iterator DataIOCollection::begin() { return m_ManagerMap.begin(); @@ -119,4 +420,47 @@ DataIOCollection::const_iterator DataIOCollection::end() const { return m_ManagerMap.end(); } + +// --------------------------------------------------------------------------- +// Write-array-override hook (intercepts DataObject writes during HDF5 save) +// --------------------------------------------------------------------------- + +void DataIOCollection::setWriteArrayOverride(WriteArrayOverrideFnc fnc) +{ + // Store the callback. Registration alone does not activate the override; + // setWriteArrayOverrideActive(true) or WriteArrayOverrideGuard is also needed. + m_WriteArrayOverride = std::move(fnc); +} + +bool DataIOCollection::hasWriteArrayOverride() const +{ + return static_cast(m_WriteArrayOverride); +} + +void DataIOCollection::setWriteArrayOverrideActive(bool active) +{ + // Toggle the gate flag. The override fires only when both the callback is + // registered (m_WriteArrayOverride is non-empty) AND this flag is true. + m_WriteArrayOverrideActive = active; +} + +bool DataIOCollection::isWriteArrayOverrideActive() const +{ + return m_WriteArrayOverrideActive; +} + +std::optional> DataIOCollection::runWriteArrayOverride(HDF5::DataStructureWriter& writer, const DataObject* dataObject, HDF5::GroupIO& parentGroup) +{ + // Short-circuit: if the override is not active or not registered, return + // std::nullopt so the caller proceeds with the normal HDF5 write path. + if(!m_WriteArrayOverrideActive || !m_WriteArrayOverride) + { + return std::nullopt; + } + // Delegate to the plugin-provided callback. 
It may return: + // - std::nullopt => decline this object, caller should use normal write + // - Result<>{} => object was written successfully by the override + // - Result<> error => override attempted to write but failed + return m_WriteArrayOverride(writer, dataObject, parentGroup); +} } // namespace nx::core diff --git a/src/simplnx/DataStructure/IO/Generic/DataIOCollection.hpp b/src/simplnx/DataStructure/IO/Generic/DataIOCollection.hpp index b2b46adc27..2a3d92c870 100644 --- a/src/simplnx/DataStructure/IO/Generic/DataIOCollection.hpp +++ b/src/simplnx/DataStructure/IO/Generic/DataIOCollection.hpp @@ -2,21 +2,37 @@ #include "simplnx/DataStructure/AbstractDataStore.hpp" #include "simplnx/DataStructure/AbstractListStore.hpp" +#include "simplnx/DataStructure/DataPath.hpp" #include "simplnx/simplnx_export.hpp" +#include "simplnx/Common/Result.hpp" #include "simplnx/Common/Types.hpp" #include "simplnx/Common/TypesUtility.hpp" +#include +#include #include #include +#include #include +#include namespace nx::core { template class AbstractDataStore; +class AbstractStringStore; +class DataObject; +class DataStructure; class IDataIOManager; +namespace HDF5 +{ +class FileIO; +class GroupIO; +class DataStructureWriter; +} // namespace HDF5 + /** * @brief The DataIOCollection class contains all known IDataIOManagers for the current Application instance. */ @@ -27,14 +43,240 @@ class SIMPLNX_EXPORT DataIOCollection using iterator = typename map_type::iterator; using const_iterator = typename map_type::const_iterator; + /** + * @brief Callback that decides which storage format to use when a new array is + * created during filter execution (e.g., in CreateArrayAction). + * + * The resolver receives the full DataStructure and array DataPath so it can + * walk parent objects to determine geometry type, plus the numeric type and + * total byte size for size-threshold decisions. 
It returns either: + * - A format name string (e.g., "HDF5-OOC") to request out-of-core storage, or + * - An empty string "" to use the default in-memory DataStore. + * + * This hook allows the OOC plugin to redirect large arrays to disk-backed + * storage without the core library needing to know about any specific OOC + * format. Only one resolver may be active at a time. + * + * @param dataStructure The DataStructure containing (or about to contain) the array. + * The resolver can walk parent objects to determine geometry type. + * @param arrayPath The DataPath where the array lives or will be created. + * @param numericType The element data type (float32, int32, uint8, etc.) + * @param dataSizeBytes The total size of the array data in bytes + * @return Format name string, or empty string for in-memory default + */ + using FormatResolverFnc = std::function; + + /** + * @brief Callback that eagerly loads a single DataObject's data from HDF5 into memory. + * Constructed by the loading infrastructure and passed to the data store import handler. + * The handler calls this for arrays the format resolver says should be in-core. + */ + using EagerLoadFnc = std::function(DataStructure& dataStructure, const DataPath& path)>; + + /** + * @brief Callback responsible for ALL data store loading from .dream3d files — + * in-core, OOC, and recovery. + * + * During import, the HDF5 reader creates lightweight placeholder stores + * (EmptyDataStore, EmptyListStore, EmptyStringStore) for every array so that + * the DataStructure's topology is complete without loading data. 
The data + * store import handler is then called with the list of imported DataPaths, + * the still-open HDF5 file reader, and an eager-load callback, giving the OOC + * plugin the opportunity to replace each placeholder with a real store — either + * a read-only OOC reference that lazily reads chunks from the same file, an + * eagerly-loaded in-core store (by invoking @p eagerLoad), or a recovered store + * from a prior session. + * + * The @p eagerLoad callback encapsulates the Dream3dIO loading infrastructure so + * the handler does not need to know about file format versions or internal reader + * details — it simply calls eagerLoad(dataStructure, path) for any array that + * should be brought fully into memory. + * + * Only one data store import handler may be registered at a time. If no + * handler is registered, the placeholders remain (which is valid for + * in-memory workflows that eagerly load data through a different path). + * + * @param dataStructure The target DataStructure containing placeholder stores to replace + * @param paths DataPaths of the objects imported from the file + * @param fileReader Open HDF5 file reader for the source .dream3d file + * @param eagerLoad Callback that loads a single array's data from HDF5 into memory + * @return Result<> indicating success or describing any errors + */ + using DataStoreImportHandlerFnc = std::function(DataStructure& dataStructure, const std::vector& paths, const nx::core::HDF5::FileIO& fileReader, EagerLoadFnc eagerLoad)>; + + /** + * @brief Callback that can intercept and override how a DataObject is written + * to an HDF5 file during a DataStructure save operation. + * + * This hook exists primarily for recovery-file writes in OOC mode. When the + * OOC plugin is active, arrays may be backed by read-only references to an + * existing HDF5 file. 
During a recovery save, those arrays should not be + * re-serialized from memory (they may not even be fully loaded); instead the + * override callback can create an HDF5 hard link or external link back to the + * original dataset. + * + * The callback uses a tri-state return: + * - std::nullopt -- the hook declines to handle this object; the normal + * write path should proceed. + * - Result<>{} -- the hook successfully wrote the object; the normal + * write path should be skipped. + * - Result<> with errors -- the hook attempted to write but failed. + * + * The override is only invoked when both registered AND activated via + * setWriteArrayOverrideActive(true) or WriteArrayOverrideGuard. This two-step + * design lets the plugin register the callback once at load time while only + * activating it during specific save operations. + * + * @param writer The DataStructureWriter managing the save operation + * @param dataObject The DataObject about to be written + * @param parentGroup The HDF5 group that would normally receive the object + * @return std::nullopt to fall through, or Result<> to indicate the hook handled (or failed) the write + */ + using WriteArrayOverrideFnc = std::function>(HDF5::DataStructureWriter& writer, const DataObject* dataObject, HDF5::GroupIO& parentGroup)>; + + /** + * @brief Registers or clears the format resolver callback. + * + * Only one resolver can be active at a time. The OOC plugin typically calls + * this once during plugin loading to install its size-threshold logic. Passing + * a default-constructed (empty) std::function disables the resolver so that + * resolveFormat() returns "" for all queries. + * + * @param resolver The resolver callback, or an empty std::function to clear it + */ + void setFormatResolver(FormatResolverFnc resolver); + + /** + * @brief Checks whether a format resolver callback is currently registered. 
+ * @return true if a non-empty resolver callback is set + */ + bool hasFormatResolver() const; + + /** + * @brief Queries the registered format resolver to determine which storage + * format should be used for a new array with the given metadata. + * + * This method is called by CreateArrayAction and similar actions when they + * allocate a new DataStore. If a resolver is registered, it is invoked with + * the array's properties and its return value selects the format. If no + * resolver is registered, or the resolver returns "", the caller falls back + * to the default in-memory DataStore. + * + * @param dataStructure The DataStructure containing (or about to contain) the array. + * The resolver can walk parent objects to determine geometry type. + * @param arrayPath The DataPath where the array lives or will be created. + * @param numericType The element data type of the array + * @param dataSizeBytes The total size of the array data in bytes + * @return A format name string (e.g., "HDF5-OOC") to use a specific format, + * or an empty string for the default in-memory format + */ + std::string resolveFormat(const DataStructure& dataStructure, const DataPath& arrayPath, DataType numericType, uint64 dataSizeBytes) const; + + /** + * @brief Registers or clears the data store import handler callback. + * + * The OOC plugin calls this once during loading to install a handler that + * replaces placeholder stores with real stores (in-core, OOC, or recovered) + * after a .dream3d file is imported. Passing an empty std::function disables + * the handler. + * + * @param handler The data store import handler callback, or an empty std::function to clear it + */ + void setDataStoreImportHandler(DataStoreImportHandlerFnc handler); + + /** + * @brief Checks whether a data store import handler callback is currently registered. 
+ * @return true if a non-empty handler callback is set + */ + bool hasDataStoreImportHandler() const; + + /** + * @brief Invokes the registered data store import handler to replace placeholder + * stores with real stores after importing a .dream3d file. + * + * If no handler is registered, this is a no-op and returns an empty success + * Result. This allows the in-memory code path to skip store import entirely + * while the OOC code path processes every imported array. + * + * The @p eagerLoad callback is forwarded directly to the handler. It wraps the + * Dream3dIO loading infrastructure so the handler can bring individual arrays + * fully into memory without depending on Dream3dIO internals. + * + * @param dataStructure The target DataStructure whose placeholder stores should be replaced + * @param paths DataPaths of the objects imported from the file + * @param fileReader Open HDF5 file reader for the source .dream3d file + * @param eagerLoad Callback that loads a single array's data from HDF5 into memory + * @return Result<> indicating success, or containing errors from the handler + */ + Result<> runDataStoreImportHandler(DataStructure& dataStructure, const std::vector& paths, const nx::core::HDF5::FileIO& fileReader, EagerLoadFnc eagerLoad); + + /** + * @brief Registers or clears the write-array-override callback. + * + * The OOC plugin registers this once at load time. The callback is only + * invoked during DataStructure writes when it is also activated (see + * setWriteArrayOverrideActive()). This separation lets the plugin install + * the hook eagerly while only enabling it for specific save operations + * (e.g., recovery files) where OOC arrays need special handling. + * + * @param fnc The override callback, or an empty std::function to clear it + */ + void setWriteArrayOverride(WriteArrayOverrideFnc fnc); + + /** + * @brief Checks whether a write-array-override callback is currently registered. 
+ * @return true if a non-empty override callback is set + */ + bool hasWriteArrayOverride() const; + + /** + * @brief Activates or deactivates the write-array-override. + * + * The override is invoked only when both registered AND active. Prefer using + * WriteArrayOverrideGuard for scoped activation to ensure the override is + * always deactivated when the save operation completes (even on exceptions). + * + * @param active true to activate, false to deactivate + */ + void setWriteArrayOverrideActive(bool active); + + /** + * @brief Checks whether the write-array-override is currently active. + * + * An override can be registered but inactive (the default state after + * setWriteArrayOverride()). It must be explicitly activated before it fires. + * + * @return true if the override is currently active + */ + bool isWriteArrayOverrideActive() const; + + /** + * @brief Invokes the write-array-override callback for a single DataObject + * if the override is both registered and active. + * + * Called by DataStructureWriter for each DataObject during a save. The + * override can choose to handle the write (returning Result<>), or decline + * by returning std::nullopt, in which case the normal HDF5 write path + * proceeds. + * + * @param writer The DataStructureWriter performing the save + * @param dataObject The DataObject about to be written + * @param parentGroup The HDF5 group that would normally receive the object + * @return std::nullopt if the override is not active, not registered, or + * declines this object; otherwise Result<> from the override + */ + std::optional> runWriteArrayOverride(HDF5::DataStructureWriter& writer, const DataObject* dataObject, HDF5::GroupIO& parentGroup); + DataIOCollection(); ~DataIOCollection() noexcept; /** * Adds a specified data IO manager for reading and writing to the target format. * @param manager + * @return Result<> with an error if the manager is null or attempts to register + * under the reserved k_InMemoryFormat name. 
*/ - void addIOManager(std::shared_ptr manager); + Result<> addIOManager(std::shared_ptr manager); /** * @brief Returns the IDataIOManager for the specified format name. @@ -109,11 +351,101 @@ class SIMPLNX_EXPORT DataIOCollection } /** - * @brief Checks and validates the data format for the given data size. - * @param dataSize The size of the data in bytes - * @param dataFormat Reference to the data format string to validate/update + * @brief Checks whether any registered IO manager provides a factory for + * creating read-only reference DataStores (OOC stores backed by an existing + * HDF5 dataset). + * + * @param type The format name to query (e.g., "HDF5-OOC") + * @return true if at least one registered IO manager has a factory for @p type + */ + bool hasReadOnlyRefCreationFnc(const std::string& type) const; + + /** + * @brief Creates a read-only DataStore that lazily reads data from an existing + * dataset inside an HDF5 file. + * + * This is the primary entry point for constructing OOC DataArray backing + * stores. The method searches all registered IO managers for one that handles + * the requested format, then delegates to its DataStoreRefCreateFnc factory. + * The resulting store does not load the full dataset into memory; instead it + * reads chunks on demand using the provided chunk shape. + * + * @param type The format name (e.g., "HDF5-OOC") + * @param numericType The element data type (float32, int32, etc.) 
+ * @param filePath Filesystem path to the HDF5 file containing the data + * @param datasetPath HDF5-internal path to the dataset + * @param tupleShape Logical tuple dimensions of the array + * @param componentShape Component dimensions per tuple + * @param chunkShape Chunk dimensions controlling how many tuples are loaded per I/O operation + * @return A new read-only IDataStore, or nullptr if no factory handles @p type + */ + std::unique_ptr createReadOnlyRefStore(const std::string& type, DataType numericType, const std::filesystem::path& filePath, const std::string& datasetPath, const ShapeType& tupleShape, + const ShapeType& componentShape, const ShapeType& chunkShape); + + /** + * @brief Checks whether any registered IO manager provides a factory for + * creating read-only reference ListStores (OOC NeighborList stores backed by + * existing HDF5 datasets). + * + * @param type The format name to query (e.g., "HDF5-OOC") + * @return true if at least one registered IO manager has a factory for @p type + */ + bool hasReadOnlyRefListCreationFnc(const std::string& type) const; + + /** + * @brief Creates a read-only ListStore that lazily reads variable-length + * NeighborList data from an existing dataset inside an HDF5 file. + * + * Analogous to createReadOnlyRefStore() but for NeighborList arrays. The + * resulting store reads list entries on demand using chunk-based I/O. 
+ * + * @param type The format name (e.g., "HDF5-OOC") + * @param numericType The element data type + * @param filePath Filesystem path to the HDF5 file + * @param datasetPath HDF5-internal path to the dataset + * @param tupleShape Logical tuple dimensions + * @param chunkShape Chunk dimensions (tuples per chunk) + * @return A new read-only IListStore, or nullptr if no factory handles @p type */ - void checkStoreDataFormat(uint64 dataSize, std::string& dataFormat) const; + std::unique_ptr createReadOnlyRefListStore(const std::string& type, DataType numericType, const std::filesystem::path& filePath, const std::string& datasetPath, + const ShapeType& tupleShape, const ShapeType& chunkShape); + + /** + * @brief Checks whether any registered IO manager provides a factory for + * creating StringStores of the specified format. + * + * @param type The format name to query + * @return true if at least one registered IO manager has a StringStore factory for @p type + */ + bool hasStringStoreCreationFnc(const std::string& type) const; + + /** + * @brief Creates a StringStore (backing store for StringArray) of the + * specified format and dimensions. + * + * Searches all registered IO managers for one that handles the requested + * format, then delegates to its StringStoreCreateFnc factory. The resulting + * store may be in-memory or disk-backed depending on the IO manager. + * + * @param type The format name to use + * @param tupleShape Tuple dimensions for the string array + * @return A new AbstractStringStore, or nullptr if no factory handles @p type + */ + std::unique_ptr createStringStore(const std::string& type, const ShapeType& tupleShape); + + /** + * @brief Invokes all registered finalize-stores callbacks across every IO + * manager. + * + * Called after a pipeline finishes executing. 
Each IO manager that registered + * a FinalizeStoresFnc gets a chance to walk the DataStructure and transition + * its stores from write mode to read-only mode (e.g., closing HDF5 write + * handles and re-opening as read handles). This is a no-op if no IO manager + * has registered a finalize callback. + * + * @param dataStructure The DataStructure whose stores should be finalized + */ + void finalizeStores(DataStructure& dataStructure); /** * @brief Returns an iterator to the beginning of the manager collection. @@ -139,7 +471,128 @@ class SIMPLNX_EXPORT DataIOCollection */ const_iterator end() const; + /** + * @brief Registers a human-readable display name for a data store format. + * + * Plugins call this during hook registration to associate their internal + * format name (e.g., "HDF5-OOC") with a user-friendly label (e.g., + * "HDF5 Out-of-Core") for display in the DataStoreFormatParameter dropdown. + * + * @param formatName The internal format identifier + * @param displayName The human-readable label shown in the UI + */ + void registerFormatDisplayName(const std::string& formatName, const std::string& displayName); + + /** + * @brief Returns all known format display names as (formatName, displayName) pairs. + * + * The returned list always starts with: + * - ("", "Automatic") -- lets the resolver decide + * - (Preferences::k_InMemoryFormat, "In Memory") -- explicit in-memory + * + * Followed by any plugin-registered entries (e.g., ("HDF5-OOC", "HDF5 Out-of-Core")). + * + * @return Vector of (formatName, displayName) pairs + */ + std::vector> getFormatDisplayNames() const; + + /** + * @brief Produces a human-readable, multi-line description of every registered + * IO manager and the store types it can create. + * + * Intended for error messages when a createXxxStore() call returns nullptr so + * the user can immediately see which format names are available and what each + * one supports. 
Each row lists the manager's display name (falling back to its + * format-name identifier) followed by the set of factories it registers: + * DataStore, ListStore, StringStore, ReadOnlyRef(DataStore), ReadOnlyRef(ListStore). + * + * Example output: + * @code + * Registered IO managers and their capabilities: + * In Memory : DataStore, ListStore + * HDF5 : DataStore, ListStore, StringStore + * HDF5-OOC : DataStore, ListStore, StringStore, ReadOnlyRef(DataStore), ReadOnlyRef(ListStore) + * @endcode + * + * @return A multi-line string ready to drop into a fmt::format error message. + */ + std::string generateManagerListString() const; + private: map_type m_ManagerMap; + FormatResolverFnc m_FormatResolver; ///< Plugin-provided callback that selects storage format for new arrays + DataStoreImportHandlerFnc m_DataStoreImportHandler; ///< Plugin-provided callback that loads all data stores after .dream3d import + WriteArrayOverrideFnc m_WriteArrayOverride; ///< Plugin-provided callback that intercepts DataObject writes to HDF5 + bool m_WriteArrayOverrideActive = false; ///< Gate flag: override fires only when both registered and active + std::map m_FormatDisplayNames; ///< Plugin-registered human-readable format names +}; + +/** + * @brief RAII guard that activates the write-array-override callback for the + * duration of a scoped block, then automatically deactivates it on destruction. + * + * The write-array-override in DataIOCollection uses a two-phase design: + * 1. The callback is **registered** once at plugin load time via + * DataIOCollection::setWriteArrayOverride(). + * 2. The callback is **activated** only for specific save operations (e.g., + * recovery file writes) where OOC arrays need special write handling. + * + * This guard manages phase 2. Constructing the guard calls + * setWriteArrayOverrideActive(true), and the destructor calls + * setWriteArrayOverrideActive(false). 
This guarantees the override is + * deactivated even if an exception occurs during the save. + * + * Usage: + * @code + * { + * WriteArrayOverrideGuard guard(ioCollection); + * // All DataStructureWriter::writeDataObject() calls in this scope will + * // consult the override callback before performing the default HDF5 write. + * writer.writeDataStructure(dataStructure); + * } + * // Override is now deactivated; normal writes proceed without consulting the callback. + * @endcode + * + * This class is non-copyable and non-movable to prevent accidental misuse + * (e.g., moving the guard out of scope prematurely). + */ +class SIMPLNX_EXPORT WriteArrayOverrideGuard +{ +public: + /** + * @brief Constructs the guard and activates the write-array-override. + * + * The guard stores a reference to the collection, so the collection must + * outlive the guard. In practice the DataIOCollection is owned by the + * Application singleton, which outlives every caller. + * + * @param ioCollection The DataIOCollection whose override should be activated. + */ + explicit WriteArrayOverrideGuard(DataIOCollection& ioCollection) + : m_IOCollection(ioCollection) + { + // Activate the override so DataStructureWriter will consult the callback. + m_IOCollection.setWriteArrayOverrideActive(true); + } + + /** + * @brief Destructor that deactivates the write-array-override, restoring + * normal write behavior. + */ + ~WriteArrayOverrideGuard() + { + // Deactivate the override so subsequent writes go through the normal path. + m_IOCollection.setWriteArrayOverrideActive(false); + } + + // Non-copyable and non-movable to prevent scope-escape bugs. Additionally + // required because a reference member cannot be rebound or reassigned. 
+ WriteArrayOverrideGuard(const WriteArrayOverrideGuard&) = delete; + WriteArrayOverrideGuard& operator=(const WriteArrayOverrideGuard&) = delete; + WriteArrayOverrideGuard(WriteArrayOverrideGuard&&) = delete; + WriteArrayOverrideGuard& operator=(WriteArrayOverrideGuard&&) = delete; + +private: + DataIOCollection& m_IOCollection; ///< Non-owning reference; collection must outlive the guard }; } // namespace nx::core diff --git a/src/simplnx/DataStructure/IO/Generic/IDataIOManager.cpp b/src/simplnx/DataStructure/IO/Generic/IDataIOManager.cpp index 29b80f5bcc..06928ab394 100644 --- a/src/simplnx/DataStructure/IO/Generic/IDataIOManager.cpp +++ b/src/simplnx/DataStructure/IO/Generic/IDataIOManager.cpp @@ -1,5 +1,7 @@ #include "IDataIOManager.hpp" +#include "simplnx/DataStructure/AbstractStringStore.hpp" + namespace nx::core { IDataIOManager::IDataIOManager() = default; @@ -53,4 +55,111 @@ void IDataIOManager::addListStoreCreationFnc(const std::string& type, ListStoreC { m_ListStoreCreationMap[type] = creationFnc; } + +// --------------------------------------------------------------------------- +// Read-only reference DataStore factory (OOC DataArray support) +// --------------------------------------------------------------------------- + +bool IDataIOManager::hasReadOnlyRefCreationFnc(const std::string& type) const +{ + return m_ReadOnlyRefCreationMap.find(type) != m_ReadOnlyRefCreationMap.end(); +} + +IDataIOManager::DataStoreRefCreateFnc IDataIOManager::readOnlyRefCreationFnc(const std::string& type) const +{ + // Look up the factory by format name; return nullptr if this manager does + // not provide OOC DataStore support for the requested format. 
+ auto iter = m_ReadOnlyRefCreationMap.find(type); + if(iter == m_ReadOnlyRefCreationMap.end()) + { + return nullptr; + } + return iter->second; +} + +void IDataIOManager::addReadOnlyRefCreationFnc(const std::string& type, DataStoreRefCreateFnc creationFnc) +{ + // Register (or replace) the OOC DataStore factory for this format name. + m_ReadOnlyRefCreationMap[type] = std::move(creationFnc); +} + +// --------------------------------------------------------------------------- +// Read-only reference ListStore factory (OOC NeighborList support) +// --------------------------------------------------------------------------- + +bool IDataIOManager::hasListStoreRefCreationFnc(const std::string& type) const +{ + return m_ListStoreRefCreationMap.find(type) != m_ListStoreRefCreationMap.cend(); +} + +IDataIOManager::ListStoreRefCreateFnc IDataIOManager::listStoreRefCreationFnc(const std::string& type) const +{ + // Look up the factory by format name; return nullptr if this manager does + // not provide OOC ListStore support for the requested format. + auto iter = m_ListStoreRefCreationMap.find(type); + if(iter == m_ListStoreRefCreationMap.cend()) + { + return nullptr; + } + return iter->second; +} + +void IDataIOManager::addListStoreRefCreationFnc(const std::string& type, ListStoreRefCreateFnc creationFnc) +{ + // Register (or replace) the OOC ListStore factory for this format name. 
+ m_ListStoreRefCreationMap[type] = std::move(creationFnc); +} + +// --------------------------------------------------------------------------- +// StringStore factory (StringArray backing store) +// --------------------------------------------------------------------------- + +bool IDataIOManager::hasStringStoreCreationFnc(const std::string& type) const +{ + return m_StringStoreCreationMap.find(type) != m_StringStoreCreationMap.cend(); +} + +IDataIOManager::StringStoreCreateFnc IDataIOManager::stringStoreCreationFnc(const std::string& type) const +{ + // Look up the factory by format name; return nullptr if this manager does + // not provide StringStore support for the requested format. + auto iter = m_StringStoreCreationMap.find(type); + if(iter == m_StringStoreCreationMap.cend()) + { + return nullptr; + } + return iter->second; +} + +void IDataIOManager::addStringStoreCreationFnc(const std::string& type, StringStoreCreateFnc creationFnc) +{ + // Register (or replace) the StringStore factory for this format name. + m_StringStoreCreationMap[type] = std::move(creationFnc); +} + +// --------------------------------------------------------------------------- +// Post-pipeline store finalization callbacks +// --------------------------------------------------------------------------- + +bool IDataIOManager::hasFinalizeStoresFnc(const std::string& type) const +{ + return m_FinalizeStoresMap.find(type) != m_FinalizeStoresMap.end(); +} + +IDataIOManager::FinalizeStoresFnc IDataIOManager::finalizeStoresFnc(const std::string& type) const +{ + // Look up the finalize callback; return nullptr if none is registered. + auto iter = m_FinalizeStoresMap.find(type); + if(iter == m_FinalizeStoresMap.end()) + { + return nullptr; + } + return iter->second; +} + +void IDataIOManager::addFinalizeStoresFnc(const std::string& type, FinalizeStoresFnc fnc) +{ + // Register (or replace) the post-pipeline finalization callback for this format. 
+ m_FinalizeStoresMap[type] = std::move(fnc); +} } // namespace nx::core diff --git a/src/simplnx/DataStructure/IO/Generic/IDataIOManager.hpp b/src/simplnx/DataStructure/IO/Generic/IDataIOManager.hpp index b834afc916..6d3d210c81 100644 --- a/src/simplnx/DataStructure/IO/Generic/IDataIOManager.hpp +++ b/src/simplnx/DataStructure/IO/Generic/IDataIOManager.hpp @@ -8,6 +8,7 @@ #include "simplnx/Common/Types.hpp" +#include #include #include #include @@ -15,6 +16,8 @@ namespace nx::core { +class AbstractStringStore; +class DataStructure; class IDataFactory; /** @@ -27,10 +30,81 @@ class SIMPLNX_EXPORT IDataIOManager using factory_id_type = std::string; using factory_ptr = std::shared_ptr; using factory_collection = std::map; + /** + * @brief Factory callback for creating a new in-memory DataStore. + * + * Takes the numeric type, tuple shape, component shape, and an optional chunk + * shape hint. Returns a newly allocated IDataStore. Registered by IO managers + * that provide writable storage (e.g., CoreDataIOManager for in-memory, or an + * OOC manager for chunk-backed stores). + */ using DataStoreCreateFnc = std::function(DataType, const ShapeType&, const ShapeType&, const std::optional&)>; + + /** + * @brief Factory callback for creating a new in-memory NeighborList store. + * + * Takes the numeric type and tuple shape. Returns a newly allocated IListStore. + * Used by IO managers that can provide writable list-based storage. + */ using ListStoreCreateFnc = std::function(DataType, const ShapeType&)>; - using DataStoreCreationMap = std::map; - using ListStoreCreationMap = std::map; + + /** + * @brief Factory callback for creating a read-only DataStore that references + * an existing dataset inside an HDF5 file without loading it into memory. + * + * This is the core factory for out-of-core (OOC) DataArray access. The + * resulting store lazily reads data from disk on demand via chunk-based I/O. + * + * @param numericType The element data type (float32, int32, etc.) 
+ * @param filePath Filesystem path to the HDF5 file containing the data + * @param datasetPath HDF5-internal path to the dataset (e.g., "/DataContainer/CellData/Phases") + * @param tupleShape Logical tuple dimensions of the array + * @param componentShape Component dimensions per tuple + * @param chunkShape Chunk dimensions controlling how many tuples are loaded per I/O operation + */ + using DataStoreRefCreateFnc = std::function(DataType numericType, const std::filesystem::path& filePath, const std::string& datasetPath, const ShapeType& tupleShape, + const ShapeType& componentShape, const ShapeType& chunkShape)>; + + /** + * @brief Factory callback for creating a read-only NeighborList store that + * references existing datasets inside an HDF5 file. + * + * Analogous to DataStoreRefCreateFnc but for NeighborList data. The resulting + * store lazily reads variable-length lists from disk on demand. + * + * @param numericType The element data type + * @param filePath Filesystem path to the HDF5 file + * @param datasetPath HDF5-internal path to the dataset + * @param tupleShape Logical tuple dimensions + * @param chunkShape Chunk dimensions (tuples per chunk) + */ + using ListStoreRefCreateFnc = + std::function(DataType numericType, const std::filesystem::path& filePath, const std::string& datasetPath, const ShapeType& tupleShape, const ShapeType& chunkShape)>; + + /** + * @brief Factory callback for creating a new StringStore (e.g., for StringArray). + * + * Takes the tuple shape and returns a newly allocated AbstractStringStore. + * Registered by IO managers that support string storage (in-memory or OOC). + */ + using StringStoreCreateFnc = std::function(const ShapeType& tupleShape)>; + + /** + * @brief Post-pipeline callback that finalizes all stores owned by a given + * IO format. + * + * Called after a pipeline finishes executing. 
The callback walks the + * DataStructure and transitions stores from write mode to read-only mode + * (e.g., closing HDF5 write handles and opening read handles). This ensures + * that subsequent reads from the stores see the fully written data. + */ + using FinalizeStoresFnc = std::function; + using DataStoreCreationMap = std::map; ///< Maps format name -> writable DataStore factory + using ListStoreCreationMap = std::map; ///< Maps format name -> writable ListStore factory + using DataStoreRefCreationMap = std::map; ///< Maps format name -> read-only DataStore factory (OOC) + using ListStoreRefCreationMap = std::map; ///< Maps format name -> read-only ListStore factory (OOC) + using StringStoreCreationMap = std::map; ///< Maps format name -> StringStore factory + using FinalizeStoresMap = std::map; ///< Maps format name -> post-pipeline finalize callback virtual ~IDataIOManager() noexcept; @@ -110,6 +184,93 @@ class SIMPLNX_EXPORT IDataIOManager */ ListStoreCreateFnc listStoreCreationFnc(const std::string& type) const; + /** + * @brief Checks whether this IO manager has registered a factory for creating + * read-only reference DataStores (out-of-core stores that lazily read from an + * existing HDF5 file). + * + * Callers should check this before calling readOnlyRefCreationFnc() to avoid + * a null return. + * + * @param type The format name to look up (e.g., "HDF5-OOC") + * @return true if a DataStoreRefCreateFnc is registered for @p type + */ + bool hasReadOnlyRefCreationFnc(const std::string& type) const; + + /** + * @brief Returns the factory callback for creating a read-only reference + * DataStore that performs lazy chunk-based reads from an HDF5 file. + * + * The returned callback, when invoked, produces a store that does not copy + * the full dataset into memory; instead it reads chunks on demand. 
+ * + * @param type The format name to look up (e.g., "HDF5-OOC") + * @return The registered DataStoreRefCreateFnc, or nullptr if none is registered + */ + DataStoreRefCreateFnc readOnlyRefCreationFnc(const std::string& type) const; + + /** + * @brief Checks whether this IO manager has registered a factory for creating + * read-only reference ListStores (out-of-core NeighborList stores). + * + * @param type The format name to look up (e.g., "HDF5-OOC") + * @return true if a ListStoreRefCreateFnc is registered for @p type + */ + bool hasListStoreRefCreationFnc(const std::string& type) const; + + /** + * @brief Returns the factory callback for creating a read-only reference + * ListStore that performs lazy reads of variable-length NeighborList data + * from an HDF5 file. + * + * @param type The format name to look up (e.g., "HDF5-OOC") + * @return The registered ListStoreRefCreateFnc, or nullptr if none is registered + */ + ListStoreRefCreateFnc listStoreRefCreationFnc(const std::string& type) const; + + /** + * @brief Checks whether this IO manager has registered a factory for creating + * StringStores (stores backing StringArray objects). + * + * @param type The format name to look up + * @return true if a StringStoreCreateFnc is registered for @p type + */ + bool hasStringStoreCreationFnc(const std::string& type) const; + + /** + * @brief Returns the factory callback for creating a StringStore. The + * resulting store backs a StringArray and may be in-memory or out-of-core + * depending on the IO manager that registered it. + * + * @param type The format name to look up + * @return The registered StringStoreCreateFnc, or nullptr if none is registered + */ + StringStoreCreateFnc stringStoreCreationFnc(const std::string& type) const; + + /** + * @brief Checks whether this IO manager has registered a finalize-stores + * callback for the specified format. 
+ * + * The finalize callback is invoked after pipeline execution to transition + * stores from write mode to read-only mode. + * + * @param type The format name to look up + * @return true if a FinalizeStoresFnc is registered for @p type + */ + bool hasFinalizeStoresFnc(const std::string& type) const; + + /** + * @brief Returns the finalize-stores callback for the specified format. + * + * When invoked, this callback walks the DataStructure and closes any write + * handles, replacing them with read-only handles so that data written during + * the pipeline is accessible for subsequent reads. + * + * @param type The format name to look up + * @return The registered FinalizeStoresFnc, or nullptr if none is registered + */ + FinalizeStoresFnc finalizeStoresFnc(const std::string& type) const; + protected: /** * @brief Default constructor. @@ -130,9 +291,69 @@ class SIMPLNX_EXPORT IDataIOManager */ void addListStoreCreationFnc(const std::string& type, ListStoreCreateFnc creationFnc); + /** + * @brief Registers a factory callback that creates read-only reference + * DataStores for the given format name. + * + * Derived IO managers call this during construction to advertise their + * ability to create OOC DataStores. DataIOCollection::createReadOnlyRefStore() + * dispatches to the callback registered here. + * + * @param type The format name to register under (e.g., "HDF5-OOC") + * @param creationFnc The factory callback. Replaces any previously registered + * callback for the same @p type. + */ + void addReadOnlyRefCreationFnc(const std::string& type, DataStoreRefCreateFnc creationFnc); + + /** + * @brief Registers a factory callback that creates read-only reference + * ListStores (NeighborList) for the given format name. + * + * Derived IO managers call this during construction to advertise their + * ability to create OOC ListStores. DataIOCollection::createReadOnlyRefListStore() + * dispatches to the callback registered here. 
+ * + * @param type The format name to register under (e.g., "HDF5-OOC") + * @param creationFnc The factory callback. Replaces any previously registered + * callback for the same @p type. + */ + void addListStoreRefCreationFnc(const std::string& type, ListStoreRefCreateFnc creationFnc); + + /** + * @brief Registers a factory callback that creates StringStores for the given + * format name. + * + * Derived IO managers call this during construction to advertise their + * ability to create StringStores (in-memory or OOC). DataIOCollection::createStringStore() + * dispatches to the callback registered here. + * + * @param type The format name to register under + * @param creationFnc The factory callback. Replaces any previously registered + * callback for the same @p type. + */ + void addStringStoreCreationFnc(const std::string& type, StringStoreCreateFnc creationFnc); + + /** + * @brief Registers a post-pipeline finalization callback for the given format. + * + * The callback is invoked by DataIOCollection::finalizeStores() after pipeline + * execution completes. It should walk the DataStructure and transition any + * stores owned by this format from write mode to read-only mode (e.g., closing + * HDF5 write handles and re-opening as read handles). + * + * @param type The format name to register under + * @param fnc The finalization callback. Replaces any previously registered + * callback for the same @p type. 
+ */ + void addFinalizeStoresFnc(const std::string& type, FinalizeStoresFnc fnc); + private: factory_collection m_FactoryCollection; DataStoreCreationMap m_DataStoreCreationMap; ListStoreCreationMap m_ListStoreCreationMap; + DataStoreRefCreationMap m_ReadOnlyRefCreationMap; ///< OOC read-only DataStore factories keyed by format name + ListStoreRefCreationMap m_ListStoreRefCreationMap; ///< OOC read-only ListStore factories keyed by format name + StringStoreCreationMap m_StringStoreCreationMap; ///< StringStore factories keyed by format name + FinalizeStoresMap m_FinalizeStoresMap; ///< Post-pipeline finalize callbacks keyed by format name }; } // namespace nx::core diff --git a/src/simplnx/DataStructure/IO/HDF5/DataArrayIO.hpp b/src/simplnx/DataStructure/IO/HDF5/DataArrayIO.hpp index 0f2db650e1..b6245759c8 100644 --- a/src/simplnx/DataStructure/IO/HDF5/DataArrayIO.hpp +++ b/src/simplnx/DataStructure/IO/HDF5/DataArrayIO.hpp @@ -39,11 +39,28 @@ class DataArrayIO : public IDataIO */ template static void importDataArray(DataStructure& dataStructure, const nx::core::HDF5::DatasetIO& datasetReader, const std::string dataArrayName, DataObject::IdType importId, - nx::core::HDF5::ErrorType& err, const std::optional& parentId, bool preflight) + nx::core::HDF5::ErrorType& err, const std::optional& parentId, bool preflight, std::vector& warnings) { - std::shared_ptr> dataStore = - preflight ? std::shared_ptr>(EmptyDataStoreIO::ReadDataStore(datasetReader)) : (DataStoreIO::ReadDataStore(datasetReader)); - DataArray* data = DataArray::Import(dataStructure, dataArrayName, importId, std::move(dataStore), parentId); + if(preflight) + { + std::shared_ptr> dataStore(EmptyDataStoreIO::ReadDataStore(datasetReader)); + DataArray* data = DataArray::Import(dataStructure, dataArrayName, importId, std::move(dataStore), parentId); + err = (data == nullptr) ? 
-400 : 0; + return; + } + + auto storeResult = DataStoreIO::ReadDataStoreIntoMemory(datasetReader); + for(auto&& warning : storeResult.warnings()) + { + warnings.push_back(std::move(warning)); + } + if(storeResult.value() == nullptr) + { + // Placeholder detected — skip this array without error + err = 0; + return; + } + DataArray* data = DataArray::Import(dataStructure, dataArrayName, importId, std::move(storeResult.value()), parentId); err = (data == nullptr) ? -400 : 0; } @@ -58,13 +75,16 @@ class DataArrayIO : public IDataIO template static Result<> importDataStore(data_type* dataArray, const DataPath& dataPath, const nx::core::HDF5::DatasetIO& datasetReader) { - std::shared_ptr> dataStore = DataStoreIO::ReadDataStore(datasetReader); - if(dataStore == nullptr) + auto storeResult = DataStoreIO::ReadDataStoreIntoMemory(datasetReader); + Result<> result; + result.m_Warnings = std::move(storeResult.warnings()); + if(storeResult.value() == nullptr) { - return MakeErrorResult(-150202, fmt::format("Failed to import DataArray data at path '{}'.", dataPath.toString())); + // Placeholder detected — propagate warnings, skip without error + return result; } - dataArray->setDataStore(dataStore); - return {}; + dataArray->setDataStore(std::move(storeResult.value())); + return result; } /** @@ -167,45 +187,46 @@ class DataArrayIO : public IDataIO } int32 err = 0; + std::vector warnings; switch(type) { case DataType::float32: - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); break; case DataType::float64: - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, 
useEmptyDataStore, warnings); break; case DataType::int8: - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); break; case DataType::int16: - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); break; case DataType::int32: - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); break; case DataType::int64: - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); break; case DataType::uint8: { if(isBoolArray) { - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); } else { - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); } } break; case DataType::uint16: - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, 
parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); break; case DataType::uint32: - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); break; case DataType::uint64: - importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore); + importDataArray(dataStructureReader.getDataStructure(), datasetReader, dataArrayName, importId, err, parentId, useEmptyDataStore, warnings); break; default: { err = -777; @@ -215,10 +236,14 @@ class DataArrayIO : public IDataIO if(err < 0) { - return MakeErrorResult(err, fmt::format("Error importing dataset from HDF5 file. DataArray name '{}' that is a child of '{}'", dataArrayName, parentGroup.getName())); + auto result = MakeErrorResult(err, fmt::format("Error importing dataset from HDF5 file. 
DataArray name '{}' that is a child of '{}'", dataArrayName, parentGroup.getName())); + result.m_Warnings = std::move(warnings); + return result; } - return {}; + Result<> result; + result.m_Warnings = std::move(warnings); + return result; } /** diff --git a/src/simplnx/DataStructure/IO/HDF5/DataStoreIO.hpp b/src/simplnx/DataStructure/IO/HDF5/DataStoreIO.hpp index 908e9bdde9..86104438fb 100644 --- a/src/simplnx/DataStructure/IO/HDF5/DataStoreIO.hpp +++ b/src/simplnx/DataStructure/IO/HDF5/DataStoreIO.hpp @@ -1,5 +1,6 @@ #pragma once +#include "simplnx/Common/Result.hpp" #include "simplnx/DataStructure/DataStore.hpp" #include "simplnx/DataStructure/IO/HDF5/IDataStoreIO.hpp" #include "simplnx/Utilities/DataStoreUtilities.hpp" @@ -7,6 +8,9 @@ #include +#include +#include + namespace nx::core { namespace HDF5 @@ -44,20 +48,48 @@ inline Result<> WriteDataStore(nx::core::HDF5::DatasetIO& datasetWriter, const A } /** - * @brief Attempts to read a DataStore from the dataset reader - * @param datasetReader - * @return std::unique_ptr> + * @brief Reads an HDF5 dataset into an in-memory DataStore. + * + * Reads tuple/component shapes from HDF5 attributes, allocates an + * in-core DataStore, and loads all data from the dataset into memory. + * This function does not handle OOC stores or recovery-file placeholders; + * those are handled by the data store import handler at a higher level. + * + * If the physical HDF5 dataset element count does not match the expected + * count from shape attributes, the dataset is skipped and a warning is + * returned. This guards against reading placeholder datasets written by + * plugins that are not currently loaded. 
+ * + * @param datasetReader The HDF5 dataset to read from + * @return Result containing the in-memory data store, or a warning with + * nullptr if the dataset is a placeholder */ template -inline std::shared_ptr> ReadDataStore(const nx::core::HDF5::DatasetIO& datasetReader) +inline Result>> ReadDataStoreIntoMemory(const nx::core::HDF5::DatasetIO& datasetReader) { auto tupleShape = IDataStoreIO::ReadTupleShape(datasetReader); auto componentShape = IDataStoreIO::ReadComponentShape(datasetReader); - // Create DataStore + // Check that the physical HDF5 dataset size matches the expected size + // from shape attributes. A mismatch indicates the dataset is a + // placeholder (e.g. written by an OOC plugin that is not loaded). + usize expectedElements = std::accumulate(tupleShape.cbegin(), tupleShape.cend(), static_cast(1), std::multiplies<>()) * + std::accumulate(componentShape.cbegin(), componentShape.cend(), static_cast(1), std::multiplies<>()); + usize physicalElements = datasetReader.getNumElements(); + + if(physicalElements != expectedElements) + { + Result>> result; + result.warnings().push_back(Warning{-89200, fmt::format("Unable to read dataset '{}' at path '{}': the file contains {} elements but the shape " + "attributes indicate {} elements. This typically means the dataset is a placeholder written " + "by a plugin that is not currently loaded. 
Ensure all required plugins are loaded and try again.", + datasetReader.getName(), datasetReader.getObjectPath(), physicalElements, expectedElements)}); + return result; + } + auto dataStore = DataStoreUtilities::CreateDataStore(tupleShape, componentShape, IDataAction::Mode::Execute); dataStore->readHdf5(datasetReader); - return dataStore; + return {std::move(dataStore)}; } } // namespace DataStoreIO } // namespace HDF5 diff --git a/src/simplnx/DataStructure/IO/HDF5/DataStructureWriter.cpp b/src/simplnx/DataStructure/IO/HDF5/DataStructureWriter.cpp index cd4f4ae3ed..0fc41e2bd3 100644 --- a/src/simplnx/DataStructure/IO/HDF5/DataStructureWriter.cpp +++ b/src/simplnx/DataStructure/IO/HDF5/DataStructureWriter.cpp @@ -2,8 +2,10 @@ #include "simplnx/Core/Application.hpp" #include "simplnx/DataStructure/INeighborList.hpp" +#include "simplnx/DataStructure/IO/Generic/DataIOCollection.hpp" #include "simplnx/DataStructure/IO/HDF5/DataIOManager.hpp" #include "simplnx/DataStructure/IO/HDF5/IDataIO.hpp" +#include "simplnx/Utilities/DataStoreUtilities.hpp" #include "simplnx/Utilities/Parsing/HDF5/IO/FileIO.hpp" @@ -115,21 +117,43 @@ Result<> DataStructureWriter::writeDataObject(const DataObject* dataObject, nx:: // Create an HDF5 link return writeDataObjectLink(dataObject, parentGroup); } - else + + // ----------------------------------------------------------------------- + // Write-array-override hook (OOC recovery-file support) + // ----------------------------------------------------------------------- + // When a WriteArrayOverrideGuard is on the stack (e.g. during + // WriteRecoveryFile), a plugin-registered callback intercepts DataArray + // writes. For OOC-backed arrays the callback writes a zero-byte + // placeholder dataset annotated with OocBackingFilePath / + // OocBackingDatasetPath / OocChunkShape attributes, so the recovery + // file stays small while preserving enough metadata to reattach to the + // backing file on reload (see the data store import handler). 
+ // + // If the callback returns std::nullopt the object is not OOC-backed, so + // we fall through to the normal HDF5 write path below. + auto& ioCollection = DataStoreUtilities::GetIOCollection(); + if(ioCollection.isWriteArrayOverrideActive()) { - // Write new data - auto factory = m_IOManager->getFactoryAs(dataObject->getTypeName()); - if(factory == nullptr) + auto overrideResult = ioCollection.runWriteArrayOverride(*this, dataObject, parentGroup); + if(overrideResult.has_value()) { - std::string ss = fmt::format("Could not find IO factory for datatype: {}", dataObject->getTypeName()); - return MakeErrorResult(-5, ss); + return overrideResult.value(); } + // std::nullopt -- not an OOC-backed object; continue to normal write. + } - auto result = factory->writeDataObject(*this, dataObject, parentGroup); - if(result.invalid()) - { - return result; - } + // Normal write path + auto factory = m_IOManager->getFactoryAs(dataObject->getTypeName()); + if(factory == nullptr) + { + std::string ss = fmt::format("Could not find IO factory for datatype: {}", dataObject->getTypeName()); + return MakeErrorResult(-5, ss); + } + + auto result = factory->writeDataObject(*this, dataObject, parentGroup); + if(result.invalid()) + { + return result; } return {}; diff --git a/src/simplnx/DataStructure/IO/HDF5/DataStructureWriter.hpp b/src/simplnx/DataStructure/IO/HDF5/DataStructureWriter.hpp index 4403359b0a..a6da75d8cc 100644 --- a/src/simplnx/DataStructure/IO/HDF5/DataStructureWriter.hpp +++ b/src/simplnx/DataStructure/IO/HDF5/DataStructureWriter.hpp @@ -75,12 +75,24 @@ class SIMPLNX_EXPORT DataStructureWriter /** * @brief Writes the DataObject under the given GroupIO. If the - * DataObject has already been written, a link is create instead. + * DataObject has already been written, a link is created instead. + * + * Before using the normal type-factory write path, this method checks + * two conditions in order: + * + * 1. 
**Deduplication** -- If the DataObject has already been written to + * this file, an HDF5 hard link is created instead of a duplicate copy. + * + * 2. **Write-array-override hook** -- If a WriteArrayOverrideGuard is + * active (e.g. during WriteRecoveryFile), the registered plugin + * callback gets a chance to write OOC-backed arrays as lightweight + * placeholder datasets. If the callback returns std::nullopt the + * object is not OOC-backed, and the normal path is used. * * If the process encounters an error, the error code is returned. Otherwise, * this method returns 0. - * @param dataObject - * @param parentGroup + * @param dataObject The DataObject to write + * @param parentGroup The HDF5 group to write the object into * @return Result<> */ Result<> writeDataObject(const DataObject* dataObject, GroupIO& parentGroup); diff --git a/src/simplnx/DataStructure/IO/HDF5/NeighborListIO.hpp b/src/simplnx/DataStructure/IO/HDF5/NeighborListIO.hpp index 6475ce80cb..25c250cf99 100644 --- a/src/simplnx/DataStructure/IO/HDF5/NeighborListIO.hpp +++ b/src/simplnx/DataStructure/IO/HDF5/NeighborListIO.hpp @@ -9,6 +9,7 @@ #include "simplnx/DataStructure/IO/HDF5/DataStoreIO.hpp" #include "simplnx/DataStructure/IO/HDF5/IDataIO.hpp" #include "simplnx/DataStructure/NeighborList.hpp" +#include "simplnx/Utilities/DataStoreUtilities.hpp" #include @@ -28,13 +29,27 @@ class NeighborListIO : public IDataIO ~NeighborListIO() noexcept override = default; /** - * @brief Attempts to read the NeighborList data from HDF5. - * Returns a Result<> with any errors or warnings encountered during the process. - * @param parentGroup - * @param dataReader - * @return Result<> + * @brief Reads NeighborList data from an HDF5 dataset. + * + * When useEmptyDataStore is true, only the TupleDimensions attribute from + * the linked NumNeighbors dataset is read, and an EmptyListStore placeholder + * is returned. The actual data is loaded later by finishImportingData(). 
+ * + * When useEmptyDataStore is false, the full flat data array is read from + * HDF5, split into per-tuple vectors using the NumNeighbors companion + * array, and packed into an in-memory ListStore. + * + * If the NumNeighbors companion array is a placeholder (element count + * mismatch), warnings are accumulated and nullptr is returned. The caller + * should treat this as a skip, not an error. + * + * @param parentGroup The HDF5 group containing the dataset and its companion + * @param dataReader The HDF5 dataset containing the flat packed neighbor data + * @param useEmptyDataStore If true, return an EmptyListStore placeholder + * @param warnings Output vector to accumulate any warnings encountered + * @return std::shared_ptr The created list store, or nullptr on error/placeholder */ - static std::shared_ptr ReadHdf5Data(const nx::core::HDF5::GroupIO& parentGroup, const nx::core::HDF5::DatasetIO& dataReader, bool useEmptyDataStore = false) + static std::shared_ptr ReadHdf5Data(const nx::core::HDF5::GroupIO& parentGroup, const nx::core::HDF5::DatasetIO& dataReader, bool useEmptyDataStore, std::vector& warnings) { try { @@ -58,8 +73,17 @@ class NeighborListIO : public IDataIO return std::make_shared>(tupleDimsResult.value()); } - auto numNeighborsPtr = DataStoreIO::ReadDataStore(numNeighborsReader); - auto& numNeighborsStore = *numNeighborsPtr.get(); + auto numNeighborsResult = DataStoreIO::ReadDataStoreIntoMemory(numNeighborsReader); + for(auto&& warning : numNeighborsResult.warnings()) + { + warnings.push_back(std::move(warning)); + } + if(numNeighborsResult.value() == nullptr) + { + // NumNeighbors is a placeholder — cannot populate NeighborList + return nullptr; + } + auto& numNeighborsStore = *numNeighborsResult.value(); auto flatDataStorePtr = dataReader.template readAsDataStore(); if(flatDataStorePtr == nullptr) @@ -112,21 +136,39 @@ class NeighborListIO : public IDataIO const std::optional& parentId, bool useEmptyDataStore = false) const override { auto 
datasetReader = parentGroup.openDataset(objectName); - auto listStorePtr = ReadHdf5Data(parentGroup, datasetReader, useEmptyDataStore); + std::vector warnings; + auto listStorePtr = ReadHdf5Data(parentGroup, datasetReader, useEmptyDataStore, warnings); + + Result<> result; + result.m_Warnings = std::move(warnings); + + if(listStorePtr == nullptr && !result.m_Warnings.empty()) + { + // Placeholder detected — skip this NeighborList, propagate warnings + return result; + } + auto* dataObject = data_type::Import(dataStructureReader.getDataStructure(), objectName, importId, listStorePtr, parentId); if(dataObject == nullptr) { std::string ss = "Failed to import NeighborList from HDF5"; return MakeErrorResult(-505, ss); } - return {}; + return result; } /** - * @brief Replaces the AbstractListStore using data from the HDF5 dataset. - * @param dataStructure - * @param dataPath - * @param dataStructureReader + * @brief Replaces the placeholder AbstractListStore with real data from the + * HDF5 dataset. This is the "backfill" step called after preflight when the + * DataStructure was initially loaded with empty stores. + * + * Reads the flat data array from HDF5 and scatters it into per-tuple vectors + * in an in-memory ListStore. OOC format decisions for imported data are + * handled at a higher level by the backfill strategy. + * + * @param dataStructure The DataStructure containing the NeighborList to populate + * @param dataPath Path to the NeighborList in the DataStructure + * @param parentGroup The HDF5 group containing the dataset * @return Result<> */ Result<> finishImportingData(DataStructure& dataStructure, const DataPath& dataPath, const group_reader_type& parentGroup) const override @@ -147,10 +189,32 @@ class NeighborListIO : public IDataIO } numNeighborsName = std::move(numNeighborsNameResult.value()); + // Read the "NumNeighbors" companion array, which stores the per-tuple + // neighbor count used to interpret the flat packed data array. 
auto numNeighborsReader = parentGroup.openDataset(numNeighborsName); - auto numNeighborsPtr = DataStoreIO::ReadDataStore(numNeighborsReader); - auto& numNeighborsStore = *numNeighborsPtr.get(); + auto numNeighborsResult = DataStoreIO::ReadDataStoreIntoMemory(numNeighborsReader); + Result<> result; + for(auto&& warning : numNeighborsResult.warnings()) + { + result.m_Warnings.push_back(std::move(warning)); + } + if(numNeighborsResult.value() == nullptr) + { + // NumNeighbors is a placeholder — cannot populate NeighborList, propagate warnings + return result; + } + auto& numNeighborsStore = *numNeighborsResult.value(); + + const auto numTuples = numNeighborsStore.getNumberOfTuples(); + const auto tupleShape = numNeighborsStore.getTupleShape(); + + // Format resolution for imported data is handled by the backfill strategy + // at a higher level (CreateNeighborListAction / ImportH5ObjectPathsAction). + // During the eager HDF5 read path, we always load in-core. + // + // Read the entire flat data array from HDF5 and scatter it into + // per-tuple vectors in an in-memory ListStore. 
auto flatDataStorePtr = dataReader.template readAsDataStore(); if(flatDataStorePtr == nullptr) { @@ -163,8 +227,7 @@ class NeighborListIO : public IDataIO } usize offset = 0; - const auto numTuples = numNeighborsStore.getNumberOfTuples(); - auto listStorePtr = DataStoreUtilities::CreateListStore(numNeighborsStore.getTupleShape()); + auto listStorePtr = DataStoreUtilities::CreateListStore(tupleShape); AbstractListStore& listStore = *listStorePtr.get(); for(usize i = 0; i < numTuples; i++) { @@ -179,7 +242,7 @@ class NeighborListIO : public IDataIO } neighborList.setStore(listStorePtr); - return {}; + return result; } /** diff --git a/src/simplnx/DataStructure/IO/HDF5/StringArrayIO.cpp b/src/simplnx/DataStructure/IO/HDF5/StringArrayIO.cpp index 7a5116aabe..98176e015e 100644 --- a/src/simplnx/DataStructure/IO/HDF5/StringArrayIO.cpp +++ b/src/simplnx/DataStructure/IO/HDF5/StringArrayIO.cpp @@ -1,7 +1,7 @@ #include "StringArrayIO.hpp" -#include #include "DataStructureReader.hpp" +#include "simplnx/DataStructure/EmptyStringStore.hpp" #include "simplnx/DataStructure/StringArray.hpp" #include "simplnx/DataStructure/StringStore.hpp" @@ -52,10 +52,29 @@ Result<> StringArrayIO::readData(DataStructureReader& dataStructureReader, const { tupleShape = std::move(tupleShapeResult.value()); } - usize numValues = std::accumulate(tupleShape.cbegin(), tupleShape.cend(), 1ULL, std::multiplies<>()); - std::vector strings = useEmptyDataStore ? std::vector(numValues) : datasetReader.readAsVectorOfStrings(); - const auto* data = StringArray::Import(dataStructureReader.getDataStructure(), dataArrayName, tupleShape, importId, std::move(strings), parentId); + StringArray* data = nullptr; + if(useEmptyDataStore) + { + // During preflight (useEmptyDataStore == true), we create the StringArray + // with an empty string vector to avoid allocating potentially millions of + // std::string objects that would never be used. 
We then immediately swap + // the underlying store for an EmptyStringStore placeholder that reports + // the correct tuple shape/count but holds no data. The actual string + // content will be loaded later by finishImportingData() when the + // pipeline transitions from preflight to execution. + data = StringArray::Import(dataStructureReader.getDataStructure(), dataArrayName, tupleShape, importId, std::vector{}, parentId); + if(data != nullptr) + { + auto emptyStore = std::make_shared(tupleShape); + data->setStore(emptyStore); + } + } + else + { + std::vector strings = datasetReader.readAsVectorOfStrings(); + data = StringArray::Import(dataStructureReader.getDataStructure(), dataArrayName, tupleShape, importId, std::move(strings), parentId); + } if(data == nullptr) { diff --git a/src/simplnx/DataStructure/StringStore.hpp b/src/simplnx/DataStructure/StringStore.hpp index b6eaa39480..d6560354ed 100644 --- a/src/simplnx/DataStructure/StringStore.hpp +++ b/src/simplnx/DataStructure/StringStore.hpp @@ -7,7 +7,20 @@ namespace nx::core { -class StringStore : public AbstractStringStore +/** + * @class StringStore + * @brief The concrete in-memory string storage backend for StringArray. + * + * StringStore owns a `std::vector` and provides full + * read/write access to its elements. This is the "real" store that holds + * loaded string data, as opposed to EmptyStringStore which is a + * metadata-only placeholder. + * + * @see AbstractStringStore The abstract interface this class implements. + * @see EmptyStringStore The placeholder counterpart used during preflight + * or OOC skeleton construction. + */ +class SIMPLNX_EXPORT StringStore : public AbstractStringStore { public: /** @@ -26,7 +39,7 @@ class StringStore : public AbstractStringStore /** * @brief Destructor. */ - ~StringStore(); + ~StringStore() override; /** * @brief Creates a deep copy of this StringStore. 
@@ -65,6 +78,21 @@ class StringStore : public AbstractStringStore */ bool empty() const override; + /** + * @brief Returns false because StringStore always contains real, accessible + * string data (backed by a std::vector). + * + * This distinguishes StringStore from EmptyStringStore, which is a + * metadata-only placeholder. Import/backfill code uses isPlaceholder() + * to decide which string arrays still need their data loaded from disk. + * + * @return false Always returns false for StringStore. + */ + bool isPlaceholder() const override + { + return false; + } + /** * @brief Array subscript operator to access the string at the specified index. * @param index The index to access diff --git a/src/simplnx/Filter/Actions/CreateArrayAction.cpp b/src/simplnx/Filter/Actions/CreateArrayAction.cpp index c5ea2caa40..19b0333986 100644 --- a/src/simplnx/Filter/Actions/CreateArrayAction.cpp +++ b/src/simplnx/Filter/Actions/CreateArrayAction.cpp @@ -10,7 +10,7 @@ namespace struct CreateArrayFunctor { template - Result<> operator()(DataStructure& dataStructure, const std::vector& tDims, const std::vector& cDims, const DataPath& path, IDataAction::Mode mode, std::string dataFormat, + Result<> operator()(DataStructure& dataStructure, const std::vector& tDims, const std::vector& cDims, const DataPath& path, IDataAction::Mode mode, const std::string& dataFormat, std::string fillValue) { return ArrayCreationUtilities::CreateArray(dataStructure, tDims, cDims, path, mode, dataFormat, fillValue); @@ -25,8 +25,8 @@ CreateArrayAction::CreateArrayAction(DataType type, const std::vector& tD , m_Type(type) , m_Dims(tDims) , m_CDims(cDims) -, m_DataFormat(dataFormat) -, m_FillValue(fillValue) +, m_DataFormat(std::move(dataFormat)) +, m_FillValue(std::move(fillValue)) { } @@ -39,7 +39,7 @@ Result<> CreateArrayAction::apply(DataStructure& dataStructure, Mode mode) const IDataAction::UniquePointer CreateArrayAction::clone() const { - return std::make_unique(m_Type, m_Dims, m_CDims, 
getCreatedPath()); + return std::make_unique(m_Type, m_Dims, m_CDims, getCreatedPath(), m_DataFormat, m_FillValue); } DataType CreateArrayAction::type() const @@ -67,13 +67,13 @@ std::vector CreateArrayAction::getAllCreatedPaths() const return {getCreatedPath()}; } -std::string CreateArrayAction::dataFormat() const +std::string CreateArrayAction::fillValue() const { - return m_DataFormat; + return m_FillValue; } -std::string CreateArrayAction::fillValue() const +std::string CreateArrayAction::dataFormat() const { - return m_FillValue; + return m_DataFormat; } } // namespace nx::core diff --git a/src/simplnx/Filter/Actions/CreateArrayAction.hpp b/src/simplnx/Filter/Actions/CreateArrayAction.hpp index 8c43775aed..2d49197427 100644 --- a/src/simplnx/Filter/Actions/CreateArrayAction.hpp +++ b/src/simplnx/Filter/Actions/CreateArrayAction.hpp @@ -18,8 +18,6 @@ namespace nx::core class SIMPLNX_EXPORT CreateArrayAction : public IDataCreationAction { public: - inline static constexpr StringLiteral k_DefaultDataFormat = ""; - CreateArrayAction() = delete; /** @@ -28,7 +26,9 @@ class SIMPLNX_EXPORT CreateArrayAction : public IDataCreationAction * @param tDims The tuple dimensions * @param cDims The component dimensions * @param path The path where the DataArray will be created - * @param dataFormat The data format (empty string for in-memory) + * @param dataFormat The data store format override. Empty string means "Automatic" + * (let the format resolver decide). A non-empty value bypasses the + * resolver and uses the specified format directly. 
* @param fillValue The fill value for the array */ CreateArrayAction(DataType type, const std::vector& tDims, const std::vector& cDims, const DataPath& path, std::string dataFormat = "", std::string fillValue = ""); @@ -86,18 +86,21 @@ class SIMPLNX_EXPORT CreateArrayAction : public IDataCreationAction std::vector getAllCreatedPaths() const override; /** - * @brief Returns the data formatting name for use in creating the appropriate data store. - * An empty string results in creating an in-memory DataStore. - * Other formats must be defined in external plugins. + * @brief Returns the fill value of the DataArray to be created. * @return std::string */ - std::string dataFormat() const; + std::string fillValue() const; /** - * @brief Returns the fill value of the DataArray to be created. - * @return std::string + * @brief Returns the data store format override for this action. + * + * Empty string means "Automatic" -- the format resolver decides. A non-empty + * value bypasses the resolver and uses the specified format directly, allowing + * individual filters to override the global format policy. 
+ * + * @return The data format string */ - std::string fillValue() const; + std::string dataFormat() const; private: DataType m_Type; diff --git a/src/simplnx/Filter/Actions/CreateGeometry1DAction.hpp b/src/simplnx/Filter/Actions/CreateGeometry1DAction.hpp index aaf4ca6b02..1a4221a0ff 100644 --- a/src/simplnx/Filter/Actions/CreateGeometry1DAction.hpp +++ b/src/simplnx/Filter/Actions/CreateGeometry1DAction.hpp @@ -3,8 +3,10 @@ #include "simplnx/Common/Array.hpp" #include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/DataGroup.hpp" +#include "simplnx/DataStructure/DataStore.hpp" #include "simplnx/DataStructure/Geometry/EdgeGeom.hpp" #include "simplnx/DataStructure/Geometry/IGeometry.hpp" +#include "simplnx/DataStructure/IDataStore.hpp" #include "simplnx/Filter/Output.hpp" #include "simplnx/Utilities/ArrayCreationUtilities.hpp" #include "simplnx/simplnx_export.hpp" @@ -35,7 +37,7 @@ class CreateGeometry1DAction : public IDataCreationAction * @param sharedEdgesName The name of the shared edge list array to be created */ CreateGeometry1DAction(const DataPath& geometryPath, size_t numEdges, size_t numVertices, const std::string& vertexAttributeMatrixName, const std::string& edgeAttributeMatrixName, - const std::string& sharedVerticesName, const std::string& sharedEdgesName, std::string createdDataFormat = "") + const std::string& sharedVerticesName, const std::string& sharedEdgesName) : IDataCreationAction(geometryPath) , m_NumEdges(numEdges) , m_NumVertices(numVertices) @@ -43,7 +45,6 @@ class CreateGeometry1DAction : public IDataCreationAction , m_EdgeDataName(edgeAttributeMatrixName) , m_SharedVerticesName(sharedVerticesName) , m_SharedEdgesName(sharedEdgesName) - , m_CreatedDataStoreFormat(createdDataFormat) { } @@ -57,7 +58,7 @@ class CreateGeometry1DAction : public IDataCreationAction * @param arrayType Tells whether to copy, move, or reference the existing input vertices array */ CreateGeometry1DAction(const DataPath& geometryPath, const 
DataPath& inputVerticesArrayPath, const DataPath& inputEdgesArrayPath, const std::string& vertexAttributeMatrixName, - const std::string& edgeAttributeMatrixName, const ArrayHandlingType& arrayType, std::string createdDataFormat = "") + const std::string& edgeAttributeMatrixName, const ArrayHandlingType& arrayType) : IDataCreationAction(geometryPath) , m_VertexDataName(vertexAttributeMatrixName) , m_EdgeDataName(edgeAttributeMatrixName) @@ -66,7 +67,6 @@ class CreateGeometry1DAction : public IDataCreationAction , m_InputVertices(inputVerticesArrayPath) , m_InputEdges(inputEdgesArrayPath) , m_ArrayHandlingType(arrayType) - , m_CreatedDataStoreFormat(createdDataFormat) { } @@ -140,11 +140,42 @@ class CreateGeometry1DAction : public IDataCreationAction DimensionType edgeTupleShape = {m_NumEdges}; DimensionType vertexTupleShape = {m_NumVertices}; // We probably don't know how many Vertices there are but take what ever the developer sends us - if(m_ArrayHandlingType == ArrayHandlingType::Copy) + // For Copy/Move/Reference, read shapes and materialize OOC stores upfront + if(m_ArrayHandlingType != ArrayHandlingType::Create) { edgeTupleShape = edges->getTupleShape(); vertexTupleShape = vertices->getTupleShape(); + // If the source arrays have OOC-backed stores, materialize them into + // in-core stores. These arrays may have been created OOC earlier in + // the pipeline when they lived outside any geometry. Unstructured/poly + // geometry topology arrays must be in-core for the visualization layer. 
+ if(vertices->getIDataStore()->getStoreType() == IDataStore::StoreType::OutOfCore) + { + auto inCoreStore = std::make_shared>(vertexTupleShape, ShapeType{3}, std::optional{}); + auto copyResult = vertices->getDataStoreRef().copyIntoBuffer(0, nonstd::span(inCoreStore->data(), inCoreStore->getSize())); + if(copyResult.invalid()) + { + return MakeErrorResult(-5410, fmt::format("{}Failed to materialize out-of-core vertices array '{}' into in-core store: {}", prefix, m_InputVertices.toString(), + copyResult.errors().empty() ? "unknown error" : copyResult.errors()[0].message)); + } + vertices->setDataStore(std::move(inCoreStore)); + } + if(edges->getIDataStore()->getStoreType() == IDataStore::StoreType::OutOfCore) + { + auto inCoreStore = std::make_shared>(edgeTupleShape, ShapeType{2}, std::optional{}); + auto copyResult = edges->getDataStoreRef().copyIntoBuffer(0, nonstd::span(inCoreStore->data(), inCoreStore->getSize())); + if(copyResult.invalid()) + { + return MakeErrorResult(-5411, fmt::format("{}Failed to materialize out-of-core edges array '{}' into in-core store: {}", prefix, m_InputEdges.toString(), + copyResult.errors().empty() ? 
"unknown error" : copyResult.errors()[0].message)); + } + edges->setDataStore(std::move(inCoreStore)); + } + } + + if(m_ArrayHandlingType == ArrayHandlingType::Copy) + { std::shared_ptr vertexCopy = vertices->deepCopy(getCreatedPath().createChildPath(m_SharedVerticesName)); const auto vertexArray = std::dynamic_pointer_cast(vertexCopy); @@ -156,8 +187,6 @@ class CreateGeometry1DAction : public IDataCreationAction } else if(m_ArrayHandlingType == ArrayHandlingType::Move) { - edgeTupleShape = edges->getTupleShape(); - vertexTupleShape = vertices->getTupleShape(); const auto geomId = geometry1d->getId(); const auto verticesId = vertices->getId(); @@ -185,8 +214,6 @@ class CreateGeometry1DAction : public IDataCreationAction } else if(m_ArrayHandlingType == ArrayHandlingType::Reference) { - edgeTupleShape = edges->getTupleShape(); - vertexTupleShape = vertices->getTupleShape(); const auto geomId = geometry1d->getId(); dataStructure.setAdditionalParent(vertices->getId(), geomId); dataStructure.setAdditionalParent(edges->getId(), geomId); @@ -198,7 +225,7 @@ class CreateGeometry1DAction : public IDataCreationAction DataPath edgesPath = getCreatedPath().createChildPath(m_SharedEdgesName); // Create the default DataArray that will hold the EdgeList and Vertices. 
We // size these to 1 because the Csv parser will resize them to the appropriate number of tuples - Result result = ArrayCreationUtilities::CreateArray(dataStructure, edgeTupleShape, {2}, edgesPath, mode, m_CreatedDataStoreFormat); + Result result = ArrayCreationUtilities::CreateArray(dataStructure, edgeTupleShape, {2}, edgesPath, mode); if(result.invalid()) { return MergeResults(result, MakeErrorResult(-5409, fmt::format("{}CreateGeometry1DAction: Could not allocate SharedEdgeList '{}'", prefix, edgesPath.toString()))); @@ -213,7 +240,7 @@ class CreateGeometry1DAction : public IDataCreationAction // Create the Vertex Array with a component size of 3 DataPath vertexPath = getCreatedPath().createChildPath(m_SharedVerticesName); - result = ArrayCreationUtilities::CreateArray(dataStructure, vertexTupleShape, {3}, vertexPath, mode, m_CreatedDataStoreFormat); + result = ArrayCreationUtilities::CreateArray(dataStructure, vertexTupleShape, {3}, vertexPath, mode); if(result.invalid()) { return MergeResults(result, MakeErrorResult(-5410, fmt::format("{}CreateGeometry1DAction: Could not allocate SharedVertList '{}'", prefix, vertexPath.toString()))); @@ -332,7 +359,6 @@ class CreateGeometry1DAction : public IDataCreationAction DataPath m_InputVertices; DataPath m_InputEdges; ArrayHandlingType m_ArrayHandlingType = ArrayHandlingType::Create; - std::string m_CreatedDataStoreFormat; }; using CreateEdgeGeometryAction = CreateGeometry1DAction; diff --git a/src/simplnx/Filter/Actions/CreateGeometry2DAction.hpp b/src/simplnx/Filter/Actions/CreateGeometry2DAction.hpp index ba1cca68d9..fe8ae75127 100644 --- a/src/simplnx/Filter/Actions/CreateGeometry2DAction.hpp +++ b/src/simplnx/Filter/Actions/CreateGeometry2DAction.hpp @@ -3,9 +3,11 @@ #include "simplnx/Common/Array.hpp" #include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/DataGroup.hpp" +#include "simplnx/DataStructure/DataStore.hpp" #include "simplnx/DataStructure/Geometry/IGeometry.hpp" #include 
"simplnx/DataStructure/Geometry/QuadGeom.hpp" #include "simplnx/DataStructure/Geometry/TriangleGeom.hpp" +#include "simplnx/DataStructure/IDataStore.hpp" #include "simplnx/Filter/Output.hpp" #include "simplnx/Utilities/ArrayCreationUtilities.hpp" #include "simplnx/simplnx_export.hpp" @@ -36,7 +38,7 @@ class CreateGeometry2DAction : public IDataCreationAction * @param sharedFacesName The name of the shared face list array to be created */ CreateGeometry2DAction(const DataPath& geometryPath, size_t numFaces, size_t numVertices, const std::string& vertexAttributeMatrixName, const std::string& faceAttributeMatrixName, - const std::string& sharedVerticesName, const std::string& sharedFacesName, std::string createdDataFormat = "") + const std::string& sharedVerticesName, const std::string& sharedFacesName) : IDataCreationAction(geometryPath) , m_NumFaces(numFaces) , m_NumVertices(numVertices) @@ -44,7 +46,6 @@ class CreateGeometry2DAction : public IDataCreationAction , m_FaceDataName(faceAttributeMatrixName) , m_SharedVerticesName(sharedVerticesName) , m_SharedFacesName(sharedFacesName) - , m_CreatedDataStoreFormat(createdDataFormat) { } @@ -58,7 +59,7 @@ class CreateGeometry2DAction : public IDataCreationAction * @param arrayType Tells whether to copy, move, or reference the existing input vertices array */ CreateGeometry2DAction(const DataPath& geometryPath, const DataPath& inputVerticesArrayPath, const DataPath& inputFacesArrayPath, const std::string& vertexAttributeMatrixName, - const std::string& faceAttributeMatrixName, const ArrayHandlingType& arrayType, std::string createdDataFormat = "") + const std::string& faceAttributeMatrixName, const ArrayHandlingType& arrayType) : IDataCreationAction(geometryPath) , m_VertexDataName(vertexAttributeMatrixName) , m_FaceDataName(faceAttributeMatrixName) @@ -67,7 +68,6 @@ class CreateGeometry2DAction : public IDataCreationAction , m_InputVertices(inputVerticesArrayPath) , m_InputFaces(inputFacesArrayPath) , 
m_ArrayHandlingType(arrayType) - , m_CreatedDataStoreFormat(createdDataFormat) { } @@ -140,11 +140,42 @@ class CreateGeometry2DAction : public IDataCreationAction DimensionType faceTupleShape = {m_NumFaces}; DimensionType vertexTupleShape = {m_NumVertices}; // We probably don't know how many Vertices there are but take what ever the developer sends us - if(m_ArrayHandlingType == ArrayHandlingType::Copy) + // For Copy/Move/Reference, read shapes and materialize OOC stores upfront + if(m_ArrayHandlingType != ArrayHandlingType::Create) { faceTupleShape = faces->getTupleShape(); vertexTupleShape = vertices->getTupleShape(); + // If the source arrays have OOC-backed stores, materialize them into + // in-core stores. These arrays may have been created OOC earlier in + // the pipeline when they lived outside any geometry. Unstructured/poly + // geometry topology arrays must be in-core for the visualization layer. + if(vertices->getIDataStore()->getStoreType() == IDataStore::StoreType::OutOfCore) + { + auto inCoreStore = std::make_shared>(vertexTupleShape, ShapeType{3}, std::optional{}); + auto copyResult = vertices->getDataStoreRef().copyIntoBuffer(0, nonstd::span(inCoreStore->data(), inCoreStore->getSize())); + if(copyResult.invalid()) + { + return MakeErrorResult(-5512, fmt::format("{}Failed to materialize out-of-core vertices array '{}' into in-core store: {}", prefix, m_InputVertices.toString(), + copyResult.errors().empty() ? 
"unknown error" : copyResult.errors()[0].message)); + } + vertices->setDataStore(std::move(inCoreStore)); + } + if(faces->getIDataStore()->getStoreType() == IDataStore::StoreType::OutOfCore) + { + auto inCoreStore = std::make_shared>(faceTupleShape, ShapeType{Geometry2DType::k_NumVerts}, std::optional{}); + auto copyResult = faces->getDataStoreRef().copyIntoBuffer(0, nonstd::span(inCoreStore->data(), inCoreStore->getSize())); + if(copyResult.invalid()) + { + return MakeErrorResult(-5511, fmt::format("{}Failed to materialize out-of-core faces array '{}' into in-core store: {}", prefix, m_InputFaces.toString(), + copyResult.errors().empty() ? "unknown error" : copyResult.errors()[0].message)); + } + faces->setDataStore(std::move(inCoreStore)); + } + } + + if(m_ArrayHandlingType == ArrayHandlingType::Copy) + { std::shared_ptr vertexCopy = vertices->deepCopy(getCreatedPath().createChildPath(m_SharedVerticesName)); const auto vertexArray = std::dynamic_pointer_cast(vertexCopy); @@ -156,8 +187,6 @@ class CreateGeometry2DAction : public IDataCreationAction } else if(m_ArrayHandlingType == ArrayHandlingType::Move) { - faceTupleShape = faces->getTupleShape(); - vertexTupleShape = vertices->getTupleShape(); const auto geomId = geometry2d->getId(); const auto verticesId = vertices->getId(); @@ -185,8 +214,6 @@ class CreateGeometry2DAction : public IDataCreationAction } else if(m_ArrayHandlingType == ArrayHandlingType::Reference) { - faceTupleShape = faces->getTupleShape(); - vertexTupleShape = vertices->getTupleShape(); const auto geomId = geometry2d->getId(); dataStructure.setAdditionalParent(vertices->getId(), geomId); dataStructure.setAdditionalParent(faces->getId(), geomId); @@ -198,7 +225,7 @@ class CreateGeometry2DAction : public IDataCreationAction DataPath trianglesPath = getCreatedPath().createChildPath(m_SharedFacesName); // Create the default DataArray that will hold the FaceList and Vertices. 
We // size these to 1 because the Csv parser will resize them to the appropriate number of tuples - Result result = ArrayCreationUtilities::CreateArray(dataStructure, faceTupleShape, {Geometry2DType::k_NumVerts}, trianglesPath, mode, m_CreatedDataStoreFormat); + Result result = ArrayCreationUtilities::CreateArray(dataStructure, faceTupleShape, {Geometry2DType::k_NumVerts}, trianglesPath, mode); if(result.invalid()) { return MergeResults(result, MakeErrorResult(-5509, fmt::format("{}CreateGeometry2DAction: Could not allocate SharedTriList '{}'", prefix, trianglesPath.toString()))); @@ -213,7 +240,7 @@ class CreateGeometry2DAction : public IDataCreationAction // Create the Vertex Array with a component size of 3 DataPath vertexPath = getCreatedPath().createChildPath(m_SharedVerticesName); - result = ArrayCreationUtilities::CreateArray(dataStructure, vertexTupleShape, {3}, vertexPath, mode, m_CreatedDataStoreFormat); + result = ArrayCreationUtilities::CreateArray(dataStructure, vertexTupleShape, {3}, vertexPath, mode); if(result.invalid()) { return MergeResults(result, MakeErrorResult(-5510, fmt::format("{}CreateGeometry2DAction: Could not allocate SharedVertList '{}'", prefix, vertexPath.toString()))); @@ -332,7 +359,6 @@ class CreateGeometry2DAction : public IDataCreationAction DataPath m_InputVertices; DataPath m_InputFaces; ArrayHandlingType m_ArrayHandlingType = ArrayHandlingType::Create; - std::string m_CreatedDataStoreFormat; }; using CreateTriangleGeometryAction = CreateGeometry2DAction; diff --git a/src/simplnx/Filter/Actions/CreateGeometry3DAction.hpp b/src/simplnx/Filter/Actions/CreateGeometry3DAction.hpp index d944fab566..5b32659ab1 100644 --- a/src/simplnx/Filter/Actions/CreateGeometry3DAction.hpp +++ b/src/simplnx/Filter/Actions/CreateGeometry3DAction.hpp @@ -3,9 +3,11 @@ #include "simplnx/Common/Array.hpp" #include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/DataGroup.hpp" +#include "simplnx/DataStructure/DataStore.hpp" 
#include "simplnx/DataStructure/Geometry/HexahedralGeom.hpp" #include "simplnx/DataStructure/Geometry/IGeometry.hpp" #include "simplnx/DataStructure/Geometry/TetrahedralGeom.hpp" +#include "simplnx/DataStructure/IDataStore.hpp" #include "simplnx/Filter/Output.hpp" #include "simplnx/Utilities/ArrayCreationUtilities.hpp" #include "simplnx/simplnx_export.hpp" @@ -36,7 +38,7 @@ class CreateGeometry3DAction : public IDataCreationAction * @param sharedCellsName The name of the shared cell list array to be created */ CreateGeometry3DAction(const DataPath& geometryPath, size_t numCells, size_t numVertices, const std::string& vertexAttributeMatrixName, const std::string& cellAttributeMatrixName, - const std::string& sharedVerticesName, const std::string& sharedCellsName, std::string createdDataFormat = "") + const std::string& sharedVerticesName, const std::string& sharedCellsName) : IDataCreationAction(geometryPath) , m_NumCells(numCells) , m_NumVertices(numVertices) @@ -44,7 +46,6 @@ class CreateGeometry3DAction : public IDataCreationAction , m_CellDataName(cellAttributeMatrixName) , m_SharedVerticesName(sharedVerticesName) , m_SharedCellsName(sharedCellsName) - , m_CreatedDataStoreFormat(createdDataFormat) { } @@ -58,7 +59,7 @@ class CreateGeometry3DAction : public IDataCreationAction * @param arrayType Tells whether to copy, move, or reference the existing input vertices array */ CreateGeometry3DAction(const DataPath& geometryPath, const DataPath& inputVerticesArrayPath, const DataPath& inputCellsArrayPath, const std::string& vertexAttributeMatrixName, - const std::string& cellAttributeMatrixName, const ArrayHandlingType& arrayType, std::string createdDataFormat = "") + const std::string& cellAttributeMatrixName, const ArrayHandlingType& arrayType) : IDataCreationAction(geometryPath) , m_VertexDataName(vertexAttributeMatrixName) , m_CellDataName(cellAttributeMatrixName) @@ -67,7 +68,6 @@ class CreateGeometry3DAction : public IDataCreationAction , 
m_InputVertices(inputVerticesArrayPath) , m_InputCells(inputCellsArrayPath) , m_ArrayHandlingType(arrayType) - , m_CreatedDataStoreFormat(createdDataFormat) { } @@ -140,11 +140,42 @@ class CreateGeometry3DAction : public IDataCreationAction DimensionType cellTupleShape = {m_NumCells}; DimensionType vertexTupleShape = {m_NumVertices}; // We probably don't know how many Vertices there are but take what ever the developer sends us - if(m_ArrayHandlingType == ArrayHandlingType::Copy) + // For Copy/Move/Reference, read shapes and materialize OOC stores upfront + if(m_ArrayHandlingType != ArrayHandlingType::Create) { cellTupleShape = cells->getTupleShape(); vertexTupleShape = vertices->getTupleShape(); + // If the source arrays have OOC-backed stores, materialize them into + // in-core stores. These arrays may have been created OOC earlier in + // the pipeline when they lived outside any geometry. Unstructured/poly + // geometry topology arrays must be in-core for the visualization layer. + if(vertices->getIDataStore()->getStoreType() == IDataStore::StoreType::OutOfCore) + { + auto inCoreStore = std::make_shared>(vertexTupleShape, ShapeType{3}, std::optional{}); + auto copyResult = vertices->getDataStoreRef().copyIntoBuffer(0, nonstd::span(inCoreStore->data(), inCoreStore->getSize())); + if(copyResult.invalid()) + { + return MakeErrorResult(-5612, fmt::format("{}Failed to materialize out-of-core vertices array '{}' into in-core store: {}", prefix, m_InputVertices.toString(), + copyResult.errors().empty() ? 
"unknown error" : copyResult.errors()[0].message)); + } + vertices->setDataStore(std::move(inCoreStore)); + } + if(cells->getIDataStore()->getStoreType() == IDataStore::StoreType::OutOfCore) + { + auto inCoreStore = std::make_shared>(cellTupleShape, ShapeType{Geometry3DType::k_NumVerts}, std::optional{}); + auto copyResult = cells->getDataStoreRef().copyIntoBuffer(0, nonstd::span(inCoreStore->data(), inCoreStore->getSize())); + if(copyResult.invalid()) + { + return MakeErrorResult(-5611, fmt::format("{}Failed to materialize out-of-core cells array '{}' into in-core store: {}", prefix, m_InputCells.toString(), + copyResult.errors().empty() ? "unknown error" : copyResult.errors()[0].message)); + } + cells->setDataStore(std::move(inCoreStore)); + } + } + + if(m_ArrayHandlingType == ArrayHandlingType::Copy) + { std::shared_ptr vertexCopy = vertices->deepCopy(getCreatedPath().createChildPath(m_SharedVerticesName)); const auto vertexArray = std::dynamic_pointer_cast(vertexCopy); @@ -156,8 +187,6 @@ class CreateGeometry3DAction : public IDataCreationAction } else if(m_ArrayHandlingType == ArrayHandlingType::Move) { - cellTupleShape = cells->getTupleShape(); - vertexTupleShape = vertices->getTupleShape(); const auto geomId = geometry3d->getId(); const auto verticesId = vertices->getId(); @@ -185,8 +214,6 @@ class CreateGeometry3DAction : public IDataCreationAction } else if(m_ArrayHandlingType == ArrayHandlingType::Reference) { - cellTupleShape = cells->getTupleShape(); - vertexTupleShape = vertices->getTupleShape(); const auto geomId = geometry3d->getId(); dataStructure.setAdditionalParent(vertices->getId(), geomId); dataStructure.setAdditionalParent(cells->getId(), geomId); @@ -197,7 +224,7 @@ class CreateGeometry3DAction : public IDataCreationAction { const DataPath cellsPath = getCreatedPath().createChildPath(m_SharedCellsName); // Create the default DataArray that will hold the CellList and Vertices. 
- Result result = ArrayCreationUtilities::CreateArray(dataStructure, cellTupleShape, {Geometry3DType::k_NumVerts}, cellsPath, mode, m_CreatedDataStoreFormat); + Result result = ArrayCreationUtilities::CreateArray(dataStructure, cellTupleShape, {Geometry3DType::k_NumVerts}, cellsPath, mode); if(result.invalid()) { return MergeResults(result, MakeErrorResult(-5609, fmt::format("{}CreateGeometry3DAction: Could not allocate SharedCellList '{}'", prefix, cellsPath.toString()))); @@ -212,7 +239,7 @@ class CreateGeometry3DAction : public IDataCreationAction // Create the Vertex Array with a component size of 3 const DataPath vertexPath = getCreatedPath().createChildPath(m_SharedVerticesName); - result = ArrayCreationUtilities::CreateArray(dataStructure, vertexTupleShape, {3}, vertexPath, mode, m_CreatedDataStoreFormat); + result = ArrayCreationUtilities::CreateArray(dataStructure, vertexTupleShape, {3}, vertexPath, mode); if(result.invalid()) { return MergeResults(result, MakeErrorResult(-5610, fmt::format("{}CreateGeometry3DAction: Could not allocate SharedVertList '{}'", prefix, vertexPath.toString()))); @@ -331,7 +358,6 @@ class CreateGeometry3DAction : public IDataCreationAction DataPath m_InputVertices; DataPath m_InputCells; ArrayHandlingType m_ArrayHandlingType = ArrayHandlingType::Create; - std::string m_CreatedDataStoreFormat; }; using CreateTetrahedralGeometryAction = CreateGeometry3DAction; diff --git a/src/simplnx/Filter/Actions/CreateRectGridGeometryAction.cpp b/src/simplnx/Filter/Actions/CreateRectGridGeometryAction.cpp index b4706181d3..189d5c9a26 100644 --- a/src/simplnx/Filter/Actions/CreateRectGridGeometryAction.cpp +++ b/src/simplnx/Filter/Actions/CreateRectGridGeometryAction.cpp @@ -10,7 +10,7 @@ namespace nx::core { CreateRectGridGeometryAction::CreateRectGridGeometryAction(const DataPath& path, usize xBoundTuples, usize yBoundTuples, usize zBoundTuples, const std::string& cellAttributeMatrixName, - const std::string& xBoundsName, const std::string& 
yBoundsName, const std::string& zBoundsName, std::string createdDataFormat) + const std::string& xBoundsName, const std::string& yBoundsName, const std::string& zBoundsName) : IDataCreationAction(path) , m_NumXBoundTuples(xBoundTuples) , m_NumYBoundTuples(yBoundTuples) @@ -19,12 +19,11 @@ CreateRectGridGeometryAction::CreateRectGridGeometryAction(const DataPath& path, , m_XBoundsArrayName(xBoundsName) , m_YBoundsArrayName(yBoundsName) , m_ZBoundsArrayName(zBoundsName) -, m_CreatedDataStoreFormat(createdDataFormat) { } CreateRectGridGeometryAction::CreateRectGridGeometryAction(const DataPath& path, const DataPath& inputXBoundsPath, const DataPath& inputYBoundsPath, const DataPath& inputZBoundsPath, - const std::string& cellAttributeMatrixName, const ArrayHandlingType& arrayType, std::string createdDataFormat) + const std::string& cellAttributeMatrixName, const ArrayHandlingType& arrayType) : IDataCreationAction(path) , m_CellDataName(cellAttributeMatrixName) , m_XBoundsArrayName(inputXBoundsPath.getTargetName()) @@ -34,7 +33,6 @@ CreateRectGridGeometryAction::CreateRectGridGeometryAction(const DataPath& path, , m_InputYBounds(inputYBoundsPath) , m_InputZBounds(inputZBoundsPath) , m_ArrayHandlingType(arrayType) -, m_CreatedDataStoreFormat(createdDataFormat) { } @@ -185,7 +183,7 @@ Float32Array* CreateRectGridGeometryAction::createBoundArray(DataStructure& data { const DimensionType componentShape = {1}; const DataPath boundsPath = getCreatedPath().createChildPath(arrayName); - if(Result<> result = ArrayCreationUtilities::CreateArray(dataStructure, {numTuples}, componentShape, boundsPath, mode, m_CreatedDataStoreFormat); result.invalid()) + if(Result<> result = ArrayCreationUtilities::CreateArray(dataStructure, {numTuples}, componentShape, boundsPath, mode); result.invalid()) { errors.insert(errors.end(), result.errors().begin(), result.errors().end()); return nullptr; diff --git a/src/simplnx/Filter/Actions/CreateRectGridGeometryAction.hpp 
b/src/simplnx/Filter/Actions/CreateRectGridGeometryAction.hpp index 47eb5d831f..0844af81fb 100644 --- a/src/simplnx/Filter/Actions/CreateRectGridGeometryAction.hpp +++ b/src/simplnx/Filter/Actions/CreateRectGridGeometryAction.hpp @@ -29,7 +29,7 @@ class SIMPLNX_EXPORT CreateRectGridGeometryAction : public IDataCreationAction * @param zBoundsName The name of the zBounds array to be created */ CreateRectGridGeometryAction(const DataPath& path, usize xBoundsDim, usize yBoundsDim, usize zBoundsDim, const std::string& cellAttributeMatrixName, const std::string& xBoundsName, - const std::string& yBoundsName, const std::string& zBoundsName, std::string createdDataFormat = ""); + const std::string& yBoundsName, const std::string& zBoundsName); /** * @brief Constructor to create the geometry using existing x, y, and z bounds arrays by either copying, moving, or referencing them @@ -41,7 +41,7 @@ class SIMPLNX_EXPORT CreateRectGridGeometryAction : public IDataCreationAction * @param arrayType Tells whether to copy, move, or reference the existing input bounds arrays */ CreateRectGridGeometryAction(const DataPath& path, const DataPath& inputXBoundsPath, const DataPath& inputYBoundsPath, const DataPath& inputZBoundsPath, const std::string& cellAttributeMatrixName, - const ArrayHandlingType& arrayType, std::string createdDataFormat = ""); + const ArrayHandlingType& arrayType); ~CreateRectGridGeometryAction() noexcept override; @@ -109,7 +109,6 @@ class SIMPLNX_EXPORT CreateRectGridGeometryAction : public IDataCreationAction DataPath m_InputYBounds; DataPath m_InputZBounds; ArrayHandlingType m_ArrayHandlingType = ArrayHandlingType::Create; - std::string m_CreatedDataStoreFormat; Float32Array* createBoundArray(DataStructure& dataStructure, Mode mode, const std::string& arrayName, usize numTuples, std::vector& errors) const; }; diff --git a/src/simplnx/Filter/Actions/CreateVertexGeometryAction.hpp b/src/simplnx/Filter/Actions/CreateVertexGeometryAction.hpp index 
25a8a8d4fe..4a99b2d602 100644 --- a/src/simplnx/Filter/Actions/CreateVertexGeometryAction.hpp +++ b/src/simplnx/Filter/Actions/CreateVertexGeometryAction.hpp @@ -3,8 +3,10 @@ #include "simplnx/Common/Array.hpp" #include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/DataGroup.hpp" +#include "simplnx/DataStructure/DataStore.hpp" #include "simplnx/DataStructure/Geometry/IGeometry.hpp" #include "simplnx/DataStructure/Geometry/VertexGeom.hpp" +#include "simplnx/DataStructure/IDataStore.hpp" #include "simplnx/Filter/Output.hpp" #include "simplnx/Utilities/ArrayCreationUtilities.hpp" #include "simplnx/simplnx_export.hpp" @@ -29,13 +31,11 @@ class CreateVertexGeometryAction : public IDataCreationAction * @param vertexAttributeMatrixName The name of the vertex AttributeMatrix to be created * @param sharedVertexListName The name of the shared vertex list array to be created */ - CreateVertexGeometryAction(const DataPath& geometryPath, IGeometry::MeshIndexType numVertices, const std::string& vertexAttributeMatrixName, const std::string& sharedVertexListName, - std::string createdDataFormat = "") + CreateVertexGeometryAction(const DataPath& geometryPath, IGeometry::MeshIndexType numVertices, const std::string& vertexAttributeMatrixName, const std::string& sharedVertexListName) : IDataCreationAction(geometryPath) , m_NumVertices(numVertices) , m_VertexDataName(vertexAttributeMatrixName) , m_SharedVertexListName(sharedVertexListName) - , m_CreatedDataStoreFormat(createdDataFormat) { } @@ -46,14 +46,12 @@ class CreateVertexGeometryAction : public IDataCreationAction * @param vertexAttributeMatrixName The name of the vertex AttributeMatrix to be created * @param arrayType Tells whether to copy, move, or reference the existing input vertices array */ - CreateVertexGeometryAction(const DataPath& geometryPath, const DataPath& inputVerticesArrayPath, const std::string& vertexAttributeMatrixName, const ArrayHandlingType& arrayType, - std::string 
createdDataFormat = "") + CreateVertexGeometryAction(const DataPath& geometryPath, const DataPath& inputVerticesArrayPath, const std::string& vertexAttributeMatrixName, const ArrayHandlingType& arrayType) : IDataCreationAction(geometryPath) , m_VertexDataName(vertexAttributeMatrixName) , m_SharedVertexListName(inputVerticesArrayPath.getTargetName()) , m_InputVertices(inputVerticesArrayPath) , m_ArrayHandlingType(arrayType) - , m_CreatedDataStoreFormat(createdDataFormat) { } @@ -114,11 +112,31 @@ class CreateVertexGeometryAction : public IDataCreationAction ShapeType tupleShape = {m_NumVertices}; // We don't probably know how many Vertices there are but take what ever the developer sends us - // Create the Vertex Array with a component size of 3 - if(m_ArrayHandlingType == ArrayHandlingType::Copy) + // For Copy/Move/Reference, read shapes and materialize OOC stores upfront + if(m_ArrayHandlingType != ArrayHandlingType::Create) { tupleShape = vertices->getTupleShape(); + // If the source array has an OOC-backed store, materialize it into + // an in-core store. The array may have been created OOC earlier in + // the pipeline when it lived outside any geometry. Unstructured/poly + // geometry topology arrays must be in-core for the visualization layer. + if(vertices->getIDataStore()->getStoreType() == IDataStore::StoreType::OutOfCore) + { + auto inCoreStore = std::make_shared>(tupleShape, ShapeType{3}, std::optional{}); + auto copyResult = vertices->getDataStoreRef().copyIntoBuffer(0, nonstd::span(inCoreStore->data(), inCoreStore->getSize())); + if(copyResult.invalid()) + { + return MakeErrorResult(-6107, fmt::format("{}Failed to materialize OOC vertices array '{}' into in-core store: {}", prefix, m_InputVertices.toString(), + copyResult.errors().empty() ? 
"unknown error" : copyResult.errors()[0].message)); + } + vertices->setDataStore(std::move(inCoreStore)); + } + } + + // Create the Vertex Array with a component size of 3 + if(m_ArrayHandlingType == ArrayHandlingType::Copy) + { std::shared_ptr copy = vertices->deepCopy(getCreatedPath().createChildPath(m_SharedVertexListName)); const auto vertexArray = std::dynamic_pointer_cast(copy); @@ -126,7 +144,6 @@ class CreateVertexGeometryAction : public IDataCreationAction } else if(m_ArrayHandlingType == ArrayHandlingType::Move) { - tupleShape = vertices->getTupleShape(); const auto geomId = vertexGeom->getId(); const auto verticesId = vertices->getId(); dataStructure.setAdditionalParent(verticesId, geomId); @@ -141,7 +158,6 @@ class CreateVertexGeometryAction : public IDataCreationAction } else if(m_ArrayHandlingType == ArrayHandlingType::Reference) { - tupleShape = vertices->getTupleShape(); dataStructure.setAdditionalParent(vertices->getId(), vertexGeom->getId()); vertexGeom->setVertices(*vertices); } @@ -150,7 +166,7 @@ class CreateVertexGeometryAction : public IDataCreationAction const DataPath vertexPath = getCreatedPath().createChildPath(m_SharedVertexListName); const ShapeType componentShape = {3}; - Result<> result = ArrayCreationUtilities::CreateArray(dataStructure, tupleShape, componentShape, vertexPath, mode, m_CreatedDataStoreFormat); + Result<> result = ArrayCreationUtilities::CreateArray(dataStructure, tupleShape, componentShape, vertexPath, mode); if(result.invalid()) { return result; @@ -246,7 +262,6 @@ class CreateVertexGeometryAction : public IDataCreationAction std::string m_SharedVertexListName; DataPath m_InputVertices; ArrayHandlingType m_ArrayHandlingType = ArrayHandlingType::Create; - std::string m_CreatedDataStoreFormat; }; } // namespace nx::core diff --git a/src/simplnx/Filter/Actions/ImportH5ObjectPathsAction.cpp b/src/simplnx/Filter/Actions/ImportH5ObjectPathsAction.cpp index 8e89250d0e..87ff1dce5f 100644 --- 
a/src/simplnx/Filter/Actions/ImportH5ObjectPathsAction.cpp +++ b/src/simplnx/Filter/Actions/ImportH5ObjectPathsAction.cpp @@ -1,34 +1,25 @@ #include "ImportH5ObjectPathsAction.hpp" +#include "simplnx/Common/StringLiteralFormatting.hpp" #include "simplnx/DataStructure/BaseGroup.hpp" -#include "simplnx/DataStructure/DataArray.hpp" -#include "simplnx/DataStructure/DataStore.hpp" +#include "simplnx/DataStructure/DataObject.hpp" #include "simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp" -#include "simplnx/Utilities/Parsing/HDF5/IO/FileIO.hpp" #include #include -#include using namespace nx::core; - -namespace -{ -void sortImportPaths(std::vector& importPaths) -{ - std::sort(importPaths.begin(), importPaths.end(), [](const DataPath& first, const DataPath& second) { return first.getLength() < second.getLength(); }); -} -} // namespace +namespace fs = std::filesystem; namespace nx::core { -ImportH5ObjectPathsAction::ImportH5ObjectPathsAction(const std::filesystem::path& importFile, const PathsType& paths) +ImportH5ObjectPathsAction::ImportH5ObjectPathsAction(const fs::path& importFile, const PathsType& paths) : IDataCreationAction(DataPath{}) , m_H5FilePath(importFile) , m_Paths(paths) { - sortImportPaths(m_Paths); + std::sort(m_Paths.begin(), m_Paths.end(), [](const DataPath& a, const DataPath& b) { return a.getLength() < b.getLength(); }); } ImportH5ObjectPathsAction::~ImportH5ObjectPathsAction() noexcept = default; @@ -37,42 +28,58 @@ Result<> ImportH5ObjectPathsAction::apply(DataStructure& dataStructure, Mode mod { static constexpr StringLiteral prefix = "ImportH5ObjectPathsAction: "; - auto fileReader = nx::core::HDF5::FileIO::ReadFile(m_H5FilePath); - // Import as a preflight data structure to start to conserve memory and only allocate the data you want later - Result dataStructureResult = DREAM3D::ImportDataStructureFromFile(fileReader, true); - if(dataStructureResult.invalid()) + // Get the source DataStructure — metadata only for preflight, loaded arrays for 
execute + // Preflight: metadata only. Execute: full load (not LoadDataStructureArrays, because the + // merge loop below selectively copies only m_Paths — pruning would break Geometry/AttributeMatrix + // relationships in the source structure before the merge has a chance to pick the right objects). + auto result = (mode == Mode::Preflight) ? DREAM3D::LoadDataStructureMetadata(m_H5FilePath) : DREAM3D::LoadDataStructure(m_H5FilePath); + + if(result.invalid()) { - return ConvertResult(std::move(dataStructureResult)); + return ConvertResult(std::move(result)); } - // Ensure there are no conflicting DataObject ID values - DataStructure importStructure = std::move(dataStructureResult.value()); - importStructure.resetIds(dataStructure.getNextId()); + DataStructure sourceStructure = std::move(result.value()); + sourceStructure.resetIds(dataStructure.getNextId()); + + // Merge source objects into the pipeline's DataStructure. + // Sort paths shortest-first so parents are inserted before children. + auto sortedPaths = m_Paths; + std::sort(sortedPaths.begin(), sortedPaths.end(), [](const DataPath& a, const DataPath& b) { return a.getLength() < b.getLength(); }); - const bool preflighting = mode == Mode::Preflight; - std::stringstream errorMessages; - for(const auto& targetPath : m_Paths) + for(const auto& targetPath : sortedPaths) { if(dataStructure.getDataAs(targetPath) != nullptr) { - return MakeErrorResult(-6203, fmt::format("{}Unable to import DataObject at '{}' because an object already exists there. Consider a rename of existing object.", prefix, targetPath.toString())); + return MakeErrorResult(-6203, fmt::format("{}Unable to import DataObject at '{}' because an object " + "already exists at that path. 
Consider renaming the existing object before importing, or " + "exclude this path from the import selection.", + prefix, targetPath.toString())); } - auto result = DREAM3D::FinishImportingObject(importStructure, dataStructure, targetPath, fileReader, preflighting); - if(result.invalid()) + if(!sourceStructure.containsData(targetPath)) { - for(const auto& errorResult : result.errors()) - { - errorMessages << errorResult.message << std::endl; - } + continue; + } + + // Shallow-copy the object from the source structure (which has real stores) + // and insert it into the pipeline's DataStructure. Clear children on groups + // because child objects will be inserted by their own paths in the loop. + const auto sourceObject = sourceStructure.getSharedData(targetPath); + const auto objectCopy = std::shared_ptr(sourceObject->shallowCopy()); + if(const auto group = std::dynamic_pointer_cast(objectCopy); group != nullptr) + { + group->clear(); + } + if(!dataStructure.insert(objectCopy, targetPath.getParent())) + { + return MakeErrorResult(-6202, fmt::format("{}Unable to insert DataObject at path '{}' into the DataStructure. 
" + "The parent path '{}' may not exist.", + prefix, targetPath.toString(), targetPath.getParent().toString())); } - } - if(!errorMessages.str().empty()) - { - return MakeErrorResult(-6201, errorMessages.str()); } - return ConvertResult(std::move(dataStructureResult)); + return {}; } IDataAction::UniquePointer ImportH5ObjectPathsAction::clone() const diff --git a/src/simplnx/Parameters/ArraySelectionParameter.cpp b/src/simplnx/Parameters/ArraySelectionParameter.cpp index af1ba3ff33..b8635a4c0c 100644 --- a/src/simplnx/Parameters/ArraySelectionParameter.cpp +++ b/src/simplnx/Parameters/ArraySelectionParameter.cpp @@ -201,11 +201,7 @@ Result<> ArraySelectionParameter::validatePath(const DataStructure& dataStructur { IDataStore::StoreType storeType = dataArray->getStoreType(); - if(allowsInMemory() && (storeType == IDataStore::StoreType::Empty)) - { - return {}; - } - else if(allowsOutOfCore() && (storeType == IDataStore::StoreType::EmptyOutOfCore)) + if(storeType == IDataStore::StoreType::Empty) { return {}; } diff --git a/src/simplnx/Parameters/DataStoreFormatParameter.cpp b/src/simplnx/Parameters/DataStoreFormatParameter.cpp index a423d1f190..f97e4c59d2 100644 --- a/src/simplnx/Parameters/DataStoreFormatParameter.cpp +++ b/src/simplnx/Parameters/DataStoreFormatParameter.cpp @@ -66,13 +66,30 @@ typename DataStoreFormatParameter::ValueType DataStoreFormatParameter::defaultSt typename DataStoreFormatParameter::AvailableValuesType DataStoreFormatParameter::availableValues() const { - return Application::GetOrCreateInstance()->getDataStoreFormats(); + const auto displayNames = Application::GetOrCreateInstance()->getDataStoreFormatDisplayNames(); + AvailableValuesType result; + result.reserve(displayNames.size()); + for(const auto& [formatName, displayName] : displayNames) + { + result.push_back(formatName); + } + return result; +} + +std::vector> DataStoreFormatParameter::availableFormatsWithDisplayNames() const +{ + return 
Application::GetOrCreateInstance()->getDataStoreFormatDisplayNames(); } Result<> DataStoreFormatParameter::validate(const std::any& value) const { [[maybe_unused]] const auto& stringValue = GetAnyRef(value); - const auto formats = Application::GetOrCreateInstance()->getDataStoreFormats(); + // Empty string is always valid — it means "Automatic" (let the resolver decide) + if(stringValue.empty()) + { + return {}; + } + const auto formats = availableValues(); if(std::find(formats.begin(), formats.end(), stringValue) == formats.end()) { std::string ss = fmt::format("DataStore format not known: '{}'", stringValue); diff --git a/src/simplnx/Parameters/DataStoreFormatParameter.hpp b/src/simplnx/Parameters/DataStoreFormatParameter.hpp index eeca60a785..ee9e2033b7 100644 --- a/src/simplnx/Parameters/DataStoreFormatParameter.hpp +++ b/src/simplnx/Parameters/DataStoreFormatParameter.hpp @@ -5,6 +5,7 @@ #include "simplnx/simplnx_export.hpp" #include +#include #include namespace nx::core @@ -64,11 +65,22 @@ class SIMPLNX_EXPORT DataStoreFormatParameter : public ValueParameter ValueType defaultString() const; /** - * @brief - * @retrurn + * @brief Returns the list of available format name strings. + * @return Vector of format name strings (keys only, no display names) */ AvailableValuesType availableValues() const; + /** + * @brief Returns all available formats as (formatName, displayName) pairs. + * + * The list always includes ("", "Automatic") and (k_InMemoryFormat, "In Memory"), + * plus any plugin-registered formats. This is intended for UI widgets that need + * to display human-readable labels alongside the internal format identifiers. 
+ * + * @return Vector of (formatName, displayName) pairs + */ + std::vector> availableFormatsWithDisplayNames() const; + /** * @brief * @param value diff --git a/src/simplnx/Utilities/AlgorithmDispatch.hpp b/src/simplnx/Utilities/AlgorithmDispatch.hpp index 564d277c41..275101d069 100644 --- a/src/simplnx/Utilities/AlgorithmDispatch.hpp +++ b/src/simplnx/Utilities/AlgorithmDispatch.hpp @@ -2,7 +2,9 @@ #include "simplnx/Common/Result.hpp" #include "simplnx/DataStructure/IDataArray.hpp" +#include "simplnx/DataStructure/IDataStore.hpp" +#include #include namespace nx::core @@ -11,7 +13,7 @@ namespace nx::core /** * @brief Checks whether an IDataArray is backed by out-of-core (chunked) storage. * - * Returns true when the array's data store reports a chunk shape (e.g. ZarrStore), + * Returns true when the array's data store reports StoreType::OutOfCore, * indicating that data lives on disk in compressed chunks rather than in a * contiguous in-memory buffer. * @@ -20,7 +22,7 @@ namespace nx::core */ inline bool IsOutOfCore(const IDataArray& array) { - return array.getIDataStoreRef().getChunkShape().has_value(); + return array.getIDataStoreRef().getStoreType() == IDataStore::StoreType::OutOfCore; } /** @@ -49,8 +51,12 @@ inline bool AnyOutOfCore(std::initializer_list arrays) * to always select the out-of-core algorithm, regardless of storage type. * * This is primarily used in unit tests to exercise the OOC algorithm path - * even when data is stored in-core. Use ForceOocAlgorithmGuard for RAII-safe - * toggling in tests. + * even when data is stored in-core. The flag is backed by a function-local + * static, so it persists for the lifetime of the process. + * + * @warning This flag is NOT thread-safe. It should only be set from the main + * test thread before any parallel work begins. Use ForceOocAlgorithmGuard + * for RAII-safe toggling in tests. 
* * @return Reference to the static force flag */ @@ -60,15 +66,61 @@ inline bool& ForceOocAlgorithm() return s_force; } +/** + * @brief Integer array of forceOoc values for Catch2 GENERATE(from_range(...)). + * + * Controlled by CMake option SIMPLNX_TEST_ALGORITHM_PATH (passed as a + * compile definition to test targets): + * 0 (Both) - {0, 1}: tests both in-core and OOC paths (default) + * 1 (OocOnly) - {1}: tests only OOC path (use for OOC builds) + * 2 (InCoreOnly) - {0}: tests only in-core path (quick validation) + * + * Uses int instead of bool because Catch2 v2's FixedValuesGenerator + * does not support bool due to std::vector specialization. + * + * Usage in tests: + * @code + * bool forceOoc = static_cast(GENERATE(from_range(nx::core::k_ForceOocTestValues))); + * const nx::core::ForceOocAlgorithmGuard guard(forceOoc); + * @endcode + * + * Set via: cmake -DSIMPLNX_TEST_ALGORITHM_PATH=1 ... + */ +#ifndef SIMPLNX_TEST_ALGORITHM_PATH +#define SIMPLNX_TEST_ALGORITHM_PATH 0 +#endif + +// clang-format off +#if SIMPLNX_TEST_ALGORITHM_PATH == 1 +inline const std::array k_ForceOocTestValues = {1}; +#elif SIMPLNX_TEST_ALGORITHM_PATH == 2 +inline const std::array k_ForceOocTestValues = {0}; +#else +inline const std::array k_ForceOocTestValues = {0, 1}; +#endif +// clang-format on + /** * @brief RAII guard that sets ForceOocAlgorithm() on construction and * restores the previous value on destruction. * - * Usage in tests with Catch2 GENERATE: + * The guard captures the current value of ForceOocAlgorithm() when constructed, + * overrides it with the requested value, and restores the original value when + * the guard goes out of scope. This ensures the global flag is always cleaned + * up, even if the test throws an exception or fails early. + * + * Copy and move operations are deleted to prevent accidental double-restore + * of the original value, which would corrupt the global flag state. + * + * @warning Not thread-safe. 
The underlying flag is a bare static bool with + * no synchronization. In Catch2 tests this is safe because each + * TEST_CASE runs on the main thread, but do not use this guard + * from worker threads. + * + * Usage in tests: * @code - * bool forceOoc = GENERATE(false, true); + * bool forceOoc = static_cast(GENERATE(from_range(nx::core::k_ForceOocTestValues))); * const nx::core::ForceOocAlgorithmGuard guard(forceOoc); - * // ... test body runs with both algorithm paths ... * @endcode */ class ForceOocAlgorithmGuard @@ -77,11 +129,13 @@ class ForceOocAlgorithmGuard ForceOocAlgorithmGuard(bool force) : m_Original(ForceOocAlgorithm()) { + // Override the global flag for the duration of this guard's lifetime ForceOocAlgorithm() = force; } ~ForceOocAlgorithmGuard() { + // Restore the original value so subsequent tests start with a clean state ForceOocAlgorithm() = m_Original; } @@ -91,7 +145,74 @@ class ForceOocAlgorithmGuard ForceOocAlgorithmGuard& operator=(ForceOocAlgorithmGuard&&) = delete; private: - bool m_Original; + bool m_Original = false; +}; + +/** + * @brief Returns a reference to the global flag that forces DispatchAlgorithm + * to always select the in-core algorithm, overriding storage-type detection. + * + * This is primarily used in unit tests to exercise the in-core algorithm path + * even when data is stored out-of-core (e.g., loaded from HDF5 in an OOC build). + * The flag is backed by a function-local static, so it persists for the lifetime + * of the process. + * + * ForceInCoreAlgorithm() takes the highest precedence in DispatchAlgorithm: + * when set to true, neither AnyOutOfCore() nor ForceOocAlgorithm() can + * override it. This allows tests to verify in-core correctness even when + * running in an OOC-enabled build where arrays may be loaded as chunked stores. + * + * @warning Not thread-safe. See ForceOocAlgorithm() for details. 
+ * + * @return Reference to the static force flag + */ +inline bool& ForceInCoreAlgorithm() +{ + static bool s_force = false; + return s_force; +} + +/** + * @brief RAII guard that unconditionally sets ForceInCoreAlgorithm() to true + * on construction and restores the previous value on destruction. + * + * Unlike ForceOocAlgorithmGuard, this guard always forces in-core mode and + * does not accept a boolean parameter. This is intentional: forcing in-core + * is an override that should only be applied deliberately in tests that need + * to verify in-core behavior in an OOC-enabled build. + * + * Copy and move operations are deleted to prevent accidental double-restore. + * + * @warning Not thread-safe. See ForceOocAlgorithmGuard for details. + * + * Usage in tests: + * @code + * const nx::core::ForceInCoreAlgorithmGuard guard; + * @endcode + */ +class ForceInCoreAlgorithmGuard +{ +public: + ForceInCoreAlgorithmGuard() + : m_Original(ForceInCoreAlgorithm()) + { + // Unconditionally force in-core dispatch for the guard's lifetime + ForceInCoreAlgorithm() = true; + } + + ~ForceInCoreAlgorithmGuard() + { + // Restore the original value so subsequent tests start with a clean state + ForceInCoreAlgorithm() = m_Original; + } + + ForceInCoreAlgorithmGuard(const ForceInCoreAlgorithmGuard&) = delete; + ForceInCoreAlgorithmGuard(ForceInCoreAlgorithmGuard&&) = delete; + ForceInCoreAlgorithmGuard& operator=(const ForceInCoreAlgorithmGuard&) = delete; + ForceInCoreAlgorithmGuard& operator=(ForceInCoreAlgorithmGuard&&) = delete; + +private: + bool m_Original = false; }; /** @@ -105,12 +226,11 @@ class ForceOocAlgorithmGuard * a different algorithm (e.g. scanline CCL with sequential chunk access) can be * orders of magnitude faster for OOC data. * - * This function checks the storage type of the given arrays and the global force - * flag. If *any* array is out-of-core or ForceOocAlgorithm() is true, the OOC - * algorithm is selected. 
Callers should pass all input and output arrays the - * filter operates on. Both algorithm classes must: - * - Be constructible from the same ArgsT... parameter pack - * - Provide operator()() returning Result<> + * Selection logic (evaluated in order): + * 1. ForceInCoreAlgorithm() == true -> always use InCoreAlgo + * 2. AnyOutOfCore(arrays) == true -> use OocAlgo + * 3. ForceOocAlgorithm() == true -> use OocAlgo + * 4. Otherwise -> use InCoreAlgo * * @tparam InCoreAlgo Algorithm class optimized for in-memory data * @tparam OocAlgo Algorithm class optimized for out-of-core (chunked) data @@ -122,12 +242,19 @@ class ForceOocAlgorithmGuard template Result<> DispatchAlgorithm(std::initializer_list arrays, ArgsT&&... args) { - if(AnyOutOfCore(arrays) || ForceOocAlgorithm()) + // Selection priority (highest to lowest): + // 1. ForceInCoreAlgorithm == true -> InCoreAlgo (test override, wins over everything) + // 2. AnyOutOfCore(arrays) == true -> OocAlgo (real OOC data detected at runtime) + // 3. ForceOocAlgorithm == true -> OocAlgo (test override for exercising OOC path) + // 4. 
Default -> InCoreAlgo (all data is in-memory) + if(!ForceInCoreAlgorithm() && (AnyOutOfCore(arrays) || ForceOocAlgorithm())) { + // Construct the OOC algorithm with the forwarded args and invoke operator()() return OocAlgo(std::forward(args)...)(); } else { + // Construct the in-core algorithm with the forwarded args and invoke operator()() return InCoreAlgo(std::forward(args)...)(); } } diff --git a/src/simplnx/Utilities/ArrayCreationUtilities.cpp b/src/simplnx/Utilities/ArrayCreationUtilities.cpp index ce74100bbc..10d929b4f3 100644 --- a/src/simplnx/Utilities/ArrayCreationUtilities.cpp +++ b/src/simplnx/Utilities/ArrayCreationUtilities.cpp @@ -1,37 +1,13 @@ #include "ArrayCreationUtilities.hpp" -#include "simplnx/Core/Application.hpp" #include "simplnx/Utilities/MemoryUtilities.hpp" using namespace nx::core; //----------------------------------------------------------------------------- -bool ArrayCreationUtilities::CheckMemoryRequirement(DataStructure& dataStructure, uint64 requiredMemory, std::string& format) +bool ArrayCreationUtilities::CheckMemoryRequirement(const DataStructure& dataStructure, uint64 requiredMemory) { static const uint64 k_AvailableMemory = Memory::GetTotalMemory(); - - // Only check if format is set to in-memory - if(!format.empty()) - { - return true; - } - - Preferences* preferencesPtr = Application::GetOrCreateInstance()->getPreferences(); - const uint64 memoryUsage = dataStructure.memoryUsage() + requiredMemory; - const uint64 largeDataStructureSize = preferencesPtr->largeDataStructureSize(); - const std::string largeDataFormat = preferencesPtr->largeDataFormat(); - - if(memoryUsage >= largeDataStructureSize) - { - // Check if out-of-core is available / enabled - if(largeDataFormat.empty() && memoryUsage >= k_AvailableMemory) - { - return false; - } - // Use out-of-core - format = largeDataFormat; - } - - return true; + return memoryUsage < k_AvailableMemory; } diff --git a/src/simplnx/Utilities/ArrayCreationUtilities.hpp 
b/src/simplnx/Utilities/ArrayCreationUtilities.hpp index 0767672056..8c9e7b1f4d 100644 --- a/src/simplnx/Utilities/ArrayCreationUtilities.hpp +++ b/src/simplnx/Utilities/ArrayCreationUtilities.hpp @@ -3,6 +3,7 @@ #include "simplnx/simplnx_export.hpp" #include "simplnx/Common/Result.hpp" +#include "simplnx/Core/Preferences.hpp" #include "simplnx/DataStructure/AttributeMatrix.hpp" #include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/DataStructure.hpp" @@ -19,9 +20,7 @@ namespace nx::core::ArrayCreationUtilities { -inline static constexpr StringLiteral k_DefaultDataFormat = ""; - -SIMPLNX_EXPORT bool CheckMemoryRequirement(DataStructure& dataStructure, uint64 requiredMemory, std::string& format); +SIMPLNX_EXPORT bool CheckMemoryRequirement(const DataStructure& dataStructure, uint64 requiredMemory); /** * @brief Creates a DataArray with the given properties @@ -34,7 +33,7 @@ SIMPLNX_EXPORT bool CheckMemoryRequirement(DataStructure& dataStructure, uint64 * @return */ template -Result<> CreateArray(DataStructure& dataStructure, const ShapeType& tupleShape, const ShapeType& compShape, const DataPath& path, IDataAction::Mode mode, std::string dataFormat = "", +Result<> CreateArray(DataStructure& dataStructure, const ShapeType& tupleShape, const ShapeType& compShape, const DataPath& path, IDataAction::Mode mode, const std::string& dataFormat = "", std::string fillValue = "") { auto parentPath = path.getParent(); @@ -75,18 +74,61 @@ Result<> CreateArray(DataStructure& dataStructure, const ShapeType& tupleShape, const usize numTuples = std::accumulate(tupleShape.cbegin(), tupleShape.cend(), static_cast(1), std::multiplies<>()); uint64 requiredMemory = numTuples * numComponents * sizeof(T); - if(!CheckMemoryRequirement(dataStructure, requiredMemory, dataFormat)) + + // Resolve the storage format through the registered hook (e.g., SimplnxOoc). + // The format resolver is the single decision point for whether an array uses + // in-core or OOC storage. 
It is registered by the SimplnxOoc plugin and + // considers parent geometry type, user preferences, and data size. If no + // resolver is registered, all arrays default to in-core. + // + // Arrays under unstructured/poly geometries are always resolved to in-core + // because OOC support for those geometry types has been deferred. See the + // resolver implementation in SimplnxOoc for the full rationale. + std::string resolvedFormat; + if(mode == IDataAction::Mode::Execute) { - uint64 totalMemory = requiredMemory + dataStructure.memoryUsage(); - uint64 availableMemory = Memory::GetTotalMemory(); - return MakeErrorResult(-264, fmt::format("CreateArray: Cannot create DataArray '{}'.\n\tTotal memory required for DataStructure: '{}' Bytes.\n\tTotal reported memory: '{}' Bytes", name, - totalMemory, availableMemory)); + if(!dataFormat.empty()) + { + // User explicitly chose a format via the filter UI — bypass the resolver. + // Both k_InMemoryFormat and any other plugin format name (e.g., "HDF5-OOC") + // pass through unchanged; the DataStore factory in DataIOCollection routes + // k_InMemoryFormat to the built-in core manager directly. + resolvedFormat = dataFormat; + } + else + { + // No per-filter override — ask the resolver (which consults user preferences, + // size thresholds, and geometry type). The resolver returns either "" for + // "default in-memory" or a plugin format name like "HDF5-OOC". + resolvedFormat = DataStoreUtilities::GetIOCollection().resolveFormat(dataStructure, path, GetDataType(), requiredMemory); + } + + // Only check RAM availability for in-core arrays. OOC arrays go to disk + // and do not consume RAM for their primary storage. "In-core" means either + // the empty/unset sentinel (resolver defaulted) or the explicit k_InMemoryFormat + // constant (user forced in-memory). 
+ const bool isInCore = resolvedFormat.empty() || resolvedFormat == Preferences::k_InMemoryFormat.str(); + if(isInCore && !CheckMemoryRequirement(dataStructure, requiredMemory)) + { + uint64 totalMemory = requiredMemory + dataStructure.memoryUsage(); + uint64 availableMemory = Memory::GetTotalMemory(); + return MakeErrorResult(-264, fmt::format("Cannot create array '{}': the DataStructure would require {} bytes total, " + "but only {} bytes of RAM are available. Consider enabling out-of-core " + "storage or lowering the size thresholds in Preferences so that large " + "arrays are stored on disk instead of in memory.", + path.toString(), totalMemory, availableMemory)); + } } - auto store = DataStoreUtilities::CreateDataStore(tupleShape, compShape, mode, dataFormat); + auto store = DataStoreUtilities::CreateDataStore(tupleShape, compShape, mode, resolvedFormat); if(nullptr == store) { - return MakeErrorResult(-265, fmt::format("CreateArray: Unable to create DataStore at '{}' of DataStore format '{}'", path.toString(), dataFormat)); + // No registered IO manager could produce a DataStore for this format. + // Include the full manager capability list so the user can tell whether + // the format is a typo, whether the required plugin is missing, or whether + // the format simply does not support this store type. 
+ return MakeErrorResult(-265, fmt::format("CreateArray: Unable to create DataStore at '{}' of DataStore format '{}'.\n{}", path.toString(), resolvedFormat, + DataStoreUtilities::GetIOCollection().generateManagerListString())); } if(!fillValue.empty()) { diff --git a/src/simplnx/Utilities/DataStoreUtilities.cpp b/src/simplnx/Utilities/DataStoreUtilities.cpp index 55edfeb3d5..8a10dfc1b6 100644 --- a/src/simplnx/Utilities/DataStoreUtilities.cpp +++ b/src/simplnx/Utilities/DataStoreUtilities.cpp @@ -5,17 +5,7 @@ using namespace nx::core; //----------------------------------------------------------------------------- -void DataStoreUtilities::TryForceLargeDataFormatFromPrefs(std::string& dataFormat) -{ - auto* preferencesPtr = Application::GetOrCreateInstance()->getPreferences(); - if(preferencesPtr->forceOocData()) - { - dataFormat = preferencesPtr->largeDataFormat(); - } -} - -//----------------------------------------------------------------------------- -std::shared_ptr DataStoreUtilities::GetIOCollection() +DataIOCollection& DataStoreUtilities::GetIOCollection() { return Application::GetOrCreateInstance()->getIOCollection(); } diff --git a/src/simplnx/Utilities/DataStoreUtilities.hpp b/src/simplnx/Utilities/DataStoreUtilities.hpp index aa6ae83f90..5a4014c4a8 100644 --- a/src/simplnx/Utilities/DataStoreUtilities.hpp +++ b/src/simplnx/Utilities/DataStoreUtilities.hpp @@ -13,17 +13,16 @@ namespace nx::core::DataStoreUtilities { /** - * @brief Sets the dataFormat string to the large data format from the prefs - * if forceOocData is true. - * @param dataFormat + * @brief Returns a non-owning reference to the application's DataIOCollection. + * + * The DataIOCollection is owned by the Application singleton and lives for the + * entire process lifetime. Callers receive a reference, not a shared_ptr, to + * make the non-ownership relationship explicit and prevent accidental lifetime + * extension. + * + * @return Reference to the Application's DataIOCollection. 
*/ -SIMPLNX_EXPORT void TryForceLargeDataFormatFromPrefs(std::string& dataFormat); - -/** - * @brief Returns the application's DataIOCollection. - * @return - */ -SIMPLNX_EXPORT std::shared_ptr GetIOCollection(); +SIMPLNX_EXPORT DataIOCollection& GetIOCollection(); template uint64 CalculateDataSize(const ShapeType& tupleShape, const ShapeType& componentShape) @@ -34,15 +33,30 @@ uint64 CalculateDataSize(const ShapeType& tupleShape, const ShapeType& component } /** - * @brief Creates a DataStore with the given properties - * @tparam T Primitive Type (int, float, ...) - * @param tupleShape The Tuple Dimensions - * @param componentShape The component dimensions - * @param mode The mode to assume: PREFLIGHT or EXECUTE. Preflight will NOT allocate any storage. EXECUTE will allocate the memory/storage - * @return + * @brief Simple factory that creates a DataStore with the given properties. + * + * This function does NOT resolve the storage format. The caller is responsible + * for determining the correct format (e.g., by calling the format resolver in + * CreateArray) and passing it in via the dataFormat parameter. + * + * In Preflight mode, returns an EmptyDataStore that records shape metadata + * without allocating any storage. In Execute mode, forwards directly to + * createDataStoreWithType() which creates either an in-memory DataStore + * (for "" unset or k_InMemoryFormat explicit) or an OOC-backed store + * (for "HDF5-OOC" etc.). + * + * @tparam T Primitive type (int8, float32, uint64, etc.) + * @param tupleShape The tuple dimensions (e.g., {100, 200, 300} for a 3D volume) + * @param componentShape The component dimensions (e.g., {3} for a 3-component vector) + * @param mode PREFLIGHT returns an EmptyDataStore; EXECUTE allocates real storage + * @param dataFormat The already-resolved format name. An empty string means + * "unset/auto — default to in-memory". k_InMemoryFormat means + * "explicit in-memory". 
Any other non-empty value must be a + * plugin-registered format name (e.g., "HDF5-OOC"). + * @return Shared pointer to the created AbstractDataStore */ template -std::shared_ptr> CreateDataStore(const ShapeType& tupleShape, const ShapeType& componentShape, IDataAction::Mode mode, std::string dataFormat = "") +std::shared_ptr> CreateDataStore(const ShapeType& tupleShape, const ShapeType& componentShape, IDataAction::Mode mode, const std::string& dataFormat = "") { switch(mode) { @@ -50,11 +64,7 @@ std::shared_ptr> CreateDataStore(const ShapeType& tupleShap return std::make_unique>(tupleShape, componentShape, dataFormat); } case IDataAction::Mode::Execute: { - uint64 dataSize = CalculateDataSize(tupleShape, componentShape); - TryForceLargeDataFormatFromPrefs(dataFormat); - auto ioCollection = GetIOCollection(); - ioCollection->checkStoreDataFormat(dataSize, dataFormat); - return ioCollection->createDataStoreWithType(dataFormat, tupleShape, componentShape); + return GetIOCollection().createDataStoreWithType(dataFormat, tupleShape, componentShape); } default: { throw std::runtime_error("Invalid mode"); @@ -62,20 +72,39 @@ std::shared_ptr> CreateDataStore(const ShapeType& tupleShap } } +/** + * @brief Simple factory that creates a ListStore with the given properties. + * + * This function does NOT resolve the storage format. The caller is responsible + * for determining the correct format (e.g., by calling the format resolver in + * CreateNeighborListAction) and passing it in via the dataFormat parameter. + * + * In Preflight mode, returns an EmptyListStore that records shape metadata + * without allocating any storage. In Execute mode, forwards directly to + * createListStoreWithType() which creates either an in-memory ListStore + * (for "" unset or k_InMemoryFormat explicit) or an OOC-backed store + * (for "HDF5-OOC" etc.). 
+ * + * @tparam T Primitive type of the list elements + * @param tupleShape The tuple dimensions + * @param mode PREFLIGHT returns an EmptyListStore; EXECUTE allocates real storage + * @param dataFormat The already-resolved format name. An empty string means + * "unset/auto — default to in-memory". k_InMemoryFormat means + * "explicit in-memory". Any other non-empty value must be a + * plugin-registered format name (e.g., "HDF5-OOC"). + * @return Shared pointer to the created AbstractListStore + */ template -std::shared_ptr> CreateListStore(const ShapeType& tupleShape, IDataAction::Mode mode = IDataAction::Mode::Execute, std::string dataFormat = "") +std::shared_ptr> CreateListStore(const ShapeType& tupleShape, IDataAction::Mode mode = IDataAction::Mode::Execute, const std::string& dataFormat = "") { switch(mode) { case IDataAction::Mode::Preflight: { + // Preflight: no storage allocated, just record the tuple shape return std::make_unique>(tupleShape); } case IDataAction::Mode::Execute: { - uint64 dataSize = CalculateDataSize(tupleShape, {10}); - TryForceLargeDataFormatFromPrefs(dataFormat); - auto ioCollection = GetIOCollection(); - ioCollection->checkStoreDataFormat(dataSize, dataFormat); - return ioCollection->createListStoreWithType(dataFormat, tupleShape); + return GetIOCollection().createListStoreWithType(dataFormat, tupleShape); } default: { throw std::runtime_error("Invalid mode"); @@ -91,8 +120,7 @@ std::shared_ptr> ConvertDataStore(const AbstractDataStore> newStore = ioCollection->createDataStoreWithType(dataFormat, dataStore.getTupleShape(), dataStore.getComponentShape()); + std::shared_ptr> newStore = GetIOCollection().createDataStoreWithType(dataFormat, dataStore.getTupleShape(), dataStore.getComponentShape()); if(newStore == nullptr) { return nullptr; diff --git a/src/simplnx/Utilities/IParallelAlgorithm.cpp b/src/simplnx/Utilities/IParallelAlgorithm.cpp index dc04752a06..6b6b22a7f2 100644 --- a/src/simplnx/Utilities/IParallelAlgorithm.cpp +++ 
b/src/simplnx/Utilities/IParallelAlgorithm.cpp @@ -1,6 +1,6 @@ #include "IParallelAlgorithm.hpp" -#include "simplnx/Core/Application.hpp" +#include "simplnx/DataStructure/IDataStore.hpp" namespace { @@ -19,7 +19,7 @@ bool CheckStoresInMemory(const nx::core::IParallelAlgorithm::AlgorithmStores& st continue; } - if(!storePtr->getDataFormat().empty()) + if(storePtr->getStoreType() == nx::core::IDataStore::StoreType::OutOfCore) { return false; } @@ -43,7 +43,7 @@ bool CheckArraysInMemory(const nx::core::IParallelAlgorithm::AlgorithmArrays& ar continue; } - if(!arrayPtr->getIDataStoreRef().getDataFormat().empty()) + if(arrayPtr->getIDataStoreRef().getStoreType() == nx::core::IDataStore::StoreType::OutOfCore) { return false; } @@ -58,10 +58,11 @@ namespace nx::core // ----------------------------------------------------------------------------- IParallelAlgorithm::IParallelAlgorithm() { -#ifdef SIMPLNX_ENABLE_MULTICORE - // Do not run OOC data in parallel by default. - m_RunParallel = !Application::GetOrCreateInstance()->getPreferences()->useOocData(); -#endif + // m_RunParallel defaults to true (ifdef SIMPLNX_ENABLE_MULTICORE) or false. + // Individual filters disable via requireArraysInMemory()/requireStoresInMemory() + // if they genuinely need in-memory data (e.g., ITK filters). + // OOC stores are now thread-safe (ChunkCache + HDF5 global mutex), so + // TBB parallelism is safe on OOC data. 
} // ----------------------------------------------------------------------------- diff --git a/src/simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.cpp b/src/simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.cpp index 9a79726a53..ece5cc6ae8 100644 --- a/src/simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.cpp +++ b/src/simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.cpp @@ -2,6 +2,7 @@ #include "simplnx/Common/Aliases.hpp" #include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/BaseGroup.hpp" #include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/DataGroup.hpp" #include "simplnx/DataStructure/DataStore.hpp" @@ -15,6 +16,7 @@ #include "simplnx/DataStructure/Geometry/TetrahedralGeom.hpp" #include "simplnx/DataStructure/Geometry/TriangleGeom.hpp" #include "simplnx/DataStructure/Geometry/VertexGeom.hpp" +#include "simplnx/DataStructure/IO/Generic/DataIOCollection.hpp" #include "simplnx/DataStructure/IO/HDF5/DataStructureReader.hpp" #include "simplnx/DataStructure/IO/HDF5/DataStructureWriter.hpp" #include "simplnx/DataStructure/IO/HDF5/IDataStoreIO.hpp" @@ -23,6 +25,7 @@ #include "simplnx/DataStructure/StringArray.hpp" #include "simplnx/DataStructure/StringStore.hpp" #include "simplnx/Pipeline/Pipeline.hpp" +#include "simplnx/Utilities/DataStoreUtilities.hpp" #include "simplnx/Utilities/Parsing/HDF5/IO/FileIO.hpp" #include @@ -35,6 +38,7 @@ #include using namespace nx::core; +namespace fs = std::filesystem; namespace { @@ -746,14 +750,14 @@ void WriteXdmf(std::ostream& out, const DataStructure& dataStructure, std::strin } } // namespace -void DREAM3D::WriteXdmf(const std::filesystem::path& filePath, const DataStructure& dataStructure, std::string_view hdf5FilePath) +void DREAM3D::WriteXdmf(const fs::path& filePath, const DataStructure& dataStructure, std::string_view hdf5FilePath) { std::ofstream file(filePath); ::WriteXdmf(file, dataStructure, hdf5FilePath); } -DREAM3D::FileVersionType DREAM3D::GetFileVersion(const 
std::filesystem::path& path) +DREAM3D::FileVersionType DREAM3D::GetFileVersion(const fs::path& path) { auto fileReader = HDF5::FileIO::ReadFile(path); return GetFileVersion(fileReader); @@ -1094,21 +1098,53 @@ Result readLegacyNodeConnectivityList(DataStructure& dataStructure return ConvertResultTo(std::move(voidResult), std::move(value)); } +/** + * @brief Creates a NeighborList from a legacy DREAM3D (SIMPL) HDF5 dataset. + * + * The preflight parameter is forwarded to ReadHdf5Data so that during preflight + * the function creates the NeighborList with an empty store (just shape metadata) + * rather than reading the full variable-length data from disk. This avoids + * materializing potentially large NeighborList data during pipeline validation, + * which would be wasteful and could exhaust memory in OOC workflows. + * + * @tparam T Element type of the NeighborList + * @param dataStructure Target DataStructure to insert the NeighborList into + * @param parentId Parent object ID for the new NeighborList + * @param parentReader HDF5 group reader for the parent AttributeMatrix + * @param datasetReader HDF5 dataset reader for the NeighborList dataset + * @param tupleDims Tuple dimensions read from the legacy "TupleDimensions" attribute + * @param preflight When true, creates an empty store without reading data + * @return Result<> indicating success or failure + */ template Result<> createLegacyNeighborList(DataStructure& dataStructure, DataObject ::IdType parentId, const nx::core::HDF5::GroupIO& parentReader, const nx::core::HDF5::DatasetIO& datasetReader, - const ShapeType& tupleDims) + const ShapeType& tupleDims, bool preflight = false) { - auto listStore = HDF5::NeighborListIO::ReadHdf5Data(parentReader, datasetReader); + // Read the NeighborList data from HDF5. In preflight mode, this returns + // an empty store with the correct tuple count but no actual list data. 
+ std::vector warnings; + auto listStore = HDF5::NeighborListIO::ReadHdf5Data(parentReader, datasetReader, preflight, warnings); + + Result<> result; + result.m_Warnings = std::move(warnings); + + if(listStore == nullptr && !result.m_Warnings.empty()) + { + // Placeholder detected — skip without error, propagate warnings + return result; + } + auto* neighborList = NeighborList::Create(dataStructure, datasetReader.getName(), listStore, parentId); if(neighborList == nullptr) { std::string ss = fmt::format("Failed to create NeighborList: '{}'", datasetReader.getName()); return MakeErrorResult(Legacy::k_FailedCreatingNeighborList_Code, ss); } - return {}; + return result; } -Result<> readLegacyNeighborList(DataStructure& dataStructure, const nx::core::HDF5::GroupIO& parentReader, const nx::core::HDF5::DatasetIO& datasetReader, DataObject::IdType parentId) +Result<> readLegacyNeighborList(DataStructure& dataStructure, const nx::core::HDF5::GroupIO& parentReader, const nx::core::HDF5::DatasetIO& datasetReader, DataObject::IdType parentId, + bool preflight = false) { auto dataTypeResult = datasetReader.getDataType(); if(dataTypeResult.invalid()) @@ -1126,36 +1162,36 @@ Result<> readLegacyNeighborList(DataStructure& dataStructure, const nx::core::HD switch(dataType) { case DataType::float32: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims, preflight); break; case DataType::float64: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims, preflight); break; case DataType::boolean: [[fallthrough]]; case DataType::int8: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, 
datasetReader, tDims, preflight); break; case DataType::int16: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims, preflight); break; case DataType::int32: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims, preflight); break; case DataType::int64: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims, preflight); break; case DataType::uint8: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims, preflight); break; case DataType::uint16: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims, preflight); break; case DataType::uint32: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims, preflight); break; case DataType::uint64: - result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims); + result = createLegacyNeighborList(dataStructure, parentId, parentReader, datasetReader, tDims, preflight); break; } @@ -1171,13 +1207,23 @@ Result<> finishImportingLegacyNeighborListImpl(DataStructure& dataStructure, con return MakeErrorResult(-4210426, fmt::format("Failed to finish importing legacy NeighborList at path '{}'. 
Imported NeighborList not found.", dataPath.toString())); } - auto listStore = HDF5::NeighborListIO::ReadHdf5Data(parentReader, datasetReader); + std::vector warnings; + auto listStore = HDF5::NeighborListIO::ReadHdf5Data(parentReader, datasetReader, false, warnings); + + Result<> result; + result.m_Warnings = std::move(warnings); + if(listStore == nullptr) { + if(!result.m_Warnings.empty()) + { + // Placeholder detected — skip without error, propagate warnings + return result; + } return MakeErrorResult(-4210427, fmt::format("Failed to finish importing legacy NeighborList at path '{}'. Failed to import HDF5 data.", dataPath.toString())); } existingList->setStore(listStore); - return {}; + return result; } Result<> finishImportingLegacyNeighborList(DataStructure& dataStructure, const nx::core::HDF5::GroupIO& parentReader, const HDF5::DatasetIO& datasetReader, const DataPath& dataPath) @@ -1252,7 +1298,7 @@ Result<> readLegacyArray(DataStructure& dataStructure, const nx::core::HDF5::Gro auto dataArraySet = amGroupReader.openDataset(arrayName); if(isLegacyNeighborList(dataArraySet)) { - return readLegacyNeighborList(dataStructure, amGroupReader, dataArraySet, 0); + return readLegacyNeighborList(dataStructure, amGroupReader, dataArraySet, 0, preflight); } else if(isLegacyStringArray(dataArraySet)) { @@ -1614,7 +1660,7 @@ Result<> readLegacyAttributeMatrix(DataStructure& dataStructure, const nx::core: if(isLegacyNeighborList(dataArraySet)) { - daResults.push_back(readLegacyNeighborList(dataStructure, amGroupReader, dataArraySet, attributeMatrix->getId())); + daResults.push_back(readLegacyNeighborList(dataStructure, amGroupReader, dataArraySet, attributeMatrix->getId(), preflight)); } else if(isLegacyStringArray(dataArraySet)) { @@ -2176,33 +2222,6 @@ Result ImportLegacyDataStructure(const nx::core::HDF5::FileIO& fi return nx::core::ConvertResultTo(std::move(result), std::move(dataStructure)); } -Result DREAM3D::ImportDataStructureFromFile(const nx::core::HDF5::FileIO& 
fileReader, bool preflight) -{ - const auto fileVersion = GetFileVersion(fileReader); - if(fileVersion == k_CurrentFileVersion) - { - return ImportDataStructureV8(fileReader, preflight); - } - else if(fileVersion == k_LegacyFileVersion) - { - return ImportLegacyDataStructure(fileReader, preflight); - } - // Unsupported file version - return MakeErrorResult(k_InvalidDataStructureVersion, fmt::format("Could not parse DataStructure version {}. Expected versions: {} or {}. Actual value: {}", fileVersion, - k_CurrentFileVersion, k_LegacyFileVersion, fileVersion)); -} - -Result DREAM3D::ImportDataStructureFromFile(const std::filesystem::path& filePath, bool preflight) -{ - auto fileReader = nx::core::HDF5::FileIO::ReadFile(filePath); - if(!fileReader.isValid()) - { - return MakeErrorResult(-1, fmt::format("DREAM3D::ImportDataStructureFromFile: Unable to open '{}' for reading", filePath.string())); - } - - return ImportDataStructureFromFile(fileReader, preflight); -} - Result DREAM3D::ImportPipelineFromFile(const nx::core::HDF5::FileIO& fileReader) { Result pipelineJson = ImportPipelineJsonFromFile(fileReader); @@ -2239,9 +2258,9 @@ Result DREAM3D::ImportPipelineJsonFromFile(const nx::core::HDF5: return {nlohmann::json::parse(pipelineJsonString)}; } -Result DREAM3D::ImportPipelineFromFile(const std::filesystem::path& filePath) +Result DREAM3D::ImportPipelineFromFile(const fs::path& filePath) { - if(!std::filesystem::exists(filePath)) + if(!fs::exists(filePath)) { return MakeErrorResult(-1, fmt::format("DREAM3D::ImportPipelineFromFile: File does not exist. 
'{}'", filePath.string())); } @@ -2254,9 +2273,9 @@ Result DREAM3D::ImportPipelineFromFile(const std::filesystem::path& fi return ImportPipelineFromFile(fileReader); } -Result DREAM3D::ImportPipelineJsonFromFile(const std::filesystem::path& filePath) +Result DREAM3D::ImportPipelineJsonFromFile(const fs::path& filePath) { - if(!std::filesystem::exists(filePath)) + if(!fs::exists(filePath)) { return MakeErrorResult(-1, fmt::format("DREAM3D::ImportPipelineFromFile: File does not exist. '{}'", filePath.string())); } @@ -2269,117 +2288,6 @@ Result DREAM3D::ImportPipelineJsonFromFile(const std::filesystem return ImportPipelineJsonFromFile(fileReader); } -Result> DREAM3D::ImportDataObjectFromFile(const nx::core::HDF5::FileIO& fileReader, const DataPath& dataPath) -{ - const auto fileVersion = GetFileVersion(fileReader); - if(fileVersion == k_CurrentFileVersion) - { - return HDF5::DataStructureReader::ReadObject(fileReader, dataPath); - } - else if(fileVersion == k_LegacyFileVersion) - { - auto result = ImportLegacyDataObjectFromFile(fileReader, dataPath); - if(result.invalid()) - { - return ConvertInvalidResult>(std::move(result)); - } - std::vector> value = result.value(); - if(value.size() != 0) - { - return MakeErrorResult>(-48264, fmt::format("Error extracting a single DataObject from legacy DREAM3D file at path '{}'", dataPath.toString())); - } - return {result.value().front()}; - } - return MakeErrorResult>(-523242, fmt::format("Error extracting a single DataObject from legacy DREAM3D file at path '{}'", dataPath.toString())); -} - -Result>> DREAM3D::ImportSelectDataObjectsFromFile(const nx::core::HDF5::FileIO& fileReader, const std::vector& dataPaths) -{ - std::vector> dataObjects; - for(const DataPath& dataPath : dataPaths) - { - auto importResult = ImportDataObjectFromFile(fileReader, dataPath); - if(importResult.invalid()) - { - return ConvertInvalidResult>>(std::move(importResult)); - } - dataObjects.push_back(std::move(importResult.value())); - } - - return 
{dataObjects}; -} - -Result<> DREAM3D::FinishImportingObject(DataStructure& importStructure, DataStructure& dataStructure, const DataPath& dataPath, const nx::core::HDF5::FileIO& fileReader, bool preflight) -{ - if(!importStructure.containsData(dataPath)) - { - return MakeErrorResult(-6200, fmt::format("DataStructure Object Path '{}' does not exist for importing.", dataPath.toString())); - } - const auto importObject = importStructure.getSharedData(dataPath); - const auto importData = std::shared_ptr(importObject->shallowCopy()); - // Clear all children before inserting into the DataStructure - if(const auto importGroup = std::dynamic_pointer_cast(importData); importGroup != nullptr) - { - importGroup->clear(); - } - - if(!dataStructure.insert(importData, dataPath.getParent())) - { - return MakeErrorResult(-6202, fmt::format("Unable to insert DataObject at DatPath '{}' into the DataStructure", dataPath.toString())); - } - if(!preflight) - { - const auto dataPtr = dataStructure.getSharedData(dataPath); - if(dataPtr == nullptr) - { - return MakeErrorResult(-1502234, fmt::format("Cannot finish importing HDF5 data at DataPath '{}'. 
DataObject does not exist to copy data into.", dataPath.toString())); - } - - const auto fileVersion = GetFileVersion(fileReader); - if(fileVersion == k_CurrentFileVersion) - { - return HDF5::DataStructureReader::FinishImportingObject(dataStructure, fileReader, dataPath); - } - else if(fileVersion == k_LegacyFileVersion) - { - const auto dataStructureReader = fileReader.openGroup(k_LegacyDataStructureGroupTag); - return FinishImportingLegacyDataObject(dataStructure, dataStructureReader, dataPath); - } - } - return {}; -} - -Result DREAM3D::ReadFile(const nx::core::HDF5::FileIO& fileReader, bool preflight) -{ - // Pipeline pipeline; - auto pipeline = ImportPipelineFromFile(fileReader); - if(pipeline.invalid()) - { - return {{nonstd::make_unexpected(std::move(pipeline.errors()))}, std::move(pipeline.warnings())}; - } - - auto dataStructure = ImportDataStructureFromFile(fileReader, preflight); - if(pipeline.invalid()) - { - return {{nonstd::make_unexpected(std::move(dataStructure.errors()))}, std::move(dataStructure.warnings())}; - } - - return {DREAM3D::FileData{std::move(pipeline.value()), std::move(dataStructure.value())}}; -} - -Result DREAM3D::ReadFile(const std::filesystem::path& path) -{ - auto reader = nx::core::HDF5::FileIO::ReadFile(path); - nx::core::HDF5::ErrorType error = 0; - - Result fileData = ReadFile(reader, error); - if(error < 0) - { - return MakeErrorResult(-1, fmt::format("DREAM3D::ReadFile: Unable to read '{}'", path.string())); - } - return fileData; -} - Result<> WritePipeline(nx::core::HDF5::FileIO& fileWriter, const Pipeline& pipeline) { if(!fileWriter.isValid()) @@ -2433,7 +2341,7 @@ Result<> DREAM3D::WriteFile(nx::core::HDF5::FileIO& fileWriter, const Pipeline& return WriteDataStructure(fileWriter, dataStructure); } -Result<> DREAM3D::WriteFile(const std::filesystem::path& path, const DataStructure& dataStructure, const Pipeline& pipeline, bool writeXdmf) +Result<> DREAM3D::WriteFile(const fs::path& path, const DataStructure& 
dataStructure, const Pipeline& pipeline, bool writeXdmf) { auto fileWriter = nx::core::HDF5::FileIO::WriteFile(path); if(!fileWriter.isValid()) @@ -2449,14 +2357,33 @@ Result<> DREAM3D::WriteFile(const std::filesystem::path& path, const DataStructu if(writeXdmf) { - std::filesystem::path xdmfFilePath = std::filesystem::path(path).replace_extension(".xdmf"); + fs::path xdmfFilePath = fs::path(path).replace_extension(".xdmf"); WriteXdmf(xdmfFilePath, dataStructure, path.filename().string()); } return {}; } -Result<> DREAM3D::AppendFile(const std::filesystem::path& path, const DataStructure& dataStructure, const DataPath& dataPath) +Result<> DREAM3D::WriteRecoveryFile(const fs::path& path, const DataStructure& dataStructure, const Pipeline& pipeline) +{ + // Obtain the global DataIOCollection so we can activate the write-array-override. + // The SimplnxOoc plugin registers a callback on this collection at startup that + // knows how to write OOC array placeholders instead of full data. + auto& ioCollection = DataStoreUtilities::GetIOCollection(); + + // The RAII guard sets the override active on construction. While active, + // HDF5::DataStructureWriter will check each DataArray against the override + // callback before writing. The guard deactivates the override on destruction, + // ensuring it does not leak into subsequent normal WriteFile calls. + WriteArrayOverrideGuard guard(ioCollection); + + // Delegate to the standard WriteFile path. The only difference is that the + // override is now active, so OOC arrays get placeholder writes. XDMF output + // is disabled (false) for recovery files since they are transient. 
+ return WriteFile(path, dataStructure, pipeline, false); +} + +Result<> DREAM3D::AppendFile(const fs::path& path, const DataStructure& dataStructure, const DataPath& dataPath) { auto file = nx::core::HDF5::FileIO::AppendFile(path); if(!file.isValid()) @@ -2520,3 +2447,266 @@ std::vector DREAM3D::ExpandSelectedPathsToDescendants(const return expandedDataPaths; } + +namespace +{ +// --------------------------------------------------------------------------- +// Internal helpers for the new LoadDataStructure* public API +// --------------------------------------------------------------------------- + +/** + * @brief Builds a metadata-only (preflight) DataStructure from an open HDF5 file. + * This is a copy of DREAM3D::ImportDataStructureFromFile with preflight hardcoded to true. + */ +Result LoadDataStructureMetadataInternal(const nx::core::HDF5::FileIO& fileReader) +{ + const auto fileVersion = DREAM3D::GetFileVersion(fileReader); + if(fileVersion == DREAM3D::k_CurrentFileVersion) + { + return ImportDataStructureV8(fileReader, true); + } + else if(fileVersion == DREAM3D::k_LegacyFileVersion) + { + return ImportLegacyDataStructure(fileReader, true); + } + // Unsupported file version + return MakeErrorResult(DREAM3D::k_InvalidDataStructureVersion, fmt::format("Could not parse DataStructure version {}. Expected versions: {} or {}. Actual value: {}", fileVersion, + DREAM3D::k_CurrentFileVersion, DREAM3D::k_LegacyFileVersion, fileVersion)); +} + +/** + * @brief Loads a single DataObject from HDF5 into the target DataStructure. + * This is a copy of DREAM3D::FinishImportingObject placed in the anonymous namespace. 
+ */ +Result<> LoadDataObjectFromHDF5(DataStructure& importStructure, DataStructure& dataStructure, const DataPath& dataPath, const nx::core::HDF5::FileIO& fileReader, bool preflight) +{ + if(!importStructure.containsData(dataPath)) + { + return MakeErrorResult(-6200, fmt::format("DataStructure Object Path '{}' does not exist for importing.", dataPath.toString())); + } + const auto importObject = importStructure.getSharedData(dataPath); + const auto importData = std::shared_ptr(importObject->shallowCopy()); + // Clear all children before inserting into the DataStructure + if(const auto importGroup = std::dynamic_pointer_cast(importData); importGroup != nullptr) + { + importGroup->clear(); + } + + if(!dataStructure.insert(importData, dataPath.getParent())) + { + return MakeErrorResult(-6202, fmt::format("Unable to insert DataObject at DataPath '{}' into the DataStructure", dataPath.toString())); + } + if(!preflight) + { + const auto dataPtr = dataStructure.getSharedData(dataPath); + if(dataPtr == nullptr) + { + return MakeErrorResult(-1502234, fmt::format("Cannot finish importing HDF5 data at DataPath '{}'. DataObject does not exist to copy data into.", dataPath.toString())); + } + + const auto fileVersion = DREAM3D::GetFileVersion(fileReader); + if(fileVersion == DREAM3D::k_CurrentFileVersion) + { + return HDF5::DataStructureReader::FinishImportingObject(dataStructure, fileReader, dataPath); + } + else if(fileVersion == DREAM3D::k_LegacyFileVersion) + { + const auto dataStructureReader = fileReader.openGroup(k_LegacyDataStructureGroupTag); + return FinishImportingLegacyDataObject(dataStructure, dataStructureReader, dataPath); + } + } + return {}; +} + +/** + * @brief Loads data from HDF5 into an already-inserted DataObject. + * + * Unlike LoadDataObjectFromHDF5, this does NOT insert the object — it only + * reads the HDF5 data for an object that is already present in dataStructure. 
+ * Used as the eagerLoad callback when the OOC handler decides an array should + * be loaded in-core (below the size threshold). + */ +Result<> EagerLoadDataFromHDF5(DataStructure& dataStructure, const DataPath& dataPath, const nx::core::HDF5::FileIO& fileReader) +{ + const auto dataPtr = dataStructure.getSharedData(dataPath); + if(dataPtr == nullptr) + { + return MakeErrorResult(-6203, fmt::format("Cannot eager-load HDF5 data at DataPath '{}'. DataObject does not exist in the DataStructure.", dataPath.toString())); + } + + const auto fileVersion = DREAM3D::GetFileVersion(fileReader); + if(fileVersion == DREAM3D::k_CurrentFileVersion) + { + return HDF5::DataStructureReader::FinishImportingObject(dataStructure, fileReader, dataPath); + } + else if(fileVersion == DREAM3D::k_LegacyFileVersion) + { + const auto dataStructureReader = fileReader.openGroup(k_LegacyDataStructureGroupTag); + return FinishImportingLegacyDataObject(dataStructure, dataStructureReader, dataPath); + } + return {}; +} + +/** + * @brief Removes all DataObjects from ds that are not an ancestor of or equal to any path in keepPaths. 
+ */ +void PruneDataStructure(DataStructure& ds, const std::vector& keepPaths) +{ + auto allPaths = ds.getAllDataPaths(); + // Sort longest-first so children are removed before parents + std::sort(allPaths.begin(), allPaths.end(), [](const DataPath& a, const DataPath& b) { return a.getLength() > b.getLength(); }); + + for(const auto& existingPath : allPaths) + { + bool isNeeded = false; + for(const auto& requestedPath : keepPaths) + { + // Keep if it equals a requested path or is an ancestor of one + if(existingPath == requestedPath) + { + isNeeded = true; + break; + } + // Check if existingPath is an ancestor of requestedPath + const auto& existingVec = existingPath.getPathVector(); + const auto& requestedVec = requestedPath.getPathVector(); + if(existingVec.size() < requestedVec.size()) + { + bool isPrefix = true; + for(usize i = 0; i < existingVec.size(); ++i) + { + if(existingVec[i] != requestedVec[i]) + { + isPrefix = false; + break; + } + } + if(isPrefix) + { + isNeeded = true; + break; + } + } + } + if(!isNeeded) + { + ds.removeData(existingPath); + } + } +} + +/** + * @brief Shared logic for LoadDataStructure and LoadDataStructureArrays. + * Builds a metadata skeleton, then either delegates to the registered data store import handler + * (e.g. OOC) or eager-loads everything in-core. + * + * Follows the same pattern as ImportH5ObjectPathsAction::apply: + * 1. Preflight-import to get the metadata skeleton (importStructure) + * 2. Expand paths to include ancestors, sorted shortest-first + * 3. Insert each object via LoadDataObjectFromHDF5 (shallow copy + insert + optional data load) + * 4. If a handler is registered, run it for deferred loading + */ +Result LoadDataStructureWithHandler(const fs::path& filePath, const std::vector& paths) +{ + auto fileReader = nx::core::HDF5::FileIO::ReadFile(filePath); + if(!fileReader.isValid()) + { + return MakeErrorResult(-1, fmt::format("Failed to open .dream3d file '{}'. 
Check that the file exists and is a valid HDF5 file.", filePath.string())); + } + + // Build placeholder skeleton + auto metadataResult = LoadDataStructureMetadataInternal(fileReader); + if(metadataResult.invalid()) + { + return metadataResult; + } + DataStructure importStructure = std::move(metadataResult.value()); + + // Reopen file for data reading + auto dataFileReader = nx::core::HDF5::FileIO::ReadFile(filePath); + + // Check if a handler is registered (e.g. OOC plugin) + const bool useDeferredLoad = DataStoreUtilities::GetIOCollection().hasDataStoreImportHandler(); + + // Expand to include ancestor containers, sorted shortest-first + auto allPaths = DREAM3D::ExpandSelectedPathsToAncestors(paths); + std::sort(allPaths.begin(), allPaths.end(), [](const DataPath& a, const DataPath& b) { return a.getLength() < b.getLength(); }); + + // Insert each object into the target DataStructure. + // When deferring, pass preflight=true to insert placeholders without loading data. + // When not deferring, pass preflight=false to insert and load data immediately. + DataStructure dataStructure; + for(const auto& objectPath : allPaths) + { + auto result = LoadDataObjectFromHDF5(importStructure, dataStructure, objectPath, dataFileReader, useDeferredLoad); + if(result.invalid()) + { + return ConvertInvalidResult(std::move(result)); + } + } + + // If a handler is registered, let it finalize loading (e.g. 
attach OOC stores) + std::vector handlerWarnings; + if(useDeferredLoad) + { + auto eagerLoad = [&dataFileReader](DataStructure& ds, const DataPath& path) -> Result<> { return EagerLoadDataFromHDF5(ds, path, dataFileReader); }; + auto handlerResult = DataStoreUtilities::GetIOCollection().runDataStoreImportHandler(dataStructure, paths, dataFileReader, eagerLoad); + if(handlerResult.invalid()) + { + return ConvertInvalidResult(std::move(handlerResult)); + } + handlerWarnings = std::move(handlerResult.warnings()); + } + + Result finalResult{std::move(dataStructure)}; + finalResult.warnings() = std::move(handlerWarnings); + return finalResult; +} +} // namespace + +// --------------------------------------------------------------------------- +// New public LoadDataStructure* API +// --------------------------------------------------------------------------- + +Result DREAM3D::LoadDataStructureMetadata(const fs::path& path) +{ + auto fileReader = nx::core::HDF5::FileIO::ReadFile(path); + if(!fileReader.isValid()) + { + return MakeErrorResult(-1, fmt::format("Failed to open .dream3d file '{}'. 
Check that the file exists and is a valid HDF5 file.", path.string())); + } + return LoadDataStructureMetadataInternal(fileReader); +} + +Result DREAM3D::LoadDataStructure(const fs::path& path) +{ + auto metadataResult = DREAM3D::LoadDataStructureMetadata(path); + if(metadataResult.invalid()) + { + return metadataResult; + } + std::vector allPaths = metadataResult.value().getAllDataPaths(); + return LoadDataStructureWithHandler(path, allPaths); +} + +Result DREAM3D::LoadDataStructureArrays(const fs::path& path, const std::vector& dataPaths) +{ + auto result = LoadDataStructureWithHandler(path, dataPaths); + if(result.invalid()) + { + return result; + } + PruneDataStructure(result.value(), dataPaths); + return result; +} + +Result DREAM3D::LoadDataStructureArraysMetadata(const fs::path& path, const std::vector& dataPaths) +{ + auto result = DREAM3D::LoadDataStructureMetadata(path); + if(result.invalid()) + { + return result; + } + PruneDataStructure(result.value(), dataPaths); + return result; +} diff --git a/src/simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp b/src/simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp index 7ca661751c..92b70a8052 100644 --- a/src/simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp +++ b/src/simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp @@ -52,27 +52,58 @@ SIMPLNX_EXPORT FileVersionType GetFileVersion(const nx::core::HDF5::FileIO& file SIMPLNX_EXPORT PipelineVersionType GetPipelineVersion(const nx::core::HDF5::FileIO& fileReader); /** - * @brief Imports and returns the Pipeline / DataStructure pair from the target - * .dream3d file. + * @brief Loads a complete DataStructure from a .dream3d file with all arrays + * receiving real data stores (in-core or OOC via the registered import handler). * - * This method imports both current and legacy DataStructures but will return - * an empty Pipeline when given a legacy file. - * @param fileReader - * @param preflight = false - * @return FileData + * Supports both v8.0 and legacy v7.0 file formats. 
When an OOC import handler + * is registered (via DataIOCollection), it decides whether each array becomes + * an in-core DataStore or a lazy OOC store backed by the HDF5 file. + * + * @param path Filesystem path to the .dream3d file + * @return Result containing the fully loaded DataStructure, or errors on failure */ -SIMPLNX_EXPORT Result ReadFile(const nx::core::HDF5::FileIO& fileReader, bool preflight = false); +SIMPLNX_EXPORT Result LoadDataStructure(const std::filesystem::path& path); /** - * @brief Imports and returns the Pipeline / DataStructure pair from the target - * .dream3d file. + * @brief Loads specific arrays from a .dream3d file with real data stores, + * pruning all unrequested objects from the result. * - * This method imports both current and legacy DataStructures but will return - * an empty Pipeline and warning when given a legacy file. - * @param path - * @return Result + * Only the requested arrays (and their ancestor containers) are present in + * the returned DataStructure. No Empty placeholder stores remain — every + * array in the result has been fully loaded or attached to an OOC store. + * + * @param path Filesystem path to the .dream3d file + * @param dataPaths The specific DataPaths to load from the file + * @return Result containing the pruned DataStructure with only requested arrays + */ +SIMPLNX_EXPORT Result LoadDataStructureArrays(const std::filesystem::path& path, const std::vector& dataPaths); + +/** + * @brief Loads the topology (metadata skeleton) of a .dream3d file without + * loading any array data. All DataArrays receive Empty placeholder stores. + * + * This is the preflight/metadata-only path: the returned DataStructure has + * the complete hierarchy (geometries, attribute matrices, arrays) but none + * of the arrays contain real data. 
+ * + * @param path Filesystem path to the .dream3d file + * @return Result containing the metadata-only DataStructure with Empty stores */ -SIMPLNX_EXPORT Result ReadFile(const std::filesystem::path& path); +SIMPLNX_EXPORT Result LoadDataStructureMetadata(const std::filesystem::path& path); + +/** + * @brief Loads the topology (metadata skeleton) for specific arrays from a + * .dream3d file. All arrays receive Empty placeholder stores, and unrequested + * objects are pruned from the result. + * + * Combines the metadata-only behavior of LoadDataStructureMetadata with the + * path-based pruning of LoadDataStructureArrays. + * + * @param path Filesystem path to the .dream3d file + * @param dataPaths The specific DataPaths whose metadata to load + * @return Result containing the pruned metadata-only DataStructure + */ +SIMPLNX_EXPORT Result LoadDataStructureArraysMetadata(const std::filesystem::path& path, const std::vector& dataPaths); /** * @brief Writes a .dream3d file with the specified data. @@ -99,6 +130,34 @@ SIMPLNX_EXPORT Result<> WriteFile(nx::core::HDF5::FileIO& fileWriter, const Pipe */ SIMPLNX_EXPORT Result<> WriteFile(const std::filesystem::path& path, const DataStructure& dataStructure, const Pipeline& pipeline = {}, bool writeXdmf = false); +/** + * @brief Writes a recovery .dream3d file with optimized handling of OOC arrays. + * + * A recovery file captures the current pipeline state so that execution can + * be resumed after a crash or interruption. For OOC arrays, materializing + * the full data into HDF5 would be extremely expensive (potentially hundreds + * of GB). Instead, this function activates the DataIOCollection's + * write-array-override hook via a WriteArrayOverrideGuard RAII object. + * + * When the override is active, the HDF5 DataStructureWriter checks each + * DataArray against the registered override callback (set by the SimplnxOoc + * plugin). 
For OOC-backed arrays, the callback writes a lightweight + * placeholder (just the store metadata: file path, chunk layout, shape) + * instead of the full data. For in-core arrays, the callback returns + * std::nullopt, causing the writer to fall through to the standard HDF5 + * write path. + * + * If no write-array-override callback is registered (i.e., the OOC plugin + * is not loaded), the guard is a no-op and the function behaves identically + * to WriteFile. + * + * @param path Output file path for the recovery .dream3d file + * @param dataStructure The DataStructure to write + * @param pipeline The Pipeline to serialize alongside the data (default empty) + * @return Result<> with any errors from the write operation + */ +SIMPLNX_EXPORT Result<> WriteRecoveryFile(const std::filesystem::path& path, const DataStructure& dataStructure, const Pipeline& pipeline = {}); + /** * @brief Appends the object at the path in the data structure to the dream3d file * @param path @@ -108,30 +167,6 @@ SIMPLNX_EXPORT Result<> WriteFile(const std::filesystem::path& path, const DataS */ SIMPLNX_EXPORT Result<> AppendFile(const std::filesystem::path& path, const DataStructure& dataStructure, const DataPath& dataPath); -/** - * @brief Imports and returns the DataStructure from the target .dream3d file. - * - * This method imports both current and legacy DataStructures. 
- * @param fileReader - * @param preflight = false - * @return DataStructure - */ -SIMPLNX_EXPORT Result ImportDataStructureFromFile(const nx::core::HDF5::FileIO& fileReader, bool preflight); - -SIMPLNX_EXPORT Result> ImportDataObjectFromFile(const nx::core::HDF5::FileIO& fileReader, const DataPath& dataPath); - -SIMPLNX_EXPORT Result>> ImportSelectDataObjectsFromFile(const nx::core::HDF5::FileIO& fileReader, const std::vector& dataPaths); - -SIMPLNX_EXPORT Result<> FinishImportingObject(DataStructure& importStructure, DataStructure& dataStructure, const DataPath& dataPath, const nx::core::HDF5::FileIO& fileReader, bool preflight); - -/** - * @brief Imports and returns the DataStructure from the target .dream3d file. - * This method imports both current and legacy DataStructures. - * @param filePath - * @return DataStructure - */ -SIMPLNX_EXPORT Result ImportDataStructureFromFile(const std::filesystem::path& filePath, bool preflight); - /** * @brief Imports and returns a Pipeline from the target .dream3d file. 
* diff --git a/src/simplnx/Utilities/Parsing/HDF5/H5DataStore.hpp b/src/simplnx/Utilities/Parsing/HDF5/H5DataStore.hpp index 1366a2188e..dbf9617c7e 100644 --- a/src/simplnx/Utilities/Parsing/HDF5/H5DataStore.hpp +++ b/src/simplnx/Utilities/Parsing/HDF5/H5DataStore.hpp @@ -2,6 +2,8 @@ #include "H5Support.hpp" +#include "simplnx/DataStructure/IDataStore.hpp" + namespace nx::core::HDF5 { namespace Support @@ -80,7 +82,7 @@ Result<> FillDataArray(DataStructure& dataStructure, const DataPath& dataArrayPa const std::optional>& count = std::nullopt) { auto& dataArray = dataStructure.getDataRefAs>(dataArrayPath); - if(dataArray.getDataFormat().empty()) + if(dataArray.getIDataStoreRef().getStoreType() != IDataStore::StoreType::OutOfCore) { return FillDataStore(dataArray, dataArrayPath, datasetReader, start, count); } diff --git a/src/simplnx/Utilities/Parsing/HDF5/IO/DatasetIO.cpp b/src/simplnx/Utilities/Parsing/HDF5/IO/DatasetIO.cpp index 988519b9fe..49c09d265d 100644 --- a/src/simplnx/Utilities/Parsing/HDF5/IO/DatasetIO.cpp +++ b/src/simplnx/Utilities/Parsing/HDF5/IO/DatasetIO.cpp @@ -961,6 +961,133 @@ Result<> DatasetIO::writeSpan(const DimsType& dims, nonstd::span +Result<> DatasetIO::createEmptyDataset(const DimsType& dims) +{ + // Resolve the HDF5 native type ID for the template parameter. + hid_t dataType = HdfTypeForPrimitive(); + if(dataType == -1) + { + return MakeErrorResult(-1020, "createEmptyDataset error: Unsupported data type."); + } + + // Convert the DimsType vector to HDF5's hsize_t vector and create a + // simple N-D dataspace matching the full array dimensions. + std::vector hDims(dims.size()); + std::transform(dims.begin(), dims.end(), hDims.begin(), [](DimsType::value_type x) { return static_cast(x); }); + hid_t dataspaceId = H5Screate_simple(static_cast(hDims.size()), hDims.data(), nullptr); + if(dataspaceId < 0) + { + return MakeErrorResult(-1021, "createEmptyDataset error: Unable to create dataspace."); + } + + // Create (or reopen) the dataset. 
The dataset is left empty; data will
+  // be written later via writeSpanHyperslab().
+  auto datasetId = createOrOpenDataset(dataspaceId);
+  H5Sclose(dataspaceId);
+  if(datasetId < 0)
+  {
+    return MakeErrorResult(-1022, "createEmptyDataset error: Unable to create dataset.");
+  }
+
+  return {};
+}
+
+// -----------------------------------------------------------------------------
+// writeSpanHyperslab
+// -----------------------------------------------------------------------------
+// Writes a contiguous buffer of values into a rectangular sub-region (hyperslab)
+// of an existing HDF5 dataset. The dataset must already exist on disk, created
+// either by createEmptyDataset() or writeSpan().
+//
+// This is the second step of the two-step OOC write pattern. An OOC store
+// iterates over its backing file chunk-by-chunk, reads each chunk into a
+// temporary buffer, and calls this method to write that buffer into the
+// corresponding region of the output dataset. The pattern avoids ever
+// materializing the entire array in memory.
+//
+// The method works by:
+//   1. Checking that start[], count[] and the span size agree with each other
+//   2. Opening the dataset's file dataspace and checking its rank
+//   3. Selecting a hyperslab defined by start[] and count[]
+//   4. Creating a compact memory dataspace matching count[]
+//   5. Writing from the caller's span into the selected hyperslab
+//
+// Parameters:
+//   values - source buffer; must hold exactly product(count) elements
+//   start  - per-dimension offset of the hyperslab within the dataset
+//   count  - per-dimension extent of the hyperslab (same length as start)
+//
+// Returns an empty Result on success, otherwise an error Result with one of
+// the codes -506, -1010 .. -1017.
+// -----------------------------------------------------------------------------
+template <typename T>
+Result<> DatasetIO::writeSpanHyperslab(nonstd::span<const T> values, const std::vector<uint64>& start, const std::vector<uint64>& count)
+{
+  if(!isValid())
+  {
+    return MakeErrorResult(-506, fmt::format("Cannot open HDF5 data at {} / {}", getFilePath().string(), getNamePath()));
+  }
+
+  // Resolve the HDF5 native type for T.
+  hid_t dataType = HdfTypeForPrimitive<T>();
+  if(dataType == -1)
+  {
+    return MakeErrorResult(-1010, "writeSpanHyperslab error: Unsupported data type.");
+  }
+
+  // start[] and count[] are handed to HDF5 as raw pointers, so a length
+  // mismatch would be read out of bounds instead of being reported.
+  if(start.size() != count.size())
+  {
+    return MakeErrorResult(-1015, fmt::format("writeSpanHyperslab error: start has {} dimensions but count has {}.", start.size(), count.size()));
+  }
+
+  // H5Dwrite pulls product(count) elements out of the caller's buffer without
+  // knowing how large that buffer is, so the span size is verified up front.
+  uint64 expectedElements = 1;
+  for(const uint64 extent : count)
+  {
+    expectedElements *= extent;
+  }
+  if(static_cast<uint64>(values.size()) != expectedElements)
+  {
+    return MakeErrorResult(-1016, fmt::format("writeSpanHyperslab error: span holds {} elements but the hyperslab selects {}.", values.size(), expectedElements));
+  }
+
+  // Open the existing dataset and retrieve its file-side dataspace.
+  hid_t datasetId = open();
+  hid_t fileSpaceId = H5Dget_space(datasetId);
+  if(fileSpaceId < 0)
+  {
+    return MakeErrorResult(-1011, "writeSpanHyperslab error: Unable to open the dataspace.");
+  }
+
+  // H5Sselect_hyperslab reads one entry per dataset dimension from start[] and
+  // count[], so both must match the rank of the dataset on disk.
+  const int rank = H5Sget_simple_extent_ndims(fileSpaceId);
+  if(rank < 0 || static_cast<size_t>(rank) != count.size())
+  {
+    H5Sclose(fileSpaceId);
+    return MakeErrorResult(-1017, fmt::format("writeSpanHyperslab error: dataset rank is {} but the hyperslab has {} dimensions.", rank, count.size()));
+  }
+
+  // Select the hyperslab region [start, start+count) in the file dataspace.
+  // hsize_t and uint64 are not the same type on every platform, so the values
+  // are always copied into hsize_t vectors rather than casting the pointers.
+  const std::vector<hsize_t> startVec(start.begin(), start.end());
+  const std::vector<hsize_t> countVec(count.begin(), count.end());
+  if(H5Sselect_hyperslab(fileSpaceId, H5S_SELECT_SET, startVec.data(), nullptr, countVec.data(), nullptr) < 0)
+  {
+    H5Sclose(fileSpaceId);
+    return MakeErrorResult(-1012, "writeSpanHyperslab error: Unable to select hyperslab.");
+  }
+
+  // Create a memory-side dataspace that matches the hyperslab extent.
+  hid_t memSpaceId = H5Screate_simple(static_cast<int>(countVec.size()), countVec.data(), nullptr);
+  if(memSpaceId < 0)
+  {
+    H5Sclose(fileSpaceId);
+    return MakeErrorResult(-1013, "writeSpanHyperslab error: Unable to create memory dataspace.");
+  }
+
+  // Write from the in-memory buffer into the selected hyperslab on disk.
+  herr_t error = H5Dwrite(datasetId, dataType, memSpaceId, fileSpaceId, H5P_DEFAULT, values.data());
+
+  H5Sclose(memSpaceId);
+  H5Sclose(fileSpaceId);
+
+  if(error < 0)
+  {
+    return MakeErrorResult(-1014, fmt::format("writeSpanHyperslab error: H5Dwrite failed with error {}", error));
+  }
+
+  return {};
+}
+
 template
 nx::core::Result DatasetIO::initChunkedDataset(const DimsType& h5Dims, const DimsType& chunkDims) const
 {
@@ -1592,4 +1719,29 @@ template SIMPLNX_EXPORT Result<> DatasetIO::writeChunk(const ChunkedDataIn
 #ifdef _WIN32
 template SIMPLNX_EXPORT Result<> DatasetIO::writeChunk(const ChunkedDataInfo&, const DimsType&, nonstd::span, const DimsType&, const DimsType&, nonstd::span);
 #endif
+
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+template SIMPLNX_EXPORT Result<> DatasetIO::createEmptyDataset(const DimsType&);
+
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<>
DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+template SIMPLNX_EXPORT Result<> DatasetIO::writeSpanHyperslab(nonstd::span, const std::vector&, const std::vector&);
+
 } // namespace nx::core::HDF5
diff --git a/src/simplnx/Utilities/Parsing/HDF5/IO/DatasetIO.hpp b/src/simplnx/Utilities/Parsing/HDF5/IO/DatasetIO.hpp
index 3a01eb6717..05f0ccc883 100644
--- a/src/simplnx/Utilities/Parsing/HDF5/IO/DatasetIO.hpp
+++ b/src/simplnx/Utilities/Parsing/HDF5/IO/DatasetIO.hpp
@@ -205,6 +205,45 @@ class SIMPLNX_EXPORT DatasetIO : public ObjectIO
   template
   nx::core::Result<> writeSpan(const DimsType& dims, nonstd::span values);
 
+  /**
+   * @brief Creates an HDF5 dataset with the correct type and dimensions but does
+   * not write any data. This is used by out-of-core (OOC) stores that cannot
+   * call writeSpan() because the entire array is not resident in memory. Instead,
+   * OOC stores first create the empty dataset, then fill it region-by-region using
+   * writeSpanHyperslab() as they stream data from the backing file.
+   *
+   * In-core stores do not need this method — they use writeSpan(), which creates
+   * the dataset and writes all data in a single call.
+   *
+   * @tparam T The element type of the dataset
+   * @param dims The N-D dimensions of the dataset to create
+   * @return Result indicating success or failure
+   */
+  template
+  nx::core::Result<> createEmptyDataset(const DimsType& dims);
+
+  /**
+   * @brief Writes a contiguous span of values into a sub-region (hyperslab) of an
+   * existing HDF5 dataset. The dataset must already exist, created either by
+   * createEmptyDataset() or writeSpan().
+   *
+   * This method exists for out-of-core (OOC) stores that materialize their data
+   * in chunks: the store reads a region from its backing file into a temporary
+   * buffer, then writes that buffer into the corresponding hyperslab of the output
+   * dataset. This is repeated for each region until the entire dataset is filled.
+   *
+   * In-core stores do not need this method — they use writeSpan(), which writes
+   * the full array in one call.
+   *
+   * @tparam T The element type of the dataset
+   * @param values The data to write into the hyperslab; must hold exactly product(count) elements
+   * @param start N-D start offset for the hyperslab selection (one entry per dataset dimension)
+   * @param count N-D extent of the hyperslab in each dimension (same length as start)
+   * @return Result indicating success or failure
+   */
+  template
+  nx::core::Result<> writeSpanHyperslab(nonstd::span values, const std::vector& start, const std::vector& count);
+
   template
   Result initChunkedDataset(const DimsType& dims, const DimsType& chunkDims) const;
   nx::core::Result<> closeChunkedDataset(const ChunkedDataInfo& datasetInfo) const;
diff --git a/test/AppTest.cpp b/test/AppTest.cpp
index 434c4fb63e..6cc841fff4 100644
--- a/test/AppTest.cpp
+++ b/test/AppTest.cpp
@@ -276,12 +276,6 @@ TEST_CASE("Application::getIOCollection", "[Application]")
 {
   auto app = Application::GetOrCreateInstance();
 
-  SECTION("IOCollection is never null")
-  {
-    auto collection = app->getIOCollection();
-    REQUIRE(collection != nullptr);
-  }
-
   SECTION("getDataStoreFormats returns format names")
   {
     auto formats = app->getDataStoreFormats();
diff --git a/test/CMakeLists.txt
b/test/CMakeLists.txt index aae13f39c1..e178f7fe39 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -25,17 +25,21 @@ add_executable(simplnx_test ArgumentsTest.cpp BitTest.cpp DataArrayTest.cpp + DataIOCollectionHooksTest.cpp DataPathTest.cpp + Dream3dLoadingApiTest.cpp DataStructObserver.hpp DataStructObserver.cpp DataStructTest.cpp DynamicFilterInstantiationTest.cpp + EmptyStringStoreTest.cpp FilePathGeneratorTest.cpp GeometryTest.cpp GeometryTestUtilities.hpp H5Test.cpp IntersectionUtilitiesTest.cpp IOFormat.cpp + IParallelAlgorithmTest.cpp MontageTest.cpp PluginTest.cpp ParametersTest.cpp diff --git a/test/DataIOCollectionHooksTest.cpp b/test/DataIOCollectionHooksTest.cpp new file mode 100644 index 0000000000..e6d1703cb5 --- /dev/null +++ b/test/DataIOCollectionHooksTest.cpp @@ -0,0 +1,51 @@ +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/DataStructure/IO/Generic/DataIOCollection.hpp" + +#include + +using namespace nx::core; + +TEST_CASE("DataIOCollectionHooks: format resolver default behavior") +{ + DataIOCollection collection; + REQUIRE(collection.hasFormatResolver() == false); + + // With no resolver set, should return empty string + DataStructure ds; + DataPath dp({"TestArray"}); + std::string result = collection.resolveFormat(ds, dp, DataType::float32, 4000000); + REQUIRE(result.empty()); +} + +TEST_CASE("DataIOCollectionHooks: format resolver returns plugin result") +{ + DataIOCollection collection; + collection.setFormatResolver([](const DataStructure&, const DataPath&, DataType, uint64 sizeBytes) -> std::string { return sizeBytes > 1000 ? 
"HDF5-OOC" : ""; }); + REQUIRE(collection.hasFormatResolver() == true); + + DataStructure ds; + DataPath dp({"TestArray"}); + // Small array - returns "" + REQUIRE(collection.resolveFormat(ds, dp, DataType::float32, 40).empty()); + // Large array - returns "HDF5-OOC" + REQUIRE(collection.resolveFormat(ds, dp, DataType::float32, 4000) == "HDF5-OOC"); +} + +TEST_CASE("DataIOCollectionHooks: format resolver can be unset") +{ + DataIOCollection collection; + collection.setFormatResolver([](const DataStructure&, const DataPath&, DataType, uint64) -> std::string { return "HDF5-OOC"; }); + REQUIRE(collection.hasFormatResolver() == true); + + collection.setFormatResolver(nullptr); + REQUIRE(collection.hasFormatResolver() == false); + DataStructure ds; + DataPath dp({"TestArray"}); + REQUIRE(collection.resolveFormat(ds, dp, DataType::float32, 40).empty()); +} + +TEST_CASE("DataIOCollectionHooks: data store import handler default behavior") +{ + DataIOCollection collection; + REQUIRE(collection.hasDataStoreImportHandler() == false); +} diff --git a/test/Dream3dLoadingApiTest.cpp b/test/Dream3dLoadingApiTest.cpp new file mode 100644 index 0000000000..b52226d055 --- /dev/null +++ b/test/Dream3dLoadingApiTest.cpp @@ -0,0 +1,397 @@ +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/DataGroup.hpp" +#include "simplnx/DataStructure/DataStore.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/DataStructure/EmptyDataStore.hpp" +#include "simplnx/DataStructure/IDataStore.hpp" +#include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp" + +#include "simplnx/unit_test/simplnx_test_dirs.hpp" + +#include + +#include + +namespace fs = std::filesystem; +using namespace nx::core; + +namespace +{ +// --------------------------------------------------------------------------- +// Constants for path construction +// 
--------------------------------------------------------------------------- +constexpr StringLiteral k_GroupName = "TopGroup"; +constexpr StringLiteral k_SmallAttrMatName = "SmallAM"; +constexpr StringLiteral k_LargeAttrMatName = "LargeAM"; +constexpr StringLiteral k_SmallArrayName = "SmallArray"; +constexpr StringLiteral k_LargeArrayName = "LargeArray"; + +constexpr usize k_SmallArraySize = 10; +constexpr usize k_LargeArraySize = 100; + +// Common DataPaths for the simple test structure +const DataPath k_GroupPath({k_GroupName}); +const DataPath k_SmallAMPath({k_GroupName, k_SmallAttrMatName}); +const DataPath k_LargeAMPath({k_GroupName, k_LargeAttrMatName}); +const DataPath k_SmallArrayPath({k_GroupName, k_SmallAttrMatName, k_SmallArrayName}); +const DataPath k_LargeArrayPath({k_GroupName, k_LargeAttrMatName, k_LargeArrayName}); + +// Paths used in the multi-group prune test (scenario 7) +constexpr StringLiteral k_GroupAName = "GroupA"; +constexpr StringLiteral k_GroupBName = "GroupB"; +constexpr StringLiteral k_AttrMatAName = "AttrMatA"; +constexpr StringLiteral k_AttrMatBName = "AttrMatB"; +constexpr StringLiteral k_ArrayA1Name = "ArrayA1"; +constexpr StringLiteral k_ArrayA2Name = "ArrayA2"; +constexpr StringLiteral k_ArrayB1Name = "ArrayB1"; +constexpr StringLiteral k_ArrayB2Name = "ArrayB2"; +constexpr usize k_PruneArraySize = 20; + +const DataPath k_GroupAPath({k_GroupAName}); +const DataPath k_AttrMatAPath({k_GroupAName, k_AttrMatAName}); +const DataPath k_ArrayA1Path({k_GroupAName, k_AttrMatAName, k_ArrayA1Name}); +const DataPath k_ArrayA2Path({k_GroupAName, k_AttrMatAName, k_ArrayA2Name}); +const DataPath k_GroupBPath({k_GroupBName}); +const DataPath k_AttrMatBPath({k_GroupBName, k_AttrMatBName}); +const DataPath k_ArrayB1Path({k_GroupBName, k_AttrMatBName, k_ArrayB1Name}); +const DataPath k_ArrayB2Path({k_GroupBName, k_AttrMatBName, k_ArrayB2Name}); + +// --------------------------------------------------------------------------- +// Helpers +// 
--------------------------------------------------------------------------- + +fs::path GetTestOutputDir() +{ + return fs::path(unit_test::k_BinaryTestOutputDir.view()); +} + +/** + * @brief Creates a DataStructure with the hierarchy: + * TopGroup / SmallAM / SmallArray (10 x int32) + * / LargeAM / LargeArray (100 x float32) + * + * Two separate AttributeMatrices are needed because AM enforces that all + * child arrays share the same tuple dimensions. + * SmallArray values are filled with i * 3, LargeArray with i * 1.5f. + */ +DataStructure CreateSimpleTestDataStructure() +{ + DataStructure ds; + + auto* group = DataGroup::Create(ds, k_GroupName); + REQUIRE(group != nullptr); + + // SmallAM holds SmallArray (10 tuples) + auto* smallAM = AttributeMatrix::Create(ds, k_SmallAttrMatName, {k_SmallArraySize}, group->getId()); + REQUIRE(smallAM != nullptr); + + auto smallStore = std::make_unique>(std::vector{k_SmallArraySize}, std::vector{1}, static_cast(0)); + for(usize i = 0; i < k_SmallArraySize; ++i) + { + smallStore->setValue(i, static_cast(i * 3)); + } + auto* smallArray = DataArray::Create(ds, k_SmallArrayName, std::move(smallStore), smallAM->getId()); + REQUIRE(smallArray != nullptr); + + // LargeAM holds LargeArray (100 tuples) + auto* largeAM = AttributeMatrix::Create(ds, k_LargeAttrMatName, {k_LargeArraySize}, group->getId()); + REQUIRE(largeAM != nullptr); + + auto largeStore = std::make_unique>(std::vector{k_LargeArraySize}, std::vector{1}, static_cast(0)); + for(usize i = 0; i < k_LargeArraySize; ++i) + { + largeStore->setValue(i, static_cast(i) * 1.5f); + } + auto* largeArray = DataArray::Create(ds, k_LargeArrayName, std::move(largeStore), largeAM->getId()); + REQUIRE(largeArray != nullptr); + + return ds; +} + +/** + * @brief Writes the given DataStructure to a temp .dream3d file and returns the path. 
+ */ +fs::path WriteTestFile(const DataStructure& ds, const std::string& fileName) +{ + fs::path outputPath = GetTestOutputDir() / fileName; + Result<> writeResult = DREAM3D::WriteFile(outputPath, ds); + SIMPLNX_RESULT_REQUIRE_VALID(writeResult); + REQUIRE(fs::exists(outputPath)); + return outputPath; +} + +/** + * @brief Creates a DataStructure for the multi-group prune test: + * GroupA / AttrMatA / ArrayA1 (20 x int32) + * / ArrayA2 (20 x int32) + * GroupB / AttrMatB / ArrayB1 (20 x float32) + * / ArrayB2 (20 x float32) + */ +DataStructure CreateMultiGroupTestDataStructure() +{ + DataStructure ds; + + auto* groupA = DataGroup::Create(ds, k_GroupAName); + REQUIRE(groupA != nullptr); + auto* attrMatA = AttributeMatrix::Create(ds, k_AttrMatAName, {k_PruneArraySize}, groupA->getId()); + REQUIRE(attrMatA != nullptr); + + auto storeA1 = std::make_unique>(std::vector{k_PruneArraySize}, std::vector{1}, static_cast(0)); + for(usize i = 0; i < k_PruneArraySize; ++i) + { + storeA1->setValue(i, static_cast(i)); + } + auto* arrayA1 = DataArray::Create(ds, k_ArrayA1Name, std::move(storeA1), attrMatA->getId()); + REQUIRE(arrayA1 != nullptr); + + auto storeA2 = std::make_unique>(std::vector{k_PruneArraySize}, std::vector{1}, static_cast(0)); + for(usize i = 0; i < k_PruneArraySize; ++i) + { + storeA2->setValue(i, static_cast(i * 2)); + } + auto* arrayA2 = DataArray::Create(ds, k_ArrayA2Name, std::move(storeA2), attrMatA->getId()); + REQUIRE(arrayA2 != nullptr); + + auto* groupB = DataGroup::Create(ds, k_GroupBName); + REQUIRE(groupB != nullptr); + auto* attrMatB = AttributeMatrix::Create(ds, k_AttrMatBName, {k_PruneArraySize}, groupB->getId()); + REQUIRE(attrMatB != nullptr); + + auto storeB1 = std::make_unique>(std::vector{k_PruneArraySize}, std::vector{1}, static_cast(0)); + for(usize i = 0; i < k_PruneArraySize; ++i) + { + storeB1->setValue(i, static_cast(i) * 0.5f); + } + auto* arrayB1 = DataArray::Create(ds, k_ArrayB1Name, std::move(storeB1), attrMatB->getId()); + 
REQUIRE(arrayB1 != nullptr); + + auto storeB2 = std::make_unique>(std::vector{k_PruneArraySize}, std::vector{1}, static_cast(0)); + for(usize i = 0; i < k_PruneArraySize; ++i) + { + storeB2->setValue(i, static_cast(i) * 0.25f); + } + auto* arrayB2 = DataArray::Create(ds, k_ArrayB2Name, std::move(storeB2), attrMatB->getId()); + REQUIRE(arrayB2 != nullptr); + + return ds; +} +} // namespace + +// ============================================================================= +// Test Scenarios +// ============================================================================= + +TEST_CASE("Dream3dLoadingApi: LoadDataStructure loads all arrays") +{ + DataStructure srcDs = CreateSimpleTestDataStructure(); + fs::path filePath = WriteTestFile(srcDs, "Dream3dLoadingApiTest_LoadAll.dream3d"); + + Result result = DREAM3D::LoadDataStructure(filePath); + SIMPLNX_RESULT_REQUIRE_VALID(result); + + const DataStructure& ds = result.value(); + + const auto& smallPath = k_SmallArrayPath; + const auto& largePath = k_LargeArrayPath; + + // Both arrays must exist + auto* smallArray = ds.getDataAs(smallPath); + REQUIRE(smallArray != nullptr); + REQUIRE(smallArray->getNumberOfTuples() == k_SmallArraySize); + + auto* largeArray = ds.getDataAs(largePath); + REQUIRE(largeArray != nullptr); + REQUIRE(largeArray->getNumberOfTuples() == k_LargeArraySize); + + // Verify SmallArray values + const auto& smallStore = smallArray->getDataStoreRef(); + for(usize i = 0; i < k_SmallArraySize; ++i) + { + CHECK(smallStore[i] == static_cast(i * 3)); + } + + // Verify LargeArray values + const auto& largeStore = largeArray->getDataStoreRef(); + for(usize i = 0; i < k_LargeArraySize; ++i) + { + CHECK(largeStore[i] == Approx(static_cast(i) * 1.5f)); + } +} + +TEST_CASE("Dream3dLoadingApi: LoadDataStructureArrays loads only requested arrays") +{ + DataStructure srcDs = CreateSimpleTestDataStructure(); + fs::path filePath = WriteTestFile(srcDs, "Dream3dLoadingApiTest_Selective.dream3d"); + + const auto& smallPath 
= k_SmallArrayPath;
+  const auto& largePath = k_LargeArrayPath;
+
+  Result result = DREAM3D::LoadDataStructureArrays(filePath, {smallPath});
+  SIMPLNX_RESULT_REQUIRE_VALID(result);
+
+  const DataStructure& ds = result.value();
+
+  // SmallArray must exist with correct data
+  auto* smallArray = ds.getDataAs(smallPath);
+  REQUIRE(smallArray != nullptr);
+  REQUIRE(smallArray->getNumberOfTuples() == k_SmallArraySize);
+
+  const auto& smallStore = smallArray->getDataStoreRef();
+  for(usize i = 0; i < k_SmallArraySize; ++i)
+  {
+    CHECK(smallStore[i] == static_cast(i * 3));
+  }
+
+  // LargeArray must NOT exist
+  auto* largeArray = ds.getDataAs(largePath);
+  CHECK(largeArray == nullptr);
+
+  // Ancestor containers must exist
+  CHECK(ds.getDataAs(k_GroupPath) != nullptr);
+  CHECK(ds.getDataAs(k_SmallAMPath) != nullptr);
+}
+
+TEST_CASE("Dream3dLoadingApi: LoadDataStructureArraysMetadata loads only requested metadata")
+{
+  DataStructure srcDs = CreateSimpleTestDataStructure();
+  fs::path filePath = WriteTestFile(srcDs, "Dream3dLoadingApiTest_SelectiveMeta.dream3d");
+
+  const auto& smallPath = k_SmallArrayPath;
+  const auto& largePath = k_LargeArrayPath;
+
+  Result result = DREAM3D::LoadDataStructureArraysMetadata(filePath, {smallPath});
+  SIMPLNX_RESULT_REQUIRE_VALID(result);
+
+  const DataStructure& ds = result.value();
+
+  // SmallArray must exist
+  auto* smallArray = ds.getDataAs(smallPath);
+  REQUIRE(smallArray != nullptr);
+
+  // SmallArray store must be Empty (no data loaded)
+  CHECK(smallArray->getStoreType() == IDataStore::StoreType::Empty);
+
+  // LargeArray must NOT exist
+  auto* largeArray = ds.getDataAs(largePath);
+  CHECK(largeArray == nullptr);
+
+  // Ancestor containers must exist
+  CHECK(ds.getDataAs(k_GroupPath) != nullptr);
+  CHECK(ds.getDataAs(k_SmallAMPath) != nullptr);
+}
+
+TEST_CASE("Dream3dLoadingApi: LoadDataStructureMetadata loads all metadata")
+{
+  DataStructure srcDs = CreateSimpleTestDataStructure();
+  fs::path filePath = WriteTestFile(srcDs, "Dream3dLoadingApiTest_AllMeta.dream3d");
+
+  Result result = DREAM3D::LoadDataStructureMetadata(filePath);
+  SIMPLNX_RESULT_REQUIRE_VALID(result);
+
+  const DataStructure& ds = result.value();
+
+  const auto& smallPath = k_SmallArrayPath;
+  const auto& largePath = k_LargeArrayPath;
+
+  // Both arrays must exist
+  auto* smallArray = ds.getDataAs(smallPath);
+  REQUIRE(smallArray != nullptr);
+  auto* largeArray = ds.getDataAs(largePath);
+  REQUIRE(largeArray != nullptr);
+
+  // Both must have Empty stores (no data loaded)
+  CHECK(smallArray->getStoreType() == IDataStore::StoreType::Empty);
+  CHECK(largeArray->getStoreType() == IDataStore::StoreType::Empty);
+}
+
+// Loading a file that does not exist must produce an invalid Result carrying
+// error code -1 rather than crashing or returning an empty DataStructure.
+TEST_CASE("Dream3dLoadingApi: LoadDataStructure with invalid path returns error")
+{
+  const fs::path bogusPath("/tmp/nonexistent_dream3d_file_12345.dream3d");
+
+  // Remember the HDF5 error handler that is currently installed so it can be
+  // put back afterwards; forcing H5Eprint/stderr here would override whatever
+  // the rest of the test run had configured.
+  H5E_auto_t previousHandler = nullptr;
+  void* previousClientData = nullptr;
+  H5Eget_auto(H5E_DEFAULT, &previousHandler, &previousClientData);
+
+  // Suppress HDF5 error output for the expected failure only
+  H5Eset_auto(H5E_DEFAULT, nullptr, nullptr);
+  Result<DataStructure> result = DREAM3D::LoadDataStructure(bogusPath);
+  // Restore the handler that was active before this test
+  H5Eset_auto(H5E_DEFAULT, previousHandler, previousClientData);
+
+  REQUIRE(result.invalid());
+  REQUIRE(!result.errors().empty());
+  CHECK(result.errors()[0].code == -1);
+}
+
+TEST_CASE("Dream3dLoadingApi: LoadDataStructure with legacy file")
+{
+  const fs::path legacyPath = fs::path(unit_test::k_SourceDir.view()) / "test" / "Data" / "LegacyData.dream3d";
+  REQUIRE(fs::exists(legacyPath));
+
+  Result result = DREAM3D::LoadDataStructure(legacyPath);
+  SIMPLNX_RESULT_REQUIRE_VALID(result);
+
+  // Verify the DataStructure has content (legacy files should load successfully)
+  const DataStructure& ds = result.value();
+  CHECK(!ds.getAllDataPaths().empty());
+}
+
+TEST_CASE("Dream3dLoadingApi: LoadDataStructureArrays prune verification")
+{
+  DataStructure srcDs = CreateMultiGroupTestDataStructure();
+  fs::path filePath = WriteTestFile(srcDs, "Dream3dLoadingApiTest_Prune.dream3d");
+
+  Result result = DREAM3D::LoadDataStructureArrays(filePath, {k_ArrayA1Path});
+
SIMPLNX_RESULT_REQUIRE_VALID(result); + + const DataStructure& ds = result.value(); + + // ArrayA1 and its ancestors must exist + auto* arrayA1 = ds.getDataAs(k_ArrayA1Path); + REQUIRE(arrayA1 != nullptr); + REQUIRE(arrayA1->getNumberOfTuples() == k_PruneArraySize); + CHECK(ds.getDataAs(k_GroupAPath) != nullptr); + CHECK(ds.getDataAs(k_AttrMatAPath) != nullptr); + + // ArrayA2 must NOT exist (same group, but not requested) + CHECK(ds.getDataAs(k_ArrayA2Path) == nullptr); + + // GroupB and its children must NOT exist + CHECK(ds.getDataAs(k_GroupBPath) == nullptr); + CHECK(ds.getDataAs(k_ArrayB1Path) == nullptr); + CHECK(ds.getDataAs(k_ArrayB2Path) == nullptr); +} + +TEST_CASE("Dream3dLoadingApi: Recovery file with all in-core data") +{ + DataStructure srcDs = CreateSimpleTestDataStructure(); + fs::path filePath = GetTestOutputDir() / "Dream3dLoadingApiTest_Recovery.dream3d"; + + // Write as a recovery file — without OOC plugin, this behaves like WriteFile + // but exercises the recovery write path + Result<> writeResult = DREAM3D::WriteRecoveryFile(filePath, srcDs); + SIMPLNX_RESULT_REQUIRE_VALID(writeResult); + REQUIRE(fs::exists(filePath)); + + Result result = DREAM3D::LoadDataStructure(filePath); + SIMPLNX_RESULT_REQUIRE_VALID(result); + + const DataStructure& ds = result.value(); + + const auto& smallPath = k_SmallArrayPath; + const auto& largePath = k_LargeArrayPath; + + // All arrays should be in-core since no OOC plugin is loaded + auto* smallArray = ds.getDataAs(smallPath); + REQUIRE(smallArray != nullptr); + CHECK(smallArray->getStoreType() == IDataStore::StoreType::InMemory); + + auto* largeArray = ds.getDataAs(largePath); + REQUIRE(largeArray != nullptr); + CHECK(largeArray->getStoreType() == IDataStore::StoreType::InMemory); + + // Verify data integrity through the recovery round-trip + const auto& smallStore = ds.getDataRefAs(smallPath).getDataStoreRef(); + for(usize i = 0; i < k_SmallArraySize; ++i) + { + CHECK(smallStore[i] == static_cast(i * 3)); + } 
+} diff --git a/test/EmptyStringStoreTest.cpp b/test/EmptyStringStoreTest.cpp new file mode 100644 index 0000000000..3b00766836 --- /dev/null +++ b/test/EmptyStringStoreTest.cpp @@ -0,0 +1,60 @@ +#include "simplnx/DataStructure/EmptyStringStore.hpp" +#include "simplnx/DataStructure/StringStore.hpp" + +#include + +using namespace nx::core; + +TEST_CASE("EmptyStringStore: basic metadata") +{ + ShapeType tupleShape = {5}; + EmptyStringStore store(tupleShape); + + REQUIRE(store.size() == 5); + REQUIRE(store.getNumberOfTuples() == 5); + REQUIRE(store.getTupleShape() == tupleShape); + REQUIRE(store.empty() == false); + REQUIRE(store.isPlaceholder() == true); +} + +TEST_CASE("EmptyStringStore: zero tuples") +{ + EmptyStringStore store({0}); + REQUIRE(store.size() == 0); + REQUIRE(store.empty() == true); + REQUIRE(store.isPlaceholder() == true); +} + +TEST_CASE("EmptyStringStore: data access throws") +{ + EmptyStringStore store({3}); + + REQUIRE_THROWS_AS(store[0], std::runtime_error); + REQUIRE_THROWS_AS(store.at(0), std::runtime_error); + REQUIRE_THROWS_AS(store.getValue(0), std::runtime_error); + REQUIRE_THROWS_AS(store.setValue(0, "test"), std::runtime_error); +} + +TEST_CASE("EmptyStringStore: deep copy preserves placeholder status") +{ + EmptyStringStore original({4}); + auto copy = original.deepCopy(); + + REQUIRE(copy->isPlaceholder() == true); + REQUIRE(copy->size() == 4); + REQUIRE(copy->getTupleShape() == ShapeType{4}); +} + +TEST_CASE("EmptyStringStore: resize") +{ + EmptyStringStore store({2}); + store.resizeTuples({10}); + REQUIRE(store.getNumberOfTuples() == 10); + REQUIRE(store.size() == 10); +} + +TEST_CASE("StringStore: isPlaceholder returns false") +{ + StringStore store(std::vector{"a", "b", "c"}, ShapeType{3}); + REQUIRE(store.isPlaceholder() == false); +} diff --git a/test/H5Test.cpp b/test/H5Test.cpp index aa3d9bdcb1..51ed5a2f59 100644 --- a/test/H5Test.cpp +++ b/test/H5Test.cpp @@ -57,13 +57,13 @@ const fs::path k_ComplexH5File = "new.h5"; fs::path 
GetDataDir() { - return std::filesystem::path(unit_test::k_BinaryTestOutputDir.view()); + return fs::path(unit_test::k_BinaryTestOutputDir.view()); } fs::path GetLegacyFilepath() { std::string path = fmt::format("{}/test/Data/{}", unit_test::k_SourceDir.view(), Constants::k_LegacyFilepath); - return std::filesystem::path(path); + return fs::path(path); } fs::path GetComplexH5File() @@ -625,10 +625,10 @@ H5ClassT TestH5ImplicitCopy(H5ClassT&& originalObject, std::string_view testedCl TEST_CASE("Read Legacy DREAM3D-NX Data") { auto app = Application::GetOrCreateInstance(); - std::filesystem::path filepath = GetLegacyFilepath(); + fs::path filepath = GetLegacyFilepath(); REQUIRE(exists(filepath)); { - Result result = DREAM3D::ImportDataStructureFromFile(filepath, true); + Result result = DREAM3D::LoadDataStructureMetadata(filepath); SIMPLNX_RESULT_REQUIRE_VALID(result); DataStructure dataStructure = result.value(); @@ -1024,8 +1024,8 @@ TEST_CASE("HDF5ImplicitCopyIOTest") TEST_CASE("DataStructureAppend") { - const std::filesystem::path inputFilePath = fs::path(unit_test::k_SourceDir.view()) / "test/Data/geoms.dream3d"; - const std::filesystem::path outputFilePath = GetDataDir() / "DataStructureAppend.dream3d"; + const fs::path inputFilePath = fs::path(unit_test::k_SourceDir.view()) / "test/Data/geoms.dream3d"; + const fs::path outputFilePath = GetDataDir() / "DataStructureAppend.dream3d"; const DataPath originalArrayPath({"foo"}); DataStructure baseDataStructure; @@ -1038,9 +1038,7 @@ TEST_CASE("DataStructureAppend") Result<> writeResult = DREAM3D::WriteFile(outputFilePath, baseDataStructure); SIMPLNX_RESULT_REQUIRE_VALID(writeResult); - auto readResult = DREAM3D::ImportDataStructureFromFile(inputFilePath, false); - SIMPLNX_RESULT_REQUIRE_VALID(readResult); - DataStructure exemplarDataStructure = std::move(readResult.value()); + DataStructure exemplarDataStructure = UnitTest::LoadDataStructure(inputFilePath); usize currentTopLevelSize = 
baseDataStructure.getTopLevelData().size(); for(const DataObject* object : exemplarDataStructure.getTopLevelData()) @@ -1050,10 +1048,7 @@ TEST_CASE("DataStructureAppend") auto appendResult = DREAM3D::AppendFile(outputFilePath, exemplarDataStructure, path); SIMPLNX_RESULT_REQUIRE_VALID(appendResult); - auto appendedFileReadResult = DREAM3D::ImportDataStructureFromFile(outputFilePath, false); - SIMPLNX_RESULT_REQUIRE_VALID(appendedFileReadResult); - - DataStructure appendedDataStructure = std::move(appendedFileReadResult.value()); + DataStructure appendedDataStructure = UnitTest::LoadDataStructure(outputFilePath); currentTopLevelSize++; diff --git a/test/IOFormat.cpp b/test/IOFormat.cpp index 0aac8b4e56..ae05336b6b 100644 --- a/test/IOFormat.cpp +++ b/test/IOFormat.cpp @@ -1,7 +1,11 @@ #include #include "simplnx/Core/Application.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" #include "simplnx/DataStructure/IO/Generic/DataIOCollection.hpp" +#include "simplnx/DataStructure/IO/Generic/IDataIOManager.hpp" +#include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/DataStoreUtilities.hpp" #include "simplnx/Utilities/MemoryUtilities.hpp" using namespace nx::core; @@ -10,8 +14,8 @@ TEST_CASE("Contains HDF5 IO Support", "IOTest") { auto app = Application::GetOrCreateInstance(); - auto ioCollection = app->getIOCollection(); - auto h5IO = ioCollection->getManager("HDF5"); + auto& ioCollection = app->getIOCollection(); + auto h5IO = ioCollection.getManager("HDF5"); REQUIRE(h5IO != nullptr); } @@ -23,18 +27,122 @@ TEST_CASE("Memory Check", "IOTest") REQUIRE(storage.free > 0); } -TEST_CASE("Target DataStructure Size", "IOTest") +// ============================================================================= +// Data Format Preference Tests (in-core only, no OOC plugin) +// ============================================================================= + +TEST_CASE("Data Format: Not configured defaults to InMemory store", "[IOTest][DataFormat]") +{ + 
auto* prefs = Application::GetOrCreateInstance()->getPreferences(); + + // With no OOC plugin loaded and no format configured, largeDataFormat() + // should return empty (not configured) and useOocData() should be false. + std::string savedFormat = prefs->largeDataFormat(); + prefs->setLargeDataFormat(""); + REQUIRE(prefs->largeDataFormat().empty()); + REQUIRE_FALSE(prefs->useOocData()); + + // CreateDataStore should produce an InMemory store regardless of size + auto store = DataStoreUtilities::CreateDataStore({100, 100, 100}, {1}, IDataAction::Mode::Execute); + REQUIRE(store != nullptr); + REQUIRE(store->getStoreType() == IDataStore::StoreType::InMemory); + + prefs->setLargeDataFormat(savedFormat); +} + +TEST_CASE("Data Format: Explicit InMemory format prevents OOC", "[IOTest][DataFormat]") +{ + auto* prefs = Application::GetOrCreateInstance()->getPreferences(); + + std::string savedFormat = prefs->largeDataFormat(); + prefs->setLargeDataFormat(std::string(Preferences::k_InMemoryFormat)); + + // k_InMemoryFormat is non-empty but should NOT enable OOC + REQUIRE_FALSE(prefs->largeDataFormat().empty()); + REQUIRE(prefs->largeDataFormat() == Preferences::k_InMemoryFormat); + REQUIRE_FALSE(prefs->useOocData()); + + // CreateDataStore should produce InMemory even for large arrays + auto store = DataStoreUtilities::CreateDataStore({100, 100, 100}, {1}, IDataAction::Mode::Execute); + REQUIRE(store != nullptr); + REQUIRE(store->getStoreType() == IDataStore::StoreType::InMemory); + + prefs->setLargeDataFormat(savedFormat); +} + +TEST_CASE("Data Format: checkUseOoc returns false for empty string", "[IOTest][DataFormat]") +{ + auto* prefs = Application::GetOrCreateInstance()->getPreferences(); + + std::string savedFormat = prefs->largeDataFormat(); + prefs->setLargeDataFormat(""); + REQUIRE_FALSE(prefs->useOocData()); + prefs->setLargeDataFormat(savedFormat); +} + +TEST_CASE("Data Format: checkUseOoc returns false for InMemory format", "[IOTest][DataFormat]") { - auto* 
preferences = Application::GetOrCreateInstance()->getPreferences(); - REQUIRE(preferences->largeDataStructureSize() > 0); + auto* prefs = Application::GetOrCreateInstance()->getPreferences(); - const uint64 memory = Memory::GetTotalMemory(); - const uint64 largeDataSize = preferences->valueAs(Preferences::k_LargeDataSize_Key); - const uint64 minimumRemaining = 2 * largeDataSize; - uint64 targetReducedSize = (memory - 2 * largeDataSize); - if(minimumRemaining >= memory) + std::string savedFormat = prefs->largeDataFormat(); + prefs->setLargeDataFormat(std::string(Preferences::k_InMemoryFormat)); + REQUIRE_FALSE(prefs->useOocData()); + prefs->setLargeDataFormat(savedFormat); +} + +TEST_CASE("Data Format: Cannot register IO manager with reserved InMemory name", "[IOTest][DataFormat]") +{ + // Create a dummy IDataIOManager subclass that returns k_InMemoryFormat + class ReservedNameManager : public IDataIOManager { - targetReducedSize = memory / 2; - } - REQUIRE(preferences->defaultValueAs(Preferences::k_LargeDataStructureSize_Key) == targetReducedSize); + public: + std::string formatName() const override + { + return std::string(Preferences::k_InMemoryFormat); + } + }; + + auto& ioCollection = Application::GetOrCreateInstance()->getIOCollection(); + auto badManager = std::make_shared(); + auto addResult = ioCollection.addIOManager(badManager); + SIMPLNX_RESULT_REQUIRE_INVALID(addResult); +} + +TEST_CASE("Data Format: resolveFormat returns empty with no resolver registered", "[IOTest][DataFormat]") +{ + auto* prefs = Application::GetOrCreateInstance()->getPreferences(); + auto& ioCollection = Application::GetOrCreateInstance()->getIOCollection(); + + std::string savedFormat = prefs->largeDataFormat(); + + // Set format to InMemory — core must not redirect to any OOC format + prefs->setLargeDataFormat(std::string(Preferences::k_InMemoryFormat)); + + // With no OOC plugin loaded, no resolver is registered and resolveFormat returns "" + DataStructure ds; + DataPath 
dp({"TestArray"}); + uint64 largeSize = 2ULL * 1024 * 1024 * 1024; // 2 GB — well above any threshold + std::string dataFormat = ioCollection.resolveFormat(ds, dp, DataType::float32, largeSize); + REQUIRE(dataFormat.empty()); // Should NOT be changed to any OOC format + + prefs->setLargeDataFormat(savedFormat); +} + +TEST_CASE("Data Format: resolveFormat returns empty when format not configured", "[IOTest][DataFormat]") +{ + auto* prefs = Application::GetOrCreateInstance()->getPreferences(); + auto& ioCollection = Application::GetOrCreateInstance()->getIOCollection(); + + std::string savedFormat = prefs->largeDataFormat(); + + // Empty format = not configured, and no OOC plugin is loaded in the in-core build + prefs->setLargeDataFormat(""); + + DataStructure ds; + DataPath dp({"TestArray"}); + uint64 largeSize = 2ULL * 1024 * 1024 * 1024; + std::string dataFormat = ioCollection.resolveFormat(ds, dp, DataType::float32, largeSize); + REQUIRE(dataFormat.empty()); // No OOC format available + + prefs->setLargeDataFormat(savedFormat); } diff --git a/test/IParallelAlgorithmTest.cpp b/test/IParallelAlgorithmTest.cpp new file mode 100644 index 0000000000..2b654a4625 --- /dev/null +++ b/test/IParallelAlgorithmTest.cpp @@ -0,0 +1,303 @@ +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/DataStore.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Utilities/ParallelDataAlgorithm.hpp" + +#include + +#include +#include +#include +#include + +using namespace nx::core; + +namespace +{ +/** + * @brief Minimal mock data store that reports StoreType::OutOfCore. + * + * Only getStoreType() is meaningful; every other method throws because + * the tests never access actual element data. 
+ */ +template +class MockOocDataStore : public AbstractDataStore +{ +public: + using value_type = typename AbstractDataStore::value_type; + + MockOocDataStore(const ShapeType& tupleShape, const ShapeType& componentShape) + : m_TupleShape(tupleShape) + , m_ComponentShape(componentShape) + , m_NumTuples(std::accumulate(tupleShape.cbegin(), tupleShape.cend(), static_cast(1), std::multiplies<>())) + , m_NumComponents(std::accumulate(componentShape.cbegin(), componentShape.cend(), static_cast(1), std::multiplies<>())) + { + } + + ~MockOocDataStore() override = default; + + IDataStore::StoreType getStoreType() const override + { + return IDataStore::StoreType::OutOfCore; + } + + std::map getRecoveryMetadata() const override + { + return {}; + } + + usize getNumberOfTuples() const override + { + return m_NumTuples; + } + + usize getNumberOfComponents() const override + { + return m_NumComponents; + } + + const ShapeType& getTupleShape() const override + { + return m_TupleShape; + } + + const ShapeType& getComponentShape() const override + { + return m_ComponentShape; + } + + DataType getDataType() const override + { + return GetDataType(); + } + + void resizeTuples(const ShapeType& /*tupleShape*/) override + { + throw std::runtime_error("MockOocDataStore::resizeTuples not implemented"); + } + + value_type getValue(usize /*index*/) const override + { + throw std::runtime_error("MockOocDataStore::getValue not implemented"); + } + + void setValue(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::setValue not implemented"); + } + + Result<> copyIntoBuffer(usize /*startIndex*/, nonstd::span /*buffer*/) const override + { + return MakeErrorResult(-9001, "MockOocDataStore::copyIntoBuffer not implemented"); + } + + Result<> copyFromBuffer(usize /*startIndex*/, nonstd::span /*buffer*/) override + { + return MakeErrorResult(-9002, "MockOocDataStore::copyFromBuffer not implemented"); + } + + value_type at(usize /*index*/) const 
override + { + throw std::runtime_error("MockOocDataStore::at not implemented"); + } + + void add(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::add not implemented"); + } + + void sub(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::sub not implemented"); + } + + void mul(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::mul not implemented"); + } + + void div(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::div not implemented"); + } + + void rem(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::rem not implemented"); + } + + void bitwiseAND(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::bitwiseAND not implemented"); + } + + void bitwiseOR(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::bitwiseOR not implemented"); + } + + void bitwiseXOR(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::bitwiseXOR not implemented"); + } + + void bitwiseLShift(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::bitwiseLShift not implemented"); + } + + void bitwiseRShift(usize /*index*/, value_type /*value*/) override + { + throw std::runtime_error("MockOocDataStore::bitwiseRShift not implemented"); + } + + void byteSwap(usize /*index*/) override + { + throw std::runtime_error("MockOocDataStore::byteSwap not implemented"); + } + + void swap(usize /*index1*/, usize /*index2*/) override + { + throw std::runtime_error("MockOocDataStore::swap not implemented"); + } + + std::unique_ptr deepCopy() const override + { + return std::make_unique(*this); + } + + std::unique_ptr createNewInstance() const override + { + return std::make_unique(m_TupleShape, 
m_ComponentShape); + } + + std::pair writeBinaryFile(const std::string& /*absoluteFilePath*/) const override + { + return {-1, "MockOocDataStore cannot write files"}; + } + + std::pair writeBinaryFile(std::ostream& /*outputStream*/) const override + { + return {-1, "MockOocDataStore cannot write files"}; + } + + Result<> readHdf5(const HDF5::DatasetIO& /*dataset*/) override + { + return MakeErrorResult(-1, "MockOocDataStore cannot read HDF5"); + } + + Result<> writeHdf5(HDF5::DatasetIO& /*dataset*/) const override + { + return MakeErrorResult(-1, "MockOocDataStore cannot write HDF5"); + } + +private: + ShapeType m_TupleShape; + ShapeType m_ComponentShape; + usize m_NumTuples = 0; + usize m_NumComponents = 0; +}; +} // namespace + +TEST_CASE("IParallelAlgorithm: TBB enabled by default", "[simplnx][IParallelAlgorithm]") +{ + ParallelDataAlgorithm algorithm; + +#ifdef SIMPLNX_ENABLE_MULTICORE + REQUIRE(algorithm.getParallelizationEnabled() == true); +#else + REQUIRE(algorithm.getParallelizationEnabled() == false); +#endif +} + +TEST_CASE("IParallelAlgorithm: requireArraysInMemory with in-memory arrays keeps TBB enabled", "[simplnx][IParallelAlgorithm]") +{ + DataStructure dataStructure; + auto store = std::make_shared>(ShapeType{10}, ShapeType{1}, 0.0f); + auto* dataArray = DataArray::Create(dataStructure, "TestArray", store); + REQUIRE(dataArray != nullptr); + + ParallelDataAlgorithm algorithm; + algorithm.requireArraysInMemory({dataArray}); + +#ifdef SIMPLNX_ENABLE_MULTICORE + REQUIRE(algorithm.getParallelizationEnabled() == true); +#else + REQUIRE(algorithm.getParallelizationEnabled() == false); +#endif +} + +TEST_CASE("IParallelAlgorithm: requireStoresInMemory with in-memory stores keeps TBB enabled", "[simplnx][IParallelAlgorithm]") +{ + DataStore store(ShapeType{10}, ShapeType{1}, 0.0f); + + ParallelDataAlgorithm algorithm; + algorithm.requireStoresInMemory({&store}); + +#ifdef SIMPLNX_ENABLE_MULTICORE + REQUIRE(algorithm.getParallelizationEnabled() == true); 
+#else + REQUIRE(algorithm.getParallelizationEnabled() == false); +#endif +} + +TEST_CASE("IParallelAlgorithm: requireArraysInMemory with OOC arrays disables TBB", "[simplnx][IParallelAlgorithm]") +{ + DataStructure dataStructure; + auto oocStore = std::make_shared>(ShapeType{10}, ShapeType{1}); + auto* dataArray = DataArray::Create(dataStructure, "OocArray", oocStore); + REQUIRE(dataArray != nullptr); + + ParallelDataAlgorithm algorithm; + algorithm.requireArraysInMemory({dataArray}); + + // OOC arrays should disable parallelization regardless of SIMPLNX_ENABLE_MULTICORE + REQUIRE(algorithm.getParallelizationEnabled() == false); +} + +TEST_CASE("IParallelAlgorithm: requireStoresInMemory with OOC stores disables TBB", "[simplnx][IParallelAlgorithm]") +{ + MockOocDataStore oocStore(ShapeType{10}, ShapeType{1}); + + ParallelDataAlgorithm algorithm; + algorithm.requireStoresInMemory({&oocStore}); + + // OOC stores should disable parallelization regardless of SIMPLNX_ENABLE_MULTICORE + REQUIRE(algorithm.getParallelizationEnabled() == false); +} + +TEST_CASE("IParallelAlgorithm: requireStoresInMemory with mixed stores disables TBB", "[simplnx][IParallelAlgorithm]") +{ + DataStore inMemoryStore(ShapeType{10}, ShapeType{1}, 0.0f); + MockOocDataStore oocStore(ShapeType{10}, ShapeType{1}); + + ParallelDataAlgorithm algorithm; + algorithm.requireStoresInMemory({&inMemoryStore, &oocStore}); + + // A single OOC store in the mix should disable parallelization + REQUIRE(algorithm.getParallelizationEnabled() == false); +} + +TEST_CASE("IParallelAlgorithm: requireArraysInMemory with empty array list keeps TBB enabled", "[simplnx][IParallelAlgorithm]") +{ + ParallelDataAlgorithm algorithm; + algorithm.requireArraysInMemory({}); + +#ifdef SIMPLNX_ENABLE_MULTICORE + REQUIRE(algorithm.getParallelizationEnabled() == true); +#else + REQUIRE(algorithm.getParallelizationEnabled() == false); +#endif +} + +TEST_CASE("IParallelAlgorithm: requireStoresInMemory with nullptr entries keeps TBB 
enabled", "[simplnx][IParallelAlgorithm]") +{ + ParallelDataAlgorithm algorithm; + algorithm.requireStoresInMemory({nullptr, nullptr}); + +#ifdef SIMPLNX_ENABLE_MULTICORE + REQUIRE(algorithm.getParallelizationEnabled() == true); +#else + REQUIRE(algorithm.getParallelizationEnabled() == false); +#endif +} diff --git a/test/UnitTestCommon/include/simplnx/UnitTest/UnitTestCommon.cpp b/test/UnitTestCommon/include/simplnx/UnitTest/UnitTestCommon.cpp index ca7cfac790..bfdc9709d9 100644 --- a/test/UnitTestCommon/include/simplnx/UnitTest/UnitTestCommon.cpp +++ b/test/UnitTestCommon/include/simplnx/UnitTest/UnitTestCommon.cpp @@ -1,7 +1,5 @@ #include "UnitTestCommon.hpp" -#include "simplnx/Parameters/Dream3dImportParameter.hpp" - #include #include @@ -12,37 +10,19 @@ namespace nx::core::UnitTest { DataStructure LoadDataStructure(const fs::path& filepath) { - // Ensure the plugins a loaded. LoadPlugins(); - - INFO(fmt::format("Error loading file: '{}' ", filepath.string())); REQUIRE(fs::exists(filepath)); - DataStructure dataStructure; - - // const Uuid k_SimplnxCorePluginId = *Uuid::FromString("05cc618b-781f-4ac0-b9ac-43f26ce1854f"); - auto* filterList = Application::Instance()->getFilterList(); - /************************************************************************* - * ReadDREAM3DFilter - ************************************************************************/ - constexpr Uuid k_ReadDREAM3DFilterId = *Uuid::FromString("0dbd31c7-19e0-4077-83ef-f4a6459a0e2d"); - const FilterHandle k_ReadDREAM3DFilterHandle(k_ReadDREAM3DFilterId, k_SimplnxCorePluginId); - - auto filterPtr = filterList->createFilter(k_ReadDREAM3DFilterHandle); - REQUIRE(nullptr != filterPtr); - - Arguments args; - args.insertOrAssign("import_data_object", std::make_any(Dream3dImportParameter::ImportData{filepath, Dream3dImportParameter::PathImportPolicy::All})); - - // Preflight the filter and check result - auto preflightResult = filterPtr->preflight(dataStructure, args); - 
SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions) - - // Execute the filter and check the result - auto executeResult = filterPtr->execute(dataStructure, args); //, nullptr, IFilter::MessageHandler{[](const IFilter::Message& message) { fmt::print("{}\n", message.message); }}); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - - return dataStructure; + auto result = DREAM3D::LoadDataStructure(filepath); + if(result.invalid()) + { + for(const auto& error : result.errors()) + { + UNSCOPED_INFO(fmt::format("[{}] {}", error.code, error.message)); + } + FAIL(fmt::format("Failed to load DataStructure from '{}'", filepath.string())); + } + return std::move(result.value()); } TestFileSentinel::TestFileSentinel(std::string testFilesDir, std::string inputArchiveName, std::string expectedTopLevelOutput, bool decompressFiles, bool removeTemp) @@ -69,7 +49,7 @@ TestFileSentinel::~TestFileSentinel() if(m_RemoveTemp) { std::error_code errorCode; - std::filesystem::remove_all(fmt::format("{}/{}", m_TestFilesDir, m_ExpectedTopLevelOutput), errorCode); + fs::remove_all(fmt::format("{}/{}", m_TestFilesDir, m_ExpectedTopLevelOutput), errorCode); if(errorCode) { std::cout << "Removing decompressed data failed: " << errorCode.message() << std::endl; @@ -242,13 +222,13 @@ std::error_code TestFileSentinel::decompress() // typeFlag: '5' = directory, '0' or '\0' = regular file, '2' = symlink if(typeFlag == '5') { - std::filesystem::create_directories(fullPath); + fs::create_directories(fullPath); } else if(typeFlag == '0' || typeFlag == '\0') { // Ensure parent directory exists - std::filesystem::path filePath(fullPath); - std::filesystem::create_directories(filePath.parent_path()); + fs::path filePath(fullPath); + fs::create_directories(filePath.parent_path()); std::ofstream outFile(fullPath, std::ios::binary); if(!outFile) diff --git a/test/UnitTestCommon/include/simplnx/UnitTest/UnitTestCommon.hpp b/test/UnitTestCommon/include/simplnx/UnitTest/UnitTestCommon.hpp index 
67572b5159..f609439399 100644
--- a/test/UnitTestCommon/include/simplnx/UnitTest/UnitTestCommon.hpp
+++ b/test/UnitTestCommon/include/simplnx/UnitTest/UnitTestCommon.hpp
@@ -25,6 +25,7 @@
 #include "simplnx/Parameters/DataTypeParameter.hpp"
 #include "simplnx/Parameters/GeometrySelectionParameter.hpp"
 #include "simplnx/Utilities/DataGroupUtilities.hpp"
+#include "simplnx/Utilities/DataStoreUtilities.hpp"
 #include "simplnx/Utilities/FilterUtilities.hpp"
 #include "simplnx/Utilities/MD5.hpp"
 #include "simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp"
@@ -456,45 +457,107 @@ inline void CompareMontage(const AbstractMontage& exemplar, const AbstractMontag
 }
 
 /**
- * @brief Compares IDataArray
- * @tparam T
- * @param left
- * @param right
+ * @brief Compares two IDataArrays element-by-element using bulk copyIntoBuffer
+ * for OOC-safe, high-performance comparison.
+ *
+ * Why copyIntoBuffer instead of operator[]:
+ *   When arrays are backed by an out-of-core (chunked) DataStore, each call
+ *   to operator[] may trigger a chunk load from disk. Comparing millions of
+ *   elements one at a time would cause catastrophic chunk thrashing. Instead,
+ *   this function reads both arrays in 40,000-element chunks via copyIntoBuffer,
+ *   which batches HDF5 I/O and keeps access sequential. This is also safe for
+ *   in-memory stores, where copyIntoBuffer is a simple memcpy.
+ *
+ * Floating-point comparison semantics:
+ *   - NaN == NaN is treated as equal. Many filter outputs legitimately produce
+ *     NaN values (e.g., division by zero in optional statistics), and both the
+ *     exemplar and generated arrays should agree on which elements are NaN.
+ *   - Values within UnitTest::EPSILON of each other are treated as equal,
+ *     accommodating floating-point rounding differences across platforms.
+ *
+ * Error reporting:
+ *   On the first mismatched element, the function records the index and both
+ *   values, then breaks out of the comparison loop. The mismatch details are
+ *   reported via Catch2's UNSCOPED_INFO before the final REQUIRE(!failed).
+ *   A failed copyIntoBuffer call fails the test immediately rather than
+ *   comparing buffers that were never filled for the current chunk.
+ *
+ * @tparam T The element type (must match the actual DataStore element type)
+ * @param left First array (typically the exemplar / golden reference)
+ * @param right Second array (typically the generated / computed result)
+ * @param start Element index to start comparison from (default 0). Useful when
+ *              the first N elements are known to differ (e.g., header/padding).
  */
 template <typename T>
 void CompareDataArrays(const IDataArray& left, const IDataArray& right, usize start = 0)
 {
   const auto& oldDataStore = left.template getIDataStoreRefAs<AbstractDataStore<T>>();
   const auto& newDataStore = right.template getIDataStoreRefAs<AbstractDataStore<T>>();
-  usize end = oldDataStore.getSize();
+  const usize totalSize = oldDataStore.getSize();
   INFO(fmt::format("Input Data Array:'{}' Output DataArray: '{}' bad comparison", left.getName(), right.getName()));
-  T oldVal;
-  T newVal;
+  REQUIRE(totalSize == newDataStore.getSize());
+
+  // Use 40K-element chunks to balance memory usage against I/O efficiency.
+  // Each chunk is ~160 KB for float32 or ~320 KB for float64, which fits
+  // comfortably in L2 cache and aligns well with typical HDF5 chunk sizes.
+  constexpr usize k_ChunkSize = 40000;
+  auto oldBuf = std::make_unique<T[]>(k_ChunkSize);
+  auto newBuf = std::make_unique<T[]>(k_ChunkSize);
+
   bool failed = false;
-  for(usize i = start; i < end; i++)
+  usize failIndex = 0;
+  T failOld = {};
+  T failNew = {};
+
+  // Iterate through the arrays in fixed-size chunks, reading both arrays
+  // into local buffers for fast element-wise comparison
+  for(usize offset = start; offset < totalSize && !failed; offset += k_ChunkSize)
   {
-    oldVal = oldDataStore[i];
-    newVal = newDataStore[i];
-    if(oldVal != newVal)
-    {
-      UNSCOPED_INFO(fmt::format("index=: {} oldValue != newValue. {} != {}", i, oldVal, newVal));
+    // Handle the last chunk which may be smaller than k_ChunkSize
+    const usize count = std::min(k_ChunkSize, totalSize - offset);
+    // A failed read leaves zero-filled or previous-chunk data in the buffer, which
+    // could make mismatched arrays compare equal, so stop on any read error.
+    const auto oldReadResult = oldDataStore.copyIntoBuffer(offset, nonstd::span<T>(oldBuf.get(), count));
+    REQUIRE_FALSE(oldReadResult.invalid());
+    const auto newReadResult = newDataStore.copyIntoBuffer(offset, nonstd::span<T>(newBuf.get(), count));
+    REQUIRE_FALSE(newReadResult.invalid());
 
-      if constexpr(std::is_floating_point_v<T>)
+    // Compare each element in the current chunk
+    for(usize i = 0; i < count; i++)
+    {
+      const T oldVal = oldBuf[i];
+      const T newVal = newBuf[i];
+      if(oldVal != newVal)
       {
-        float diff = std::fabs(static_cast<float>(oldVal - newVal));
-        if(diff > EPSILON)
+        if constexpr(std::is_floating_point_v<T>)
         {
-          failed = true;
-          break;
+          // Special case: NaN == NaN is treated as equal because many filters
+          // produce NaN for undefined results, and both arrays should agree
+          if(std::isnan(oldVal) && std::isnan(newVal))
+          {
+            continue;
+          }
+          // Allow small floating-point differences within EPSILON tolerance
+          float32 diff = std::fabs(static_cast<float32>(oldVal - newVal));
+          if(diff <= EPSILON)
+          {
+            continue;
+          }
         }
-      }
-      else
-      {
+        // Record the first failure for diagnostic output, then stop
         failed = true;
+        failIndex = offset + i;
+        failOld = oldVal;
+        failNew = newVal;
+        break;
       }
-      break;
     }
   }
+
+  if(failed)
+  {
+    UNSCOPED_INFO(fmt::format("index=: {} oldValue != newValue. {} != {}", failIndex, failOld, failNew));
+  }
   REQUIRE(!failed);
 }
 