diff --git a/CMakeLists.txt b/CMakeLists.txt index 426a550c8d..d599f45a14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -537,6 +537,7 @@ set(SIMPLNX_HDRS ${SIMPLNX_SOURCE_DIR}/Utilities/DataGroupUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/DataObjectUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/DataStoreUtilities.hpp + ${SIMPLNX_SOURCE_DIR}/Utilities/AlgorithmDispatch.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/FilePathGenerator.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/ColorTableUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/FileUtilities.hpp @@ -558,6 +559,7 @@ set(SIMPLNX_HDRS ${SIMPLNX_SOURCE_DIR}/Utilities/ParallelTaskAlgorithm.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/SamplingUtils.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/SegmentFeatures.hpp + ${SIMPLNX_SOURCE_DIR}/Utilities/UnionFind.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/TimeUtilities.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/TooltipGenerator.hpp ${SIMPLNX_SOURCE_DIR}/Utilities/TooltipRowItem.hpp diff --git a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.cpp b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.cpp index 587ecbf6c3..26d5ac0723 100644 --- a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.cpp +++ b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.cpp @@ -5,12 +5,14 @@ #include "simplnx/Common/Constants.hpp" #include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" #include "simplnx/Utilities/ClusteringUtilities.hpp" #include #include #include +#include #include using namespace nx::core; @@ -26,6 +28,37 @@ CAxisSegmentFeatures::CAxisSegmentFeatures(DataStructure& dataStructure, const I // ----------------------------------------------------------------------------- CAxisSegmentFeatures::~CAxisSegmentFeatures() noexcept = default; +// ----------------------------------------------------------------------------- +// Segments a hexagonal EBSD dataset into features (grains) based on c-axis +// alignment. Two neighboring voxels are grouped into the same feature when +// their crystallographic c-axes (the [0001] direction) are aligned within a +// user-specified angular tolerance. Unlike EBSDSegmentFeatures which uses full +// misorientation via LaueOps, this filter only considers the c-axis direction, +// which is useful for analyzing basal texture in hexagonal materials. +// +// Pre-validation: +// Before segmentation, every cell's phase is checked against the crystal +// structure table. All phases must be hexagonal (Hexagonal_High 6/mmm or +// Hexagonal_Low 6/m); if any non-hexagonal phase is found, the filter +// returns an error because c-axis alignment is only meaningful for HCP. +// +// Algorithm dispatch: +// - In-core data -> execute() : classic depth-first-search (DFS) flood fill +// - Out-of-core -> executeCCL() : connected-component labeling that streams +// data slice-by-slice to limit memory usage +// The choice is made by checking IsOutOfCore() on the FeatureIds array (i.e., +// whether the backing DataStore lives on disk) or if ForceOocAlgorithm() is +// set (used for testing). +// +// Post-processing after either algorithm: +// 1. Validate that at least one feature was found (error if not). +// 2. Resize the Feature AttributeMatrix to (m_FoundFeatures + 1) tuples so +// that all per-feature arrays (Active, etc.) have the correct size. +// Index 0 is reserved as an invalid/background feature. +// 3. Initialize the Active array: fill with 1 (active), then set index 0 +// to 0 to mark it as the reserved background slot. +// 4. Optionally randomize FeatureIds so that spatially adjacent grains get +// non-sequential IDs, improving visual contrast in color-mapped renders. // ----------------------------------------------------------------------------- Result<> CAxisSegmentFeatures::operator()() { @@ -68,8 +101,21 @@ Result<> CAxisSegmentFeatures::operator()() auto* active = m_DataStructure.getDataAs(m_InputValues->ActiveArrayPath); active->fill(1); - // Run the segmentation algorithm - execute(imageGeometry); + // Dispatch between DFS (in-core) and CCL (OOC) algorithms + if(IsOutOfCore(*m_FeatureIdsArray) || ForceOocAlgorithm()) + { + SizeVec3 udims = imageGeometry->getDimensions(); + allocateSliceBuffers(static_cast(udims[0]), static_cast(udims[1])); + + auto& featureIdsStore = m_FeatureIdsArray->getDataStoreRef(); + executeCCL(imageGeometry, featureIdsStore); + + deallocateSliceBuffers(); + } + else + { + execute(imageGeometry); + } // Sanity check the result. if(this->m_FoundFeatures < 1) { @@ -97,6 +143,22 @@ Result<> CAxisSegmentFeatures::operator()() return {}; } +// ----------------------------------------------------------------------------- +// Finds the next unassigned voxel that can serve as the seed for a new feature. +// The scan is a simple linear walk starting from `nextSeed`, which is the index +// immediately after the last seed found. This avoids rescanning already-assigned +// voxels at the beginning of the array. +// +// A voxel is eligible to become a seed when all three conditions are met: +// 1. featureId == 0 : the voxel has not yet been assigned to any feature. +// 2. Passes the mask: if masking is enabled, the voxel must be flagged as +// "good" (e.g., not a bad scan point). +// 3. cellPhase > 0 : the voxel belongs to a real crystallographic phase +// (phase 0 is reserved for unindexed/background points). +// +// When a valid seed is found, its featureId is immediately set to `gnum` +// (the new feature number) so that subsequent calls will skip it. +// Returns the linear index of the seed, or -1 if no more seeds exist. // ----------------------------------------------------------------------------- int64 CAxisSegmentFeatures::getSeed(int32 gnum, int64 nextSeed) const { @@ -127,14 +189,34 @@ int64 CAxisSegmentFeatures::getSeed(int32 gnum, int64 nextSeed) const } if(seed >= 0) { - auto& cellFeatureAM = m_DataStructure.getDataRefAs(m_InputValues->CellFeatureAttributeMatrixPath); featureIds[static_cast(seed)] = gnum; - const ShapeType tDims = {static_cast(gnum) + 1}; - cellFeatureAM.resizeTuples(tDims); // This will resize the active array } return seed; } +// ----------------------------------------------------------------------------- +// Determines whether a neighboring voxel should be merged into the current +// feature during the DFS flood fill (execute() path). This is NOT used by +// the CCL path, which calls areNeighborsSimilar() instead. +// +// The method checks three conditions before grouping: +// 1. The neighbor's featureId must be 0 (unassigned). +// 2. The neighbor must pass the mask (if masking is enabled). +// 3. The neighbor must have a c-axis aligned with the reference voxel. +// +// C-axis misalignment calculation: +// - Both voxels must share the same phase (no cross-phase grouping). +// - Quaternion orientations (QuatF, 4 floats) are extracted for both voxels. +// - Each quaternion is converted to a 3x3 orientation matrix, which is then +// transposed and multiplied by the crystal c-axis unit vector [0,0,1] to +// obtain the sample-frame c-axis direction for each voxel. +// - Both c-axis vectors are normalized so the dot product directly gives +// the cosine of the angle between them. +// - The dot product is clamped to [-1, 1] to guard against floating-point +// error, then acos() gives the misalignment angle w (in radians). +// - Because the c-axis is bidirectional (parallel and antiparallel are +// equivalent), the check accepts w <= tolerance OR (pi - w) <= tolerance. +// - If accepted, the neighbor's featureId is set to `gnum` as a side effect. // ----------------------------------------------------------------------------- bool CAxisSegmentFeatures::determineGrouping(int64 referencepoint, int64 neighborpoint, int32 gnum) const { @@ -182,3 +264,304 @@ bool CAxisSegmentFeatures::determineGrouping(int64 referencepoint, int64 neighbo } return group; } + +// ----------------------------------------------------------------------------- +// Checks whether a single voxel is eligible for segmentation (used by the CCL +// path in executeCCL()). A voxel is valid if it passes the mask and has a +// crystallographic phase > 0. +// +// Slice buffer fast path: +// When m_UseSliceBuffers is true (OOC mode), the method checks whether the +// voxel's Z-slice is currently loaded in one of the two buffer slots. The +// slot lookup checks both m_BufferedSliceZ[0] and m_BufferedSliceZ[1] to +// find which slot (if any) holds the target slice. If found, mask and phase +// values are read from the in-memory m_MaskBuffer and m_PhaseBuffer arrays, +// avoiding an on-disk I/O round-trip. +// +// OOC fallback: +// If slice buffers are not active, or if the voxel's slice is not currently +// buffered (which can happen during Phase 1b of CCL when periodic boundary +// merging accesses non-adjacent slices), the method falls back to direct +// array access through the DataStore, which may trigger on-disk I/O for +// out-of-core data. +// ----------------------------------------------------------------------------- +bool CAxisSegmentFeatures::isValidVoxel(int64 point) const +{ + if(m_UseSliceBuffers) + { + int64 sliceZ = point / m_BufSliceSize; + if(sliceZ == m_BufferedSliceZ[0] || sliceZ == m_BufferedSliceZ[1]) + { + int64 slot = (sliceZ == m_BufferedSliceZ[0]) ? 0 : 1; + int64 offset = point - sliceZ * m_BufSliceSize; + int64 bufIdx = slot * m_BufSliceSize + offset; + // Check mask + if(m_InputValues->UseMask && m_MaskBuffer[bufIdx] == 0) + { + return false; + } + // Check phase + if(m_PhaseBuffer[bufIdx] <= 0) + { + return false; + } + return true; + } + } + + // Fallback: direct array access + if(m_InputValues->UseMask && !m_GoodVoxelsArray->isTrue(point)) + { + return false; + } + Int32Array& cellPhases = *m_CellPhases; + if(cellPhases[point] <= 0) + { + return false; + } + return true; +} + +// ----------------------------------------------------------------------------- +// Determines whether two neighboring voxels have sufficiently aligned c-axes +// to belong to the same feature. Used exclusively by the CCL path +// (executeCCL()), whereas the DFS path uses determineGrouping() instead. +// +// Slice buffer fast path: +// When both voxels' Z-slices are present in the rolling 2-slot buffer, all +// data is read from the in-memory buffers (m_QuatBuffer, m_PhaseBuffer, +// m_MaskBuffer). The buffer index for each point is computed as: +// slot * sliceSize + (point - sliceZ * sliceSize) +// For quaternions, an additional x4 factor accounts for the 4 components +// per voxel. The method then: +// 1. Checks point2's mask validity. +// 2. Checks that point2's phase > 0 and both phases match. +// 3. Constructs QuatF objects from the buffered quaternion components. +// 4. Converts each quaternion to an orientation matrix, transposes it, and +// multiplies by [0,0,1] to get the sample-frame c-axis direction. +// 5. Normalizes both c-axis vectors and computes the dot product. +// 6. Clamps the dot product to [-1,1] and takes acos() to get the +// misalignment angle w. +// 7. Returns true if w <= tolerance OR (pi - w) <= tolerance (because +// parallel and antiparallel c-axes are crystallographically equivalent). +// +// OOC fallback: +// If either voxel's slice is not buffered (e.g., during Phase 1b periodic +// merge), falls back to direct DataStore access: validates point2 via +// isValidVoxel(), checks phase equality, then computes c-axis misalignment +// from the full quaternion and phase arrays on disk. +// ----------------------------------------------------------------------------- +bool CAxisSegmentFeatures::areNeighborsSimilar(int64 point1, int64 point2) const +{ + if(m_UseSliceBuffers) + { + int64 sliceZ1 = point1 / m_BufSliceSize; + int64 sliceZ2 = point2 / m_BufSliceSize; + bool buf1 = (sliceZ1 == m_BufferedSliceZ[0] || sliceZ1 == m_BufferedSliceZ[1]); + bool buf2 = (sliceZ2 == m_BufferedSliceZ[0] || sliceZ2 == m_BufferedSliceZ[1]); + + if(buf1 && buf2) + { + int64 slot1 = (sliceZ1 == m_BufferedSliceZ[0]) ? 0 : 1; + int64 slot2 = (sliceZ2 == m_BufferedSliceZ[0]) ? 0 : 1; + int64 off1 = point1 - sliceZ1 * m_BufSliceSize; + int64 off2 = point2 - sliceZ2 * m_BufSliceSize; + int64 bufIdx1 = slot1 * m_BufSliceSize + off1; + int64 bufIdx2 = slot2 * m_BufSliceSize + off2; + + // Check point2 validity (mask + phase) + if(m_InputValues->UseMask && m_MaskBuffer[bufIdx2] == 0) + { + return false; + } + if(m_PhaseBuffer[bufIdx2] <= 0) + { + return false; + } + + // Must be same phase + if(m_PhaseBuffer[bufIdx1] != m_PhaseBuffer[bufIdx2]) + { + return false; + } + + // Read quaternions from buffer + int64 qIdx1 = bufIdx1 * 4; + int64 qIdx2 = bufIdx2 * 4; + const ebsdlib::QuatF q1(m_QuatBuffer[qIdx1], m_QuatBuffer[qIdx1 + 1], m_QuatBuffer[qIdx1 + 2], m_QuatBuffer[qIdx1 + 3]); + const ebsdlib::QuatF q2(m_QuatBuffer[qIdx2], m_QuatBuffer[qIdx2 + 1], m_QuatBuffer[qIdx2 + 2], m_QuatBuffer[qIdx2 + 3]); + + const ebsdlib::OrientationMatrixFType oMatrix1 = q1.toOrientationMatrix(); + const ebsdlib::OrientationMatrixFType oMatrix2 = q2.toOrientationMatrix(); + + const Eigen::Vector3f cAxis{0.0f, 0.0f, 1.0f}; + Eigen::Vector3f c1 = oMatrix1.transpose() * cAxis; + Eigen::Vector3f c2 = oMatrix2.transpose() * cAxis; + + c1.normalize(); + c2.normalize(); + + float32 w = std::clamp(((c1[0] * c2[0]) + (c1[1] * c2[1]) + (c1[2] * c2[2])), -1.0F, 1.0F); + w = std::acos(w); + + return w <= m_InputValues->MisorientationTolerance || (Constants::k_PiD - w) <= m_InputValues->MisorientationTolerance; + } + } + + // Fallback: direct array access + if(!isValidVoxel(point2)) + { + return false; + } + + Int32Array& cellPhases = *m_CellPhases; + + // Must be same phase + if(cellPhases[point1] != cellPhases[point2]) + { + return false; + } + + // Calculate c-axis misalignment + const Eigen::Vector3f cAxis{0.0f, 0.0f, 1.0f}; + Float32Array& quats = *m_QuatsArray; + + const ebsdlib::QuatF q1(quats[point1 * 4], quats[point1 * 4 + 1], quats[point1 * 4 + 2], quats[point1 * 4 + 3]); + const ebsdlib::QuatF q2(quats[point2 * 4], quats[point2 * 4 + 1], quats[point2 * 4 + 2], quats[point2 * 4 + 3]); + + const ebsdlib::OrientationMatrixFType oMatrix1 = q1.toOrientationMatrix(); + const ebsdlib::OrientationMatrixFType oMatrix2 = q2.toOrientationMatrix(); + + Eigen::Vector3f c1 = oMatrix1.transpose() * cAxis; + Eigen::Vector3f c2 = oMatrix2.transpose() * cAxis; + + c1.normalize(); + c2.normalize(); + + float32 w = std::clamp(((c1[0] * c2[0]) + (c1[1] * c2[1]) + (c1[2] * c2[2])), -1.0F, 1.0F); + w = std::acos(w); + + return w <= m_InputValues->MisorientationTolerance || (Constants::k_PiD - w) <= m_InputValues->MisorientationTolerance; +} + +// ----------------------------------------------------------------------------- +// Allocates the rolling 2-slot slice buffers used by the CCL (OOC) algorithm. +// Called once at the start of the OOC branch in operator(), before executeCCL(). +// +// Each slot holds one full XY slice (dimX * dimY voxels). Two slots are needed +// because the CCL algorithm compares the current slice (iz) with the previous +// slice (iz-1), so both must be in memory simultaneously. +// +// Buffers allocated: +// - m_QuatBuffer : 2 * sliceSize * 4 floats (quaternion: 4 components/voxel) +// - m_PhaseBuffer : 2 * sliceSize int32 values (one phase ID per voxel) +// - m_MaskBuffer : 2 * sliceSize uint8 values (one mask flag per voxel) +// +// Both m_BufferedSliceZ slots are initialized to -1 (no slice loaded). +// m_UseSliceBuffers is set to true so that isValidVoxel() and +// areNeighborsSimilar() will use the fast buffer path. +// ----------------------------------------------------------------------------- +void CAxisSegmentFeatures::allocateSliceBuffers(int64 dimX, int64 dimY) +{ + m_BufSliceSize = dimX * dimY; + int64 totalSlots = 2 * m_BufSliceSize; + m_QuatBuffer.resize(static_cast(totalSlots * 4)); + m_PhaseBuffer.resize(static_cast(totalSlots)); + m_MaskBuffer.resize(static_cast(totalSlots)); + m_BufferedSliceZ[0] = -1; + m_BufferedSliceZ[1] = -1; + m_UseSliceBuffers = true; +} + +// ----------------------------------------------------------------------------- +// Releases the slice buffers after executeCCL() completes, freeing the memory +// back to the system. Called in the OOC branch of operator() after the CCL +// algorithm finishes. Resets m_UseSliceBuffers to false and both +// m_BufferedSliceZ slots to -1. Uses clear() + shrink_to_fit() on each vector +// to guarantee memory deallocation. +// ----------------------------------------------------------------------------- +void CAxisSegmentFeatures::deallocateSliceBuffers() +{ + m_UseSliceBuffers = false; + m_QuatBuffer.clear(); + m_QuatBuffer.shrink_to_fit(); + m_PhaseBuffer.clear(); + m_PhaseBuffer.shrink_to_fit(); + m_MaskBuffer.clear(); + m_MaskBuffer.shrink_to_fit(); + m_BufferedSliceZ[0] = -1; + m_BufferedSliceZ[1] = -1; + m_BufSliceSize = 0; +} + +// ----------------------------------------------------------------------------- +// Pre-loads voxel data for a single Z-slice into the rolling 2-slot buffer, +// called by executeCCL() before processing each slice. +// +// Rolling buffer design: +// The target slot is determined by (iz % 2), so even slices go to slot 0 and +// odd slices go to slot 1. Because the CCL algorithm processes slices in +// order (0, 1, 2, ...), at any given slice iz the previous slice (iz-1) is +// always in the other slot, keeping both the current and previous slice data +// available in memory. +// +// Sentinel behavior: +// If iz < 0, slice buffering is disabled (m_UseSliceBuffers = false). The +// CCL algorithm passes iz = -1 after completing the slice-by-slice sweep to +// signal that subsequent calls (e.g., during Phase 1b periodic boundary +// merging) should use direct DataStore access instead of the buffers. +// +// Data loaded per slice: +// - Quaternions (4 float32 per voxel) into m_QuatBuffer +// - Phase IDs (1 int32 per voxel) into m_PhaseBuffer +// - Mask flags (1 uint8 per voxel) into m_MaskBuffer; if masking is disabled, +// all mask values are set to 1 (valid) +// +// Note: Unlike the EBSDSegmentFeatures version, this implementation does not +// include a skip-if-already-loaded check; the slot is always overwritten. +// ----------------------------------------------------------------------------- +void CAxisSegmentFeatures::prepareForSlice(int64 iz, int64 dimX, int64 dimY, int64 dimZ) +{ + if(iz < 0) + { + m_UseSliceBuffers = false; + return; + } + + int64 slot = iz % 2; + m_BufferedSliceZ[slot] = iz; + + int64 sliceStart = iz * m_BufSliceSize; + int64 bufOffset = slot * m_BufSliceSize; + + // Fill quaternion buffer (4 components per voxel) + for(int64 i = 0; i < m_BufSliceSize; i++) + { + int64 srcIdx = (sliceStart + i) * 4; + int64 dstIdx = (bufOffset + i) * 4; + m_QuatBuffer[dstIdx] = m_QuatsArray->getValue(srcIdx); + m_QuatBuffer[dstIdx + 1] = m_QuatsArray->getValue(srcIdx + 1); + m_QuatBuffer[dstIdx + 2] = m_QuatsArray->getValue(srcIdx + 2); + m_QuatBuffer[dstIdx + 3] = m_QuatsArray->getValue(srcIdx + 3); + } + + // Fill phase buffer + for(int64 i = 0; i < m_BufSliceSize; i++) + { + m_PhaseBuffer[bufOffset + i] = m_CellPhases->getValue(sliceStart + i); + } + + // Fill mask buffer + if(m_InputValues->UseMask && m_GoodVoxelsArray != nullptr) + { + for(int64 i = 0; i < m_BufSliceSize; i++) + { + m_MaskBuffer[bufOffset + i] = m_GoodVoxelsArray->isTrue(sliceStart + i) ? 1 : 0; + } + } + else + { + // If no mask, mark everything as valid + std::fill(m_MaskBuffer.begin() + bufOffset, m_MaskBuffer.begin() + bufOffset + m_BufSliceSize, 1); + } +} diff --git a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.hpp b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.hpp index b1d0fe9d88..343a16301e 100644 --- a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.hpp +++ b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/CAxisSegmentFeatures.hpp @@ -49,6 +49,23 @@ class ORIENTATIONANALYSIS_EXPORT CAxisSegmentFeatures : public SegmentFeatures int64 getSeed(int32 gnum, int64 nextSeed) const override; bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + /** + * @brief Checks whether a voxel can participate in C-axis segmentation based on mask and phase. + * @param point Linear voxel index. + * @return true if the voxel passes mask and phase checks. + */ + bool isValidVoxel(int64 point) const override; + + /** + * @brief Determines whether two neighboring voxels belong to the same C-axis segment. + * @param point1 First voxel index. + * @param point2 Second (neighbor) voxel index. + * @return true if both voxels share the same phase and their C-axis misalignment is within tolerance. + */ + bool areNeighborsSimilar(int64 point1, int64 point2) const override; + + void prepareForSlice(int64 iz, int64 dimX, int64 dimY, int64 dimZ) override; + private: const CAxisSegmentFeaturesInputValues* m_InputValues = nullptr; @@ -56,6 +73,17 @@ class ORIENTATIONANALYSIS_EXPORT CAxisSegmentFeatures : public SegmentFeatures Int32Array* m_CellPhases = nullptr; std::unique_ptr m_GoodVoxelsArray = nullptr; Int32Array* m_FeatureIdsArray = nullptr; + + void allocateSliceBuffers(int64 dimX, int64 dimY); + void deallocateSliceBuffers(); + + // Rolling 2-slot input buffers for OOC optimization. + std::vector m_QuatBuffer; + std::vector m_PhaseBuffer; + std::vector m_MaskBuffer; + int64 m_BufSliceSize = 0; + int64 m_BufferedSliceZ[2] = {-1, -1}; + bool m_UseSliceBuffers = false; }; } // namespace nx::core diff --git a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.cpp b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.cpp index 000b1e28b3..3844345130 100644 --- a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.cpp +++ b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.cpp @@ -2,6 +2,9 @@ #include "simplnx/DataStructure/DataStore.hpp" #include "simplnx/DataStructure/Geometry/IGridGeometry.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" + +#include using namespace nx::core; @@ -17,6 +20,30 @@ EBSDSegmentFeatures::EBSDSegmentFeatures(DataStructure& dataStructure, const IFi // ----------------------------------------------------------------------------- EBSDSegmentFeatures::~EBSDSegmentFeatures() noexcept = default; +// ----------------------------------------------------------------------------- +// Segments an EBSD dataset into crystallographic features (grains) by flood- +// filling contiguous voxels whose crystal orientations are within a user- +// specified misorientation tolerance. Two voxels are grouped into the same +// feature only if they share the same phase and their misorientation (computed +// via the appropriate LaueOps symmetry operator) is below the threshold. +// +// Algorithm dispatch: +// - In-core data -> execute() : classic depth-first-search (DFS) flood fill +// - Out-of-core -> executeCCL() : connected-component labeling that streams +// data slice-by-slice to limit memory usage +// The choice is made by checking IsOutOfCore() on the FeatureIds array (i.e., +// whether the backing DataStore lives on disk) or if ForceOocAlgorithm() is +// set (used for testing). +// +// Post-processing after either algorithm: +// 1. Validate that at least one feature was found (error if not). +// 2. Resize the Feature AttributeMatrix to (m_FoundFeatures + 1) tuples so +// that all per-feature arrays (Active, etc.) have the correct size. +// Index 0 is reserved as an invalid/background feature. +// 3. Initialize the Active array: fill with 1 (active), then set index 0 +// to 0 to mark it as the reserved background slot. +// 4. Optionally randomize FeatureIds so that spatially adjacent grains get +// non-sequential IDs, improving visual contrast in color-mapped renders. // ----------------------------------------------------------------------------- Result<> EBSDSegmentFeatures::operator()() { @@ -43,8 +70,21 @@ Result<> EBSDSegmentFeatures::operator()() m_FeatureIdsArray = m_DataStructure.getDataAs(m_InputValues->FeatureIdsArrayPath); m_FeatureIdsArray->fill(0); // initialize the output array with zeros - // Run the segmentation algorithm - execute(gridGeom); + // Dispatch between DFS (in-core) and CCL (OOC) algorithms + if(IsOutOfCore(*m_FeatureIdsArray) || ForceOocAlgorithm()) + { + SizeVec3 udims = gridGeom->getDimensions(); + allocateSliceBuffers(static_cast(udims[0]), static_cast(udims[1])); + + auto& featureIdsStore = m_FeatureIdsArray->getDataStoreRef(); + executeCCL(gridGeom, featureIdsStore); + + deallocateSliceBuffers(); + } + else + { + execute(gridGeom); + } // Sanity check the result. if(this->m_FoundFeatures < 1) { @@ -72,6 +112,22 @@ Result<> EBSDSegmentFeatures::operator()() return {}; } +// ----------------------------------------------------------------------------- +// Finds the next unassigned voxel that can serve as the seed for a new feature. +// The scan is a simple linear walk starting from `nextSeed`, which is the index +// immediately after the last seed found. This avoids rescanning already-assigned +// voxels at the beginning of the array. +// +// A voxel is eligible to become a seed when all three conditions are met: +// 1. featureId == 0 : the voxel has not yet been assigned to any feature. +// 2. Passes the mask: if masking is enabled, the voxel must be flagged as +// "good" (e.g., not a bad scan point). +// 3. cellPhase > 0 : the voxel belongs to a real crystallographic phase +// (phase 0 is reserved for unindexed/background points). +// +// When a valid seed is found, its featureId is immediately set to `gnum` +// (the new feature number) so that subsequent calls will skip it. +// Returns the linear index of the seed, or -1 if no more seeds exist. // ----------------------------------------------------------------------------- int64_t EBSDSegmentFeatures::getSeed(int32 gnum, int64 nextSeed) const { @@ -108,6 +164,26 @@ int64_t EBSDSegmentFeatures::getSeed(int32 gnum, int64 nextSeed) const return seed; } +// ----------------------------------------------------------------------------- +// Determines whether a neighboring voxel should be merged into the current +// feature during the DFS flood fill (execute() path). This is NOT used by +// the CCL path, which calls areNeighborsSimilar() instead. +// +// The method checks three conditions before grouping: +// 1. The neighbor's featureId must be 0 (unassigned). +// 2. The neighbor must pass the mask (if masking is enabled). +// 3. The neighbor must be crystallographically similar to the reference voxel. +// +// Similarity check (misorientation): +// - Look up the Laue class for both voxels from their phase -> crystal +// structure mapping. If either Laue class is out of range (>= number of +// known symmetry operators, e.g., phase == 999), bail out immediately. +// - Extract the quaternion orientations (4 floats per voxel) for both points. +// - If both voxels share the same phase, compute the misorientation angle via +// LaueOps::calculateMisorientation(), which returns an axis-angle pair. +// The angle (w, in radians) accounts for crystal symmetry equivalences. +// - If w < MisorientationTolerance, the voxels are considered part of the +// same grain. The neighbor's featureId is set to `gnum` as a side effect. // ----------------------------------------------------------------------------- bool EBSDSegmentFeatures::determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const { @@ -152,3 +228,291 @@ bool EBSDSegmentFeatures::determineGrouping(int64 referencePoint, int64 neighbor return group; } + +// ----------------------------------------------------------------------------- +// Checks whether a single voxel is eligible for segmentation (used by the CCL +// path in executeCCL()). A voxel is valid if it passes the mask and has a +// crystallographic phase > 0. +// +// Slice buffer fast path: +// When m_UseSliceBuffers is true (OOC mode), the method first checks whether +// the voxel's Z-slice is currently loaded in the rolling 2-slot buffer. The +// slot is determined by (iz % 2). If the voxel's slice matches the buffered +// slice index, the mask and phase values are read directly from the in-memory +// m_MaskBuffer and m_PhaseBuffer arrays, avoiding an on-disk I/O round-trip. +// +// OOC fallback: +// If slice buffers are not active, or if the voxel's slice is not currently +// buffered (which can happen during Phase 1b of CCL when periodic boundary +// merging accesses non-adjacent slices), the method falls back to direct +// array access through the DataStore, which may trigger on-disk I/O for +// out-of-core data. +// ----------------------------------------------------------------------------- +bool EBSDSegmentFeatures::isValidVoxel(int64 point) const +{ + if(m_UseSliceBuffers) + { + const int64 iz = point / m_BufSliceSize; + const int slot = static_cast(iz % 2); + if(m_BufferedSliceZ[slot] == iz) + { + const usize sliceSize = static_cast(m_BufSliceSize); + const usize off = static_cast(slot) * sliceSize + static_cast(point - iz * m_BufSliceSize); + if(m_InputValues->UseMask && m_MaskBuffer[off] == 0) + { + return false; + } + if(m_PhaseBuffer[off] <= 0) + { + return false; + } + return true; + } + } + + // OOC fallback + if(m_InputValues->UseMask && !m_GoodVoxelsArray->isTrue(point)) + { + return false; + } + AbstractDataStore& cellPhases = m_CellPhases->getDataStoreRef(); + if(cellPhases[point] <= 0) + { + return false; + } + return true; +} + +// ----------------------------------------------------------------------------- +// Determines whether two neighboring voxels are crystallographically similar +// enough to belong to the same feature. Used exclusively by the CCL path +// (executeCCL()), whereas the DFS path uses determineGrouping() instead. +// +// Slice buffer fast path: +// When both voxels' Z-slices are present in the rolling 2-slot buffer, all +// data is read from the in-memory buffers (m_QuatBuffer, m_PhaseBuffer, +// m_MaskBuffer). The buffer offset for each point is computed as: +// slot * sliceSize + (point - iz * sliceSize) +// For quaternions, an additional x4 factor accounts for the 4 components +// per voxel. The method then: +// 1. Checks point2's mask validity. +// 2. Checks that point2's phase > 0 and both phases match. +// 3. Looks up the Laue class and verifies it is in range. +// 4. Constructs QuatD objects from the buffered quaternion components. +// 5. Computes misorientation via LaueOps::calculateMisorientation(). +// 6. Returns true if the misorientation angle < MisorientationTolerance. +// +// OOC fallback: +// If either voxel's slice is not buffered (e.g., during Phase 1b periodic +// merge), falls back to direct DataStore access: validates point2 via +// isValidVoxel(), checks phase equality, then computes misorientation from +// the full quaternion and phase arrays on disk. +// ----------------------------------------------------------------------------- +bool EBSDSegmentFeatures::areNeighborsSimilar(int64 point1, int64 point2) const +{ + if(m_UseSliceBuffers) + { + const int64 iz1 = point1 / m_BufSliceSize; + const int slot1 = static_cast(iz1 % 2); + const int64 iz2 = point2 / m_BufSliceSize; + const int slot2 = static_cast(iz2 % 2); + + if(m_BufferedSliceZ[slot1] == iz1 && m_BufferedSliceZ[slot2] == iz2) + { + const usize sliceSize = static_cast(m_BufSliceSize); + const usize off1 = static_cast(slot1) * sliceSize + static_cast(point1 - iz1 * m_BufSliceSize); + const usize off2 = static_cast(slot2) * sliceSize + static_cast(point2 - iz2 * m_BufSliceSize); + + // Check point2 validity + if(m_InputValues->UseMask && m_MaskBuffer[off2] == 0) + { + return false; + } + const int32 phase1 = m_PhaseBuffer[off1]; + const int32 phase2 = m_PhaseBuffer[off2]; + if(phase2 <= 0) + { + return false; + } + if(phase1 != phase2) + { + return false; + } + + int32 laueClass = (*m_CrystalStructures)[phase1]; + if(static_cast(laueClass) >= m_OrientationOps.size()) + { + return false; + } + + const usize q1Base = static_cast(slot1) * sliceSize * 4 + static_cast(point1 - iz1 * m_BufSliceSize) * 4; + const usize q2Base = static_cast(slot2) * sliceSize * 4 + static_cast(point2 - iz2 * m_BufSliceSize) * 4; + + const ebsdlib::QuatD q1(m_QuatBuffer[q1Base], m_QuatBuffer[q1Base + 1], m_QuatBuffer[q1Base + 2], m_QuatBuffer[q1Base + 3]); + const ebsdlib::QuatD q2(m_QuatBuffer[q2Base], m_QuatBuffer[q2Base + 1], m_QuatBuffer[q2Base + 2], m_QuatBuffer[q2Base + 3]); + + ebsdlib::AxisAngleDType axisAngle = m_OrientationOps[laueClass]->calculateMisorientation(q1, q2); + float w = static_cast(axisAngle[3]); + + return w < m_InputValues->MisorientationTolerance; + } + } + + // OOC fallback (original code) + if(!isValidVoxel(point2)) + { + return false; + } + + AbstractDataStore& cellPhases = m_CellPhases->getDataStoreRef(); + + if(cellPhases[point1] != cellPhases[point2]) + { + return false; + } + + int32 laueClass = (*m_CrystalStructures)[cellPhases[point1]]; + if(static_cast(laueClass) >= m_OrientationOps.size()) + { + return false; + } + + Float32Array& quats = *m_QuatsArray; + const ebsdlib::QuatD q1(quats[point1 * 4], quats[point1 * 4 + 1], quats[point1 * 4 + 2], quats[point1 * 4 + 3]); + const ebsdlib::QuatD q2(quats[point2 * 4], quats[point2 * 4 + 1], quats[point2 * 4 + 2], quats[point2 * 4 + 3]); + + ebsdlib::AxisAngleDType axisAngle = m_OrientationOps[laueClass]->calculateMisorientation(q1, q2); + float w = static_cast(axisAngle[3]); + + return w < m_InputValues->MisorientationTolerance; +} + +// ----------------------------------------------------------------------------- +// Allocates the rolling 2-slot slice buffers used by the CCL (OOC) algorithm. +// Called once at the start of the OOC branch in operator(), before executeCCL(). +// +// Each slot holds one full XY slice (dimX * dimY voxels). Two slots are needed +// because the CCL algorithm compares the current slice (iz) with the previous +// slice (iz-1), so both must be in memory simultaneously. +// +// Buffers allocated: +// - m_QuatBuffer : 2 * sliceSize * 4 floats (quaternion: 4 components/voxel) +// - m_PhaseBuffer : 2 * sliceSize int32 values (one phase ID per voxel) +// - m_MaskBuffer : 2 * sliceSize uint8 values (one mask flag per voxel) +// +// Both m_BufferedSliceZ slots are initialized to -1 (no slice loaded). +// m_UseSliceBuffers is set to true so that isValidVoxel() and +// areNeighborsSimilar() will use the fast buffer path. +// ----------------------------------------------------------------------------- +void EBSDSegmentFeatures::allocateSliceBuffers(int64 dimX, int64 dimY) +{ + m_BufSliceSize = dimX * dimY; + const usize sliceSize = static_cast(m_BufSliceSize); + m_QuatBuffer.resize(2 * sliceSize * 4); + m_PhaseBuffer.resize(2 * sliceSize); + m_MaskBuffer.resize(2 * sliceSize); + m_BufferedSliceZ[0] = -1; + m_BufferedSliceZ[1] = -1; + m_UseSliceBuffers = true; +} + +// ----------------------------------------------------------------------------- +// Releases the slice buffers after executeCCL() completes, freeing the memory +// back to the system. Called in the OOC branch of operator() after the CCL +// algorithm finishes. Resets m_UseSliceBuffers to false and both +// m_BufferedSliceZ slots to -1. The vectors are replaced with default- +// constructed (empty) instances to guarantee memory deallocation. +// ----------------------------------------------------------------------------- +void EBSDSegmentFeatures::deallocateSliceBuffers() +{ + m_UseSliceBuffers = false; + m_QuatBuffer = std::vector(); + m_PhaseBuffer = std::vector(); + m_MaskBuffer = std::vector(); + m_BufferedSliceZ[0] = -1; + m_BufferedSliceZ[1] = -1; +} + +// ----------------------------------------------------------------------------- +// Pre-loads voxel data for a single Z-slice into the rolling 2-slot buffer, +// called by executeCCL() before processing each slice. +// +// Rolling buffer design: +// The target slot is determined by (iz % 2), so even slices go to slot 0 and +// odd slices go to slot 1. Because the CCL algorithm processes slices in +// order (0, 1, 2, ...), at any given slice iz the previous slice (iz-1) is +// always in the other slot, keeping both the current and previous slice data +// available in memory. +// +// Sentinel behavior: +// If iz < 0, slice buffering is disabled (m_UseSliceBuffers = false). The +// CCL algorithm passes iz = -1 after completing the slice-by-slice sweep to +// signal that subsequent calls (e.g., during Phase 1b periodic boundary +// merging) should use direct DataStore access instead of the buffers. +// +// Skip-if-already-loaded: +// If m_BufferedSliceZ[slot] == iz, the data for this slice is already in the +// buffer (e.g., from a previous prepareForSlice call), so the method returns +// immediately without re-reading. +// +// Data loaded per slice: +// - Quaternions (4 float32 per voxel) into m_QuatBuffer +// - Phase IDs (1 int32 per voxel) into m_PhaseBuffer +// - Mask flags (1 uint8 per voxel) into m_MaskBuffer; if masking is disabled, +// all mask values are set to 1 (valid) +// ----------------------------------------------------------------------------- +void EBSDSegmentFeatures::prepareForSlice(int64 iz, int64 dimX, int64 dimY, int64 dimZ) +{ + if(iz < 0) + { + m_UseSliceBuffers = false; + return; + } + + if(!m_UseSliceBuffers) + { + return; + } + + const int slot = static_cast(iz % 2); + if(m_BufferedSliceZ[slot] == iz) + { + return; + } + + const usize sliceSize = static_cast(m_BufSliceSize); + const usize slotOffset = static_cast(slot) * sliceSize; + const usize quatSlotOffset = slotOffset * 4; + const int64 baseIndex = iz * m_BufSliceSize; + + AbstractDataStore& quatStore = m_QuatsArray->getDataStoreRef(); + for(usize i = 0; i < sliceSize; i++) + { + const usize srcBase = static_cast(baseIndex + static_cast(i)) * 4; + const usize dstBase = quatSlotOffset + i * 4; + m_QuatBuffer[dstBase + 0] = quatStore[srcBase + 0]; + m_QuatBuffer[dstBase + 1] = quatStore[srcBase + 1]; + m_QuatBuffer[dstBase + 2] = quatStore[srcBase + 2]; + m_QuatBuffer[dstBase + 3] = quatStore[srcBase + 3]; + } + + AbstractDataStore& phaseStore = m_CellPhases->getDataStoreRef(); + for(usize i = 0; i < sliceSize; i++) + { + m_PhaseBuffer[slotOffset + i] = phaseStore[static_cast(baseIndex) + i]; + } + + if(m_InputValues->UseMask && m_GoodVoxelsArray != nullptr) + { + for(usize i = 0; i < sliceSize; i++) + { + m_MaskBuffer[slotOffset + i] = m_GoodVoxelsArray->isTrue(static_cast(baseIndex) + i) ? 1 : 0; + } + } + else + { + std::fill(m_MaskBuffer.begin() + slotOffset, m_MaskBuffer.begin() + slotOffset + sliceSize, static_cast(1)); + } + + m_BufferedSliceZ[slot] = iz; +} diff --git a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.hpp b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.hpp index d1db6caada..da9a55ea57 100644 --- a/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.hpp +++ b/src/Plugins/OrientationAnalysis/src/OrientationAnalysis/Filters/Algorithms/EBSDSegmentFeatures.hpp @@ -56,26 +56,25 @@ class ORIENTATIONANALYSIS_EXPORT EBSDSegmentFeatures : public SegmentFeatures Result<> operator()(); protected: + int64_t getSeed(int32 gnum, int64 nextSeed) const override; + bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + /** - * @brief - * @param data - * @param args - * @param gnum - * @param nextSeed - * @return int64 + * @brief Checks whether a voxel can participate in EBSD segmentation based on mask and phase. + * @param point Linear voxel index. + * @return true if the voxel passes mask and phase checks. */ - int64_t getSeed(int32 gnum, int64 nextSeed) const override; + bool isValidVoxel(int64 point) const override; /** - * @brief - * @param data - * @param args - * @param referencepoint - * @param neighborpoint - * @param gnum - * @return bool + * @brief Determines whether two neighboring voxels belong to the same EBSD segment. + * @param point1 First voxel index. + * @param point2 Second (neighbor) voxel index. + * @return true if both voxels share the same phase and their misorientation is within tolerance. */ - bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + bool areNeighborsSimilar(int64 point1, int64 point2) const override; + + void prepareForSlice(int64 iz, int64 dimX, int64 dimY, int64 dimZ) override; private: const EBSDSegmentFeaturesInputValues* m_InputValues = nullptr; @@ -87,6 +86,19 @@ class ORIENTATIONANALYSIS_EXPORT EBSDSegmentFeatures : public SegmentFeatures FeatureIdsArrayType* m_FeatureIdsArray = nullptr; std::vector m_OrientationOps; + + void allocateSliceBuffers(int64 dimX, int64 dimY); + void deallocateSliceBuffers(); + + // Rolling 2-slot input buffers for OOC optimization. + // Pre-loading input data into these avoids per-element OOC overhead + // during neighbor comparisons in the CCL algorithm. + std::vector m_QuatBuffer; + std::vector m_PhaseBuffer; + std::vector m_MaskBuffer; + int64 m_BufSliceSize = 0; + int64 m_BufferedSliceZ[2] = {-1, -1}; + bool m_UseSliceBuffers = false; }; } // namespace nx::core diff --git a/src/Plugins/OrientationAnalysis/test/CAxisSegmentFeaturesTest.cpp b/src/Plugins/OrientationAnalysis/test/CAxisSegmentFeaturesTest.cpp index 5a6e1a4681..16977e8f3d 100644 --- a/src/Plugins/OrientationAnalysis/test/CAxisSegmentFeaturesTest.cpp +++ b/src/Plugins/OrientationAnalysis/test/CAxisSegmentFeaturesTest.cpp @@ -2,257 +2,377 @@ #include "OrientationAnalysis/Filters/CAxisSegmentFeaturesFilter.hpp" #include "OrientationAnalysis/OrientationAnalysis_test_dirs.hpp" -#include "OrientationAnalysisTestUtils.hpp" -#include "simplnx/Core/Application.hpp" -#include "simplnx/Parameters/ArrayCreationParameter.hpp" -#include "simplnx/Parameters/Dream3dImportParameter.hpp" -#include "simplnx/Parameters/GeometrySelectionParameter.hpp" +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Parameters/ChoicesParameter.hpp" +#include "simplnx/UnitTest/SegmentFeaturesTestUtils.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" -#include +#include namespace fs = std::filesystem; using namespace nx::core; -using namespace nx::core::Constants; -namespace caxis_segment_features_constants +using namespace nx::core::UnitTest; + +namespace +{ +// Exemplar archive (shared across Scalar, EBSD, CAxis) +const std::string k_ArchiveName = "segment_features_exemplars.tar.gz"; +const std::string k_DataDirName = "segment_features_exemplars"; +const fs::path k_DataDir = fs::path(unit_test::k_TestFilesDir.view()) / k_DataDirName; +const fs::path k_SmallExemplarFile = k_DataDir / "caxis_small.dream3d"; +const fs::path k_LargeExemplarFile = k_DataDir / "caxis_large.dream3d"; + +// Geometry names +constexpr StringLiteral k_GeomName = "DataContainer"; +constexpr StringLiteral k_CellDataName = "CellData"; +constexpr StringLiteral k_FeatureDataName = "CellFeatureData"; +constexpr StringLiteral k_EnsembleName = "CellEnsembleData"; + +// Output array paths +const DataPath k_GeomPath({k_GeomName}); +const DataPath k_FeatureIdsPath({k_GeomName, k_CellDataName, "FeatureIds"}); +const DataPath k_ActivePath({k_GeomName, k_FeatureDataName, "Active"}); +const DataPath k_MaskPath({k_GeomName, k_CellDataName, "Mask"}); +const DataPath k_QuatsPath({k_GeomName, k_CellDataName, "Quats"}); +const DataPath k_PhasesPath({k_GeomName, k_CellDataName, "Phases"}); +const DataPath k_CrystalStructuresPath({k_GeomName, k_EnsembleName, "CrystalStructures"}); + +// Test dimensions +constexpr usize k_SmallDim = 15; +constexpr usize k_SmallBlockSize = 5; +constexpr usize k_LargeDim = 200; +constexpr usize k_LargeBlockSize = 25; + +/** + * @brief Populates CAxisSegmentFeaturesFilter arguments. + */ +void SetupArgs(Arguments& args, bool useMask, float32 tolerance = 5.0f, ChoicesParameter::ValueType neighborScheme = 0, bool randomize = false) { -inline constexpr StringLiteral k_InputGeometryName = "DataContainer"; -inline const DataPath k_InputGeometryPath({k_InputGeometryName}); -inline constexpr StringLiteral k_CellDataName = "CellData"; -inline constexpr StringLiteral k_EnsembleName = "CellEnsembleData"; -inline const DataPath k_QuatsArrayPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Quats"); -inline const DataPath k_PhasesArrayPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Phases"); -inline const DataPath k_MaskArrayPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Mask (Y Pos)"); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(tolerance)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(neighborScheme)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_UseMask_Key, std::make_any(useMask)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(useMask ? k_MaskPath : DataPath{})); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(k_GeomPath)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(k_QuatsPath)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(k_PhasesPath)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(k_CrystalStructuresPath)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(std::string(k_FeatureDataName))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(randomize)); +} +} // namespace -inline const DataPath k_CrystalStructuresArrayPath = k_InputGeometryPath.createChildPath(k_EnsembleName).createChildPath("CrystalStructures"); +TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures: Small Correctness", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") +{ + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // Quats float32 4-comp => 15*15*4*4 = 3,600 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 3600, true); -inline const DataPath k_ActivesArrayPath = k_InputGeometryPath.createChildPath(k_Grain_Data).createChildPath(k_ActiveName); + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, k_ArchiveName, k_DataDirName); + DataStructure exemplarDS = UnitTest::LoadDataStructure(k_SmallExemplarFile); -inline const DataPath k_FeatureIdsArrayPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath(k_FeatureIds); + std::string testName = GENERATE("Base", "Masked"); + DYNAMIC_SECTION("Variant: " << testName) + { + const bool useMask = (testName == "Masked"); + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + const std::array dims = {k_SmallDim, k_SmallDim, k_SmallDim}; -inline const DataPath k_FeatureIdsFacePath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("CAxis_FeatureIds_Face"); -inline const DataPath k_FeatureIdsAllPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("CAxis_FeatureIds_All"); -inline const DataPath k_FeatureIdsMaskFacePath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("CAxis_FeatureIds_Mask_Face"); -inline const DataPath k_FeatureIdsMaskAllPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("CAxis_FeatureIds_Mask_All"); -} // namespace caxis_segment_features_constants + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = dataStructure.getDataRefAs(k_GeomPath); + BuildOrientationTestData(dataStructure, cellShape, geom.getId(), am->getId(), 0, k_SmallBlockSize); // Hexagonal_High -TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:Face", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") -{ - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); - // Read Exemplar DREAM3D File Filter - auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); + if(useMask) + { + BuildSphericalMask(dataStructure, cellShape, am->getId()); + } - // EBSD Segment Features/Semgent Features (Misorientation) Filter - { CAxisSegmentFeaturesFilter filter; Arguments args; + SetupArgs(args, useMask); - // Create default Parameters for the filter. - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(0)); - - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(caxis_segment_features_constants::k_MaskArrayPath)); - - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(caxis_segment_features_constants::k_InputGeometryPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(caxis_segment_features_constants::k_QuatsArrayPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(caxis_segment_features_constants::k_PhasesArrayPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(caxis_segment_features_constants::k_CrystalStructuresArrayPath)); - - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any(k_FeatureIds)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(k_Grain_Data)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); - - // Preflight the filter and check result auto preflightResult = filter.preflight(dataStructure, args); SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); - - // Execute the filter and check the result auto executeResult = filter.execute(dataStructure, args); SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - } - { - UInt8Array& actives = dataStructure.getDataRefAs(caxis_segment_features_constants::k_ActivesArrayPath); - size_t numFeatures = actives.getNumberOfTuples(); - REQUIRE(numFeatures == 57); - } + // Compare against exemplar + const std::string exemplarGeomName = testName + "_Exemplar"; + const DataPath exemplarFeatureIdsPath({exemplarGeomName, std::string(k_CellDataName), "FeatureIds"}); + const DataPath exemplarActivePath({exemplarGeomName, std::string(k_FeatureDataName), "Active"}); - // Loop and compare each array from the 'Exemplar Data / CellData' to the 'Data Container / CellData' group - { - const auto& generatedDataArray = dataStructure.getDataRefAs(caxis_segment_features_constants::k_FeatureIdsArrayPath); - const auto& exemplarDataArray = dataStructure.getDataRefAs(caxis_segment_features_constants::k_FeatureIdsFacePath); + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarFeatureIdsPath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarFeatureIdsPath), dataStructure.getDataRefAs(k_FeatureIdsPath)); - UnitTest::CompareDataArrays(generatedDataArray, exemplarDataArray); - } + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarActivePath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarActivePath), dataStructure.getDataRefAs(k_ActivePath)); - UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); + UnitTest::CheckArraysInheritTupleDims(dataStructure); + } } -TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:All", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") +TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures: 200x200x200 Large OOC", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") { - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); - // Read Exemplar DREAM3D File Filter - auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // Quats float32 4-comp => 200*200*4*4 = 640,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 640000, true); - // EBSD Segment Features/Semgent Features (Misorientation) Filter - { - CAxisSegmentFeaturesFilter filter; - Arguments args; + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, k_ArchiveName, k_DataDirName); + DataStructure exemplarDS = UnitTest::LoadDataStructure(k_LargeExemplarFile); - // Create default Parameters for the filter. - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(1)); + const ShapeType cellShape = {k_LargeDim, k_LargeDim, k_LargeDim}; + const std::array dims = {k_LargeDim, k_LargeDim, k_LargeDim}; - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(caxis_segment_features_constants::k_MaskArrayPath)); + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = dataStructure.getDataRefAs(k_GeomPath); + BuildOrientationTestData(dataStructure, cellShape, geom.getId(), am->getId(), 0, k_LargeBlockSize); // Hexagonal_High + BuildSphericalMask(dataStructure, cellShape, am->getId()); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(caxis_segment_features_constants::k_InputGeometryPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(caxis_segment_features_constants::k_QuatsArrayPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(caxis_segment_features_constants::k_PhasesArrayPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(caxis_segment_features_constants::k_CrystalStructuresArrayPath)); + CAxisSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, /*useMask=*/true); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any(k_FeatureIds)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(k_Grain_Data)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - // Preflight the filter and check result - auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + const DataPath exemplarFeatureIdsPath({"DataContainer_Exemplar", std::string(k_CellDataName), "FeatureIds"}); + const DataPath exemplarActivePath({"DataContainer_Exemplar", std::string(k_FeatureDataName), "Active"}); - // Execute the filter and check the result - auto executeResult = filter.execute(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - } + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarFeatureIdsPath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarFeatureIdsPath), dataStructure.getDataRefAs(k_FeatureIdsPath)); - { - UInt8Array& actives = dataStructure.getDataRefAs(caxis_segment_features_constants::k_ActivesArrayPath); - size_t numFeatures = actives.getNumberOfTuples(); - REQUIRE(numFeatures == 37); - } - - // Loop and compare each array from the 'Exemplar Data / CellData' to the 'Data Container / CellData' group - { - const auto& generatedDataArray = dataStructure.getDataRefAs(caxis_segment_features_constants::k_FeatureIdsArrayPath); - const auto& exemplarDataArray = dataStructure.getDataRefAs(caxis_segment_features_constants::k_FeatureIdsAllPath); - - UnitTest::CompareDataArrays(generatedDataArray, exemplarDataArray); - } + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarActivePath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarActivePath), dataStructure.getDataRefAs(k_ActivePath)); - UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); + UnitTest::CheckArraysInheritTupleDims(dataStructure); } -TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:MaskFace", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") +TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures: No Valid Voxels Returns Error", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") { - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); - // Read Exemplar DREAM3D File Filter - auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); + UnitTest::LoadPlugins(); - // EBSD Segment Features/Semgent Features (Misorientation) Filter - { - CAxisSegmentFeaturesFilter filter; - Arguments args; + RunNoValidVoxelsErrorTest([](Arguments& args, DataStructure& ds, const DataPath& geomPath, const DataPath& cellDataPath, const DataPath& maskPath) { + const ShapeType cellShape = {3, 3, 3}; + auto& am = ds.getDataRefAs(cellDataPath); + auto& geom = ds.getDataRefAs(geomPath); + BuildOrientationTestData(ds, cellShape, geom.getId(), am.getId(), 0, 3); // Hexagonal_High - // Create default Parameters for the filter. args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); args.insertOrAssign(CAxisSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(0)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_UseMask_Key, std::make_any(true)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(caxis_segment_features_constants::k_MaskArrayPath)); - - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(caxis_segment_features_constants::k_InputGeometryPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(caxis_segment_features_constants::k_QuatsArrayPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(caxis_segment_features_constants::k_PhasesArrayPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(caxis_segment_features_constants::k_CrystalStructuresArrayPath)); - - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any(k_FeatureIds)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(k_Grain_Data)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(maskPath)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(geomPath)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(cellDataPath.createChildPath("Quats"))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(cellDataPath.createChildPath("Phases"))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(DataPath({"Geom", "CellEnsembleData", "CrystalStructures"}))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any("Grain Data")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); args.insertOrAssign(CAxisSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + }); +} - // Preflight the filter and check result - auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); - - // Execute the filter and check the result - auto executeResult = filter.execute(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - } - +TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures: Randomize Feature IDs", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") +{ + UnitTest::LoadPlugins(); + + constexpr usize k_ExpectedFeatures = 3; // 3 Z-layers with 1 merge-pair pillar + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + const std::array dims = {k_SmallDim, k_SmallDim, k_SmallDim}; + + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = dataStructure.getDataRefAs(k_GeomPath); + BuildOrientationTestData(dataStructure, cellShape, geom.getId(), am->getId(), 0, k_SmallBlockSize); // Hexagonal_High + + CAxisSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, /*useMask=*/false, /*tolerance=*/5.0f, /*neighborScheme=*/0, /*randomize=*/true); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + const auto& actives = dataStructure.getDataRefAs(k_ActivePath); + REQUIRE(actives.getNumberOfTuples() == k_ExpectedFeatures + 1); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + const auto& featureIds = dataStructure.getDataRefAs(k_FeatureIdsPath); + const auto& featureStore = featureIds.getDataStoreRef(); + std::set uniqueIds; + int32 minId = std::numeric_limits::max(); + int32 maxId = std::numeric_limits::min(); + for(usize i = 0; i < featureStore.getNumberOfTuples(); i++) { - UInt8Array& actives = dataStructure.getDataRefAs(caxis_segment_features_constants::k_ActivesArrayPath); - size_t numFeatures = actives.getNumberOfTuples(); - REQUIRE(numFeatures == 31); + int32 fid = featureStore.getValue(i); + uniqueIds.insert(fid); + minId = std::min(minId, fid); + maxId = std::max(maxId, fid); } + REQUIRE(minId == 1); + REQUIRE(maxId == static_cast(k_ExpectedFeatures)); + REQUIRE(uniqueIds.size() == k_ExpectedFeatures); +} - // Loop and compare each array from the 'Exemplar Data / CellData' to the 'Data Container / CellData' group +TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures: High Tolerance Merges All", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") +{ + UnitTest::LoadPlugins(); + + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + const std::array dims = {k_SmallDim, k_SmallDim, k_SmallDim}; + + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = dataStructure.getDataRefAs(k_GeomPath); + BuildOrientationTestData(dataStructure, cellShape, geom.getId(), am->getId(), 0, k_SmallBlockSize); // Hexagonal_High + + CAxisSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, /*useMask=*/false, /*tolerance=*/90.0f); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + + // With tolerance=90 degrees, all C-axis directions on the hemisphere merge into 1 feature + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + const auto& actives = dataStructure.getDataRefAs(k_ActivePath); + REQUIRE(actives.getNumberOfTuples() == 2); // 1 feature + index 0 + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + const auto& featureIds = dataStructure.getDataRefAs(k_FeatureIdsPath); + const auto& featureStore = featureIds.getDataStoreRef(); + for(usize i = 0; i < featureStore.getNumberOfTuples(); i++) { - const auto& generatedDataArray = dataStructure.getDataRefAs(caxis_segment_features_constants::k_FeatureIdsArrayPath); - const auto& exemplarDataArray = dataStructure.getDataRefAs(caxis_segment_features_constants::k_FeatureIdsMaskFacePath); - - UnitTest::CompareDataArrays(generatedDataArray, exemplarDataArray); + REQUIRE(featureStore.getValue(i) == 1); } - - UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); } -TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures:MaskAll", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") +TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures: FaceEdgeVertex Connectivity", "[OrientationAnalysis][CAxisSegmentFeaturesFilter]") { - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); - // Read Exemplar DREAM3D File Filter - auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); + UnitTest::LoadPlugins(); + + constexpr float32 k_DegToRad = 3.14159265358979323846f / 180.0f; + + auto setupCAxis = [&](Arguments& args, DataStructure& ds, const DataPath& geomPath, const DataPath& cellDataPath, ChoicesParameter::ValueType neighborScheme) { + const ShapeType cellShape = {3, 3, 3}; + auto& am = ds.getDataRefAs(cellDataPath); + auto& geom = ds.getDataRefAs(geomPath); + + // Quaternions: background = 60° X-rotation, pairs = identity and 30° (EBSDlib order: x,y,z,w) + const float32 bgHalf = 60.0f * k_DegToRad * 0.5f; + auto quatsDS = DataStoreUtilities::CreateDataStore(cellShape, {4}, IDataAction::Mode::Execute); + auto* quatsArr = DataArray::Create(ds, "Quats", quatsDS, am.getId()); + auto& quatsStore = quatsArr->getDataStoreRef(); + for(usize i = 0; i < 27; i++) + { + quatsStore[i * 4 + 0] = std::sin(bgHalf); + quatsStore[i * 4 + 1] = 0.0f; + quatsStore[i * 4 + 2] = 0.0f; + quatsStore[i * 4 + 3] = std::cos(bgHalf); + } + for(usize idx : {static_cast(0), static_cast(1 * 9 + 1 * 3 + 1)}) + { + quatsStore[idx * 4 + 0] = 0.0f; + quatsStore[idx * 4 + 1] = 0.0f; + quatsStore[idx * 4 + 2] = 0.0f; + quatsStore[idx * 4 + 3] = 1.0f; + } + const float32 pairHalf = 30.0f * k_DegToRad * 0.5f; + for(usize idx : {static_cast(0 * 9 + 0 * 3 + 2), static_cast(1 * 9 + 1 * 3 + 2)}) + { + quatsStore[idx * 4 + 0] = std::sin(pairHalf); + quatsStore[idx * 4 + 1] = 0.0f; + quatsStore[idx * 4 + 2] = 0.0f; + quatsStore[idx * 4 + 3] = std::cos(pairHalf); + } + + auto phasesDS = DataStoreUtilities::CreateDataStore(cellShape, {1}, IDataAction::Mode::Execute); + auto* phasesArr = DataArray::Create(ds, "Phases", phasesDS, am.getId()); + phasesArr->fill(1); + + const ShapeType ensShape = {2}; + auto* ensAM = AttributeMatrix::Create(ds, "CellEnsembleData", ensShape, geom.getId()); + auto crystDS = DataStoreUtilities::CreateDataStore(ensShape, {1}, IDataAction::Mode::Execute); + auto* crystArr = DataArray::Create(ds, "CrystalStructures", crystDS, ensAM->getId()); + auto& crystStore = crystArr->getDataStoreRef(); + crystStore[0] = 999; + crystStore[1] = 0; // Hexagonal_High + + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0f)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(neighborScheme)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(DataPath{})); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(geomPath)); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(cellDataPath.createChildPath("Quats"))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(cellDataPath.createChildPath("Phases"))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(DataPath({"Geom", "CellEnsembleData", "CrystalStructures"}))); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any("CellFeatureData")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(CAxisSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + }; - // EBSD Segment Features/Semgent Features (Misorientation) Filter - { - CAxisSegmentFeaturesFilter filter; - Arguments args; + RunFaceEdgeVertexConnectivityTest([&](Arguments& args, DataStructure& ds, const DataPath& gp, const DataPath& cp) { setupCAxis(args, ds, gp, cp, 0); }, + [&](Arguments& args, DataStructure& ds, const DataPath& gp, const DataPath& cp) { setupCAxis(args, ds, gp, cp, 1); }); +} - // Create default Parameters for the filter. - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(1)); +TEST_CASE("OrientationAnalysis::CAxisSegmentFeatures: Generate Test Data", "[OrientationAnalysis][CAxisSegmentFeaturesFilter][.GenerateTestData]") +{ + UnitTest::LoadPlugins(); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_UseMask_Key, std::make_any(true)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(caxis_segment_features_constants::k_MaskArrayPath)); + const auto outputDir = fs::path(fmt::format("{}/generated_test_data/caxis_segment_features", unit_test::k_BinaryTestOutputDir)); + fs::create_directories(outputDir); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(caxis_segment_features_constants::k_InputGeometryPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(caxis_segment_features_constants::k_QuatsArrayPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(caxis_segment_features_constants::k_PhasesArrayPath)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(caxis_segment_features_constants::k_CrystalStructuresArrayPath)); + // Small input data (15^3) — one geometry per test variant + { + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + const std::array dims = {k_SmallDim, k_SmallDim, k_SmallDim}; - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any(k_FeatureIds)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(k_Grain_Data)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); - args.insertOrAssign(CAxisSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + DataStructure ds; - // Preflight the filter and check result - auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto* amBase = BuildSegmentFeaturesTestGeometry(ds, dims, "Base", std::string(k_CellDataName)); + auto& geomBase = ds.getDataRefAs(DataPath({"Base"})); + BuildOrientationTestData(ds, cellShape, geomBase.getId(), amBase->getId(), 0, k_SmallBlockSize); // Hexagonal_High - // Execute the filter and check the result - auto executeResult = filter.execute(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - } + auto* amMasked = BuildSegmentFeaturesTestGeometry(ds, dims, "Masked", std::string(k_CellDataName)); + auto& geomMasked = ds.getDataRefAs(DataPath({"Masked"})); + BuildOrientationTestData(ds, cellShape, geomMasked.getId(), amMasked->getId(), 0, k_SmallBlockSize); + BuildSphericalMask(ds, cellShape, amMasked->getId()); - { - UInt8Array& actives = dataStructure.getDataRefAs(caxis_segment_features_constants::k_ActivesArrayPath); - size_t numFeatures = actives.getNumberOfTuples(); - REQUIRE(numFeatures == 25); + UnitTest::WriteTestDataStructure(ds, outputDir / "small_input.dream3d"); } - // Loop and compare each array from the 'Exemplar Data / CellData' to the 'Data Container / CellData' group + // Large input data (200^3) — mask=true { - const auto& generatedDataArray = dataStructure.getDataRefAs(caxis_segment_features_constants::k_FeatureIdsArrayPath); - const auto& exemplarDataArray = dataStructure.getDataRefAs(caxis_segment_features_constants::k_FeatureIdsMaskAllPath); + const ShapeType cellShape = {k_LargeDim, k_LargeDim, k_LargeDim}; + const std::array dims = {k_LargeDim, k_LargeDim, k_LargeDim}; - UnitTest::CompareDataArrays(generatedDataArray, exemplarDataArray); - } + DataStructure ds; + auto* am = BuildSegmentFeaturesTestGeometry(ds, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = ds.getDataRefAs(k_GeomPath); + BuildOrientationTestData(ds, cellShape, geom.getId(), am->getId(), 0, k_LargeBlockSize); // Hexagonal_High + BuildSphericalMask(ds, cellShape, am->getId()); - UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); + UnitTest::WriteTestDataStructure(ds, outputDir / "large_input.dream3d"); + } } diff --git a/src/Plugins/OrientationAnalysis/test/CMakeLists.txt b/src/Plugins/OrientationAnalysis/test/CMakeLists.txt index 5ba060ddbd..68ce258546 100644 --- a/src/Plugins/OrientationAnalysis/test/CMakeLists.txt +++ b/src/Plugins/OrientationAnalysis/test/CMakeLists.txt @@ -158,6 +158,7 @@ if(EXISTS "${DREAM3D_DATA_DIR}" AND SIMPLNX_DOWNLOAD_TEST_FILES) download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME write_stats_gen_odf_angle_file.tar.gz SHA512 be3f663aae1f78e5b789200421534ed9fe293187ec3514796ac8177128b34ded18bb9a98b8e838bb283f9818ac30dc4b19ec379bdd581b1a98eb36d967cdd319) download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME 6_5_MergeTwins.tar.gz SHA512 756da6b9a2fdc6c7f1cf611243b889b8da0bdc172c1cd184f81672c3cdf651f1f450aecff2e2e0c9b1fa367735ca1df26436d88fa342cea1825b4e5665aa7dfd) download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME compute_feature_reference_misorientation.tar.gz SHA512 6ea9c04ca5b0c0439573b5a14bda63592181c6badb4dd325b542fb97ff2a5d492e83d2bac1bf5999612cbdb7697ec48e321549427470f1f23ccd37921c6a95f1) + download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME segment_features_exemplars.tar.gz SHA512 004fdccf1d2af6dbea8690a9213cf485ddd8c3afb90416895de7d8f52c51f4c2e4b73edb335b01572934ad4d2ff188edc047b3a64f76beff8d280e55ef08e2ab) endif() diff --git a/src/Plugins/OrientationAnalysis/test/EBSDSegmentFeaturesFilterTest.cpp b/src/Plugins/OrientationAnalysis/test/EBSDSegmentFeaturesFilterTest.cpp index 3765e2f6f1..6d7f65058f 100644 --- a/src/Plugins/OrientationAnalysis/test/EBSDSegmentFeaturesFilterTest.cpp +++ b/src/Plugins/OrientationAnalysis/test/EBSDSegmentFeaturesFilterTest.cpp @@ -2,268 +2,391 @@ #include "OrientationAnalysis/Filters/EBSDSegmentFeaturesFilter.hpp" #include "OrientationAnalysis/OrientationAnalysis_test_dirs.hpp" -#include "OrientationAnalysisTestUtils.hpp" -#include "simplnx/Core/Application.hpp" -#include "simplnx/Parameters/ArrayCreationParameter.hpp" -#include "simplnx/Parameters/Dream3dImportParameter.hpp" -#include "simplnx/Parameters/GeometrySelectionParameter.hpp" +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Parameters/ChoicesParameter.hpp" +#include "simplnx/UnitTest/SegmentFeaturesTestUtils.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" -#include - -#include +#include namespace fs = std::filesystem; using namespace nx::core; -using namespace nx::core::Constants; +using namespace nx::core::UnitTest; -namespace ebsd_segment_features_constants +namespace { -inline constexpr StringLiteral k_InputGeometryName = "DataContainer"; -inline const DataPath k_InputGeometryPath({k_InputGeometryName}); -inline constexpr StringLiteral k_CellDataName = "CellData"; -inline constexpr StringLiteral k_EnsembleName = "CellEnsembleData"; -inline const DataPath k_QuatsArrayPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Quats"); -inline const DataPath k_PhasesArrayPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Phases"); -inline const DataPath k_MaskArrayPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Mask (Y Pos)"); - -inline const DataPath k_CrystalStructuresArrayPath = k_InputGeometryPath.createChildPath(k_EnsembleName).createChildPath("CrystalStructures"); - -inline const DataPath k_ActivesArrayPath = k_InputGeometryPath.createChildPath(k_Grain_Data).createChildPath(k_ActiveName); - -inline const DataPath k_FeatureIdsArrayPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath(k_FeatureIds); - -inline const DataPath k_FeatureIdsFacePath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Ebsd_FeatureIds_Face"); -inline const DataPath k_FeatureIdsAllPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Ebsd_FeatureIds_All"); -inline const DataPath k_FeatureIdsMaskFacePath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Ebsd_FeatureIds_Mask_Face"); -inline const DataPath k_FeatureIdsMaskAllPath = k_InputGeometryPath.createChildPath(k_CellDataName).createChildPath("Ebsd_FeatureIds_Mask_All"); -} // namespace ebsd_segment_features_constants +// Exemplar archive (shared across Scalar, EBSD, CAxis) +const std::string k_ArchiveName = "segment_features_exemplars.tar.gz"; +const std::string k_DataDirName = "segment_features_exemplars"; +const fs::path k_DataDir = fs::path(unit_test::k_TestFilesDir.view()) / k_DataDirName; +const fs::path k_SmallExemplarFile = k_DataDir / "ebsd_small.dream3d"; +const fs::path k_LargeExemplarFile = k_DataDir / "ebsd_large.dream3d"; + +// Geometry names +constexpr StringLiteral k_GeomName = "DataContainer"; +constexpr StringLiteral k_CellDataName = "CellData"; +constexpr StringLiteral k_FeatureDataName = "CellFeatureData"; +constexpr StringLiteral k_EnsembleName = "CellEnsembleData"; + +// Output array paths +const DataPath k_GeomPath({k_GeomName}); +const DataPath k_FeatureIdsPath({k_GeomName, k_CellDataName, "FeatureIds"}); +const DataPath k_ActivePath({k_GeomName, k_FeatureDataName, "Active"}); +const DataPath k_MaskPath({k_GeomName, k_CellDataName, "Mask"}); +const DataPath k_QuatsPath({k_GeomName, k_CellDataName, "Quats"}); +const DataPath k_PhasesPath({k_GeomName, k_CellDataName, "Phases"}); +const DataPath k_CrystalStructuresPath({k_GeomName, k_EnsembleName, "CrystalStructures"}); + +// Test dimensions +constexpr usize k_SmallDim = 15; +constexpr usize k_SmallBlockSize = 5; +constexpr usize k_LargeDim = 200; +constexpr usize k_LargeBlockSize = 25; + +/** + * @brief Populates EBSDSegmentFeaturesFilter arguments. + */ +void SetupArgs(Arguments& args, bool useMask, bool isPeriodic = false, float32 tolerance = 5.0f, ChoicesParameter::ValueType neighborScheme = 0, bool randomize = false) +{ + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(tolerance)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(neighborScheme)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_UseMask_Key, std::make_any(useMask)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(useMask ? k_MaskPath : DataPath{})); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_IsPeriodic_Key, std::make_any(isPeriodic)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(k_GeomPath)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(k_QuatsPath)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(k_PhasesPath)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(k_CrystalStructuresPath)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(std::string(k_FeatureDataName))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(randomize)); +} +} // namespace -TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:Face", "[OrientationAnalysis][EBSDSegmentFeatures]") +TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures: Small Correctness", "[OrientationAnalysis][EBSDSegmentFeatures]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // Quats float32 4-comp => 15*15*4*4 = 3,600 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 3600, true); - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); - // Read Exemplar DREAM3D File Filter - auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, k_ArchiveName, k_DataDirName); + DataStructure exemplarDS = UnitTest::LoadDataStructure(k_SmallExemplarFile); - // EBSD Segment Features/Semgent Features (Misorientation) Filter + std::string testName = GENERATE("Base", "Masked", "Periodic"); + DYNAMIC_SECTION("Variant: " << testName) { - EBSDSegmentFeaturesFilter filter; - Arguments args; + const bool useMask = (testName == "Masked"); + const bool isPeriodic = (testName == "Periodic"); + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + const std::array dims = {k_SmallDim, k_SmallDim, k_SmallDim}; - // Create default Parameters for the filter. - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(0)); + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = dataStructure.getDataRefAs(k_GeomPath); + BuildOrientationTestData(dataStructure, cellShape, geom.getId(), am->getId(), 1, k_SmallBlockSize, isPeriodic); // Cubic_High - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_MaskArrayPath)); - - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(ebsd_segment_features_constants::k_InputGeometryPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_QuatsArrayPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_PhasesArrayPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_CrystalStructuresArrayPath)); + if(useMask) + { + BuildSphericalMask(dataStructure, cellShape, am->getId()); + } - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any(k_FeatureIds)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(k_Grain_Data)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + EBSDSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, useMask, isPeriodic); - // Preflight the filter and check result auto preflightResult = filter.preflight(dataStructure, args); SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); - - // Execute the filter and check the result auto executeResult = filter.execute(dataStructure, args); SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - } - { - UInt8Array& actives = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_ActivesArrayPath); - size_t numFeatures = actives.getNumberOfTuples(); - REQUIRE(numFeatures == 83); - } + // Compare against exemplar + const std::string exemplarGeomName = testName + "_Exemplar"; + const DataPath exemplarFeatureIdsPath({exemplarGeomName, std::string(k_CellDataName), "FeatureIds"}); + const DataPath exemplarActivePath({exemplarGeomName, std::string(k_FeatureDataName), "Active"}); - // Loop and compare each array from the 'Exemplar Data / CellData' to the 'Data Container / CellData' group - { - const auto& generatedDataArray = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_FeatureIdsArrayPath); - const auto& exemplarDataArray = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_FeatureIdsFacePath); + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarFeatureIdsPath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarFeatureIdsPath), dataStructure.getDataRefAs(k_FeatureIdsPath)); - UnitTest::CompareDataArrays(generatedDataArray, exemplarDataArray); - } + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarActivePath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarActivePath), dataStructure.getDataRefAs(k_ActivePath)); - UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); + UnitTest::CheckArraysInheritTupleDims(dataStructure); + } } -TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:All", "[OrientationAnalysis][EBSDSegmentFeatures]") +TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures: 200x200x200 Large OOC", "[OrientationAnalysis][EBSDSegmentFeatures]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // Quats float32 4-comp => 200*200*4*4 = 640,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 640000, true); - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); - // Read Exemplar DREAM3D File Filter - auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, k_ArchiveName, k_DataDirName); + DataStructure exemplarDS = UnitTest::LoadDataStructure(k_LargeExemplarFile); - // EBSD Segment Features/Semgent Features (Misorientation) Filter - { - EBSDSegmentFeaturesFilter filter; - Arguments args; + const ShapeType cellShape = {k_LargeDim, k_LargeDim, k_LargeDim}; + const std::array dims = {k_LargeDim, k_LargeDim, k_LargeDim}; - // Create default Parameters for the filter. - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(1)); + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = dataStructure.getDataRefAs(k_GeomPath); + BuildOrientationTestData(dataStructure, cellShape, geom.getId(), am->getId(), 1, k_LargeBlockSize, true); // Cubic_High, wrapBoundary - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_MaskArrayPath)); + EBSDSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, /*useMask=*/false, /*isPeriodic=*/true); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(ebsd_segment_features_constants::k_InputGeometryPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_QuatsArrayPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_PhasesArrayPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_CrystalStructuresArrayPath)); + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any(k_FeatureIds)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(k_Grain_Data)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + const DataPath exemplarFeatureIdsPath({"DataContainer_Exemplar", std::string(k_CellDataName), "FeatureIds"}); + const DataPath exemplarActivePath({"DataContainer_Exemplar", std::string(k_FeatureDataName), "Active"}); - // Preflight the filter and check result - auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarFeatureIdsPath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarFeatureIdsPath), dataStructure.getDataRefAs(k_FeatureIdsPath)); - // Execute the filter and check the result - auto executeResult = filter.execute(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - } + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarActivePath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarActivePath), dataStructure.getDataRefAs(k_ActivePath)); - { - UInt8Array& actives = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_ActivesArrayPath); - size_t numFeatures = actives.getNumberOfTuples(); - REQUIRE(numFeatures == 77); - } + UnitTest::CheckArraysInheritTupleDims(dataStructure); +} - // Loop and compare each array from the 'Exemplar Data / CellData' to the 'Data Container / CellData' group - { - const auto& generatedDataArray = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_FeatureIdsArrayPath); - const auto& exemplarDataArray = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_FeatureIdsAllPath); +TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures: No Valid Voxels Returns Error", "[OrientationAnalysis][EBSDSegmentFeatures]") +{ + UnitTest::LoadPlugins(); - UnitTest::CompareDataArrays(generatedDataArray, exemplarDataArray); - } + RunNoValidVoxelsErrorTest([](Arguments& args, DataStructure& ds, const DataPath& geomPath, const DataPath& cellDataPath, const DataPath& maskPath) { + const ShapeType cellShape = {3, 3, 3}; + auto& am = ds.getDataRefAs(cellDataPath); + auto& geom = ds.getDataRefAs(geomPath); + BuildOrientationTestData(ds, cellShape, geom.getId(), am.getId(), 1, 3); // Cubic_High - UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(0)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_UseMask_Key, std::make_any(true)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(maskPath)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_IsPeriodic_Key, std::make_any(false)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(geomPath)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(cellDataPath.createChildPath("Quats"))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(cellDataPath.createChildPath("Phases"))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(DataPath({"Geom", "CellEnsembleData", "CrystalStructures"}))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any("Grain Data")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + }); } -TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:MaskFace", "[OrientationAnalysis][EBSDSegmentFeatures]") +TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures: Randomize Feature IDs", "[OrientationAnalysis][EBSDSegmentFeatures]") { UnitTest::LoadPlugins(); - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); - // Read Exemplar DREAM3D File Filter - auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); - - // EBSD Segment Features/Semgent Features (Misorientation) Filter + constexpr usize k_ExpectedFeatures = 3; // 3 Z-layers with 1 merge-pair pillar + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + const std::array dims = {k_SmallDim, k_SmallDim, k_SmallDim}; + + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = dataStructure.getDataRefAs(k_GeomPath); + BuildOrientationTestData(dataStructure, cellShape, geom.getId(), am->getId(), 1, k_SmallBlockSize); // Cubic_High + + EBSDSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, /*useMask=*/false, /*isPeriodic=*/false, /*tolerance=*/5.0f, /*neighborScheme=*/0, /*randomize=*/true); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + const auto& actives = dataStructure.getDataRefAs(k_ActivePath); + REQUIRE(actives.getNumberOfTuples() == k_ExpectedFeatures + 1); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + const auto& featureIds = dataStructure.getDataRefAs(k_FeatureIdsPath); + const auto& featureStore = featureIds.getDataStoreRef(); + std::set uniqueIds; + int32 minId = std::numeric_limits::max(); + int32 maxId = std::numeric_limits::min(); + for(usize i = 0; i < featureStore.getNumberOfTuples(); i++) { - EBSDSegmentFeaturesFilter filter; - Arguments args; + int32 fid = featureStore.getValue(i); + uniqueIds.insert(fid); + minId = std::min(minId, fid); + maxId = std::max(maxId, fid); + } + REQUIRE(minId == 1); + REQUIRE(maxId == static_cast(k_ExpectedFeatures)); + REQUIRE(uniqueIds.size() == k_ExpectedFeatures); +} - // Create default Parameters for the filter. - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(0)); +TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures: High Tolerance Merges All", "[OrientationAnalysis][EBSDSegmentFeatures]") +{ + UnitTest::LoadPlugins(); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_UseMask_Key, std::make_any(true)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_MaskArrayPath)); + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + const std::array dims = {k_SmallDim, k_SmallDim, k_SmallDim}; - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(ebsd_segment_features_constants::k_InputGeometryPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_QuatsArrayPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_PhasesArrayPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_CrystalStructuresArrayPath)); + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = dataStructure.getDataRefAs(k_GeomPath); + BuildOrientationTestData(dataStructure, cellShape, geom.getId(), am->getId(), 1, k_SmallBlockSize); // Cubic_High - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any(k_FeatureIds)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(k_Grain_Data)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + EBSDSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, /*useMask=*/false, /*isPeriodic=*/false, /*tolerance=*/90.0f); - // Preflight the filter and check result - auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - // Execute the filter and check the result - auto executeResult = filter.execute(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - } + // With tolerance=90 degrees, all orientations merge (max cubic misorientation is ~62.8 deg) + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + const auto& actives = dataStructure.getDataRefAs(k_ActivePath); + REQUIRE(actives.getNumberOfTuples() == 2); // 1 feature + index 0 + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + const auto& featureIds = dataStructure.getDataRefAs(k_FeatureIdsPath); + const auto& featureStore = featureIds.getDataStoreRef(); + for(usize i = 0; i < featureStore.getNumberOfTuples(); i++) { - UInt8Array& actives = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_ActivesArrayPath); - size_t numFeatures = actives.getNumberOfTuples(); - REQUIRE(numFeatures == 36); + REQUIRE(featureStore.getValue(i) == 1); } +} - // Loop and compare each array from the 'Exemplar Data / CellData' to the 'Data Container / CellData' group - { - const auto& generatedDataArray = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_FeatureIdsArrayPath); - const auto& exemplarDataArray = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_FeatureIdsMaskFacePath); +TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures: FaceEdgeVertex Connectivity", "[OrientationAnalysis][EBSDSegmentFeatures]") +{ + UnitTest::LoadPlugins(); - UnitTest::CompareDataArrays(generatedDataArray, exemplarDataArray); - } + // Shared test: verifies vertex and edge connectivity with FaceEdgeVertex scheme. + // Setup lambda creates orientation data with 4 isolated voxels and configures args. + // Pair voxels share the same quaternion (0° X-rotation = identity). + // Background voxels get a different quaternion (60° X-rotation, well above 5° tolerance). + constexpr float32 k_DegToRad = 3.14159265358979323846f / 180.0f; + + auto setupEBSD = [&](Arguments& args, DataStructure& ds, const DataPath& geomPath, const DataPath& cellDataPath, ChoicesParameter::ValueType neighborScheme) { + const ShapeType cellShape = {3, 3, 3}; + auto& am = ds.getDataRefAs(cellDataPath); + auto& geom = ds.getDataRefAs(geomPath); + + // Quaternions: background = 60° X-rotation, pairs = identity (EBSDlib order: x,y,z,w) + const float32 bgHalf = 60.0f * k_DegToRad * 0.5f; + auto quatsDS = DataStoreUtilities::CreateDataStore(cellShape, {4}, IDataAction::Mode::Execute); + auto* quatsArr = DataArray::Create(ds, "Quats", quatsDS, am.getId()); + auto& quatsStore = quatsArr->getDataStoreRef(); + for(usize i = 0; i < 27; i++) + { + quatsStore[i * 4 + 0] = std::sin(bgHalf); + quatsStore[i * 4 + 1] = 0.0f; + quatsStore[i * 4 + 2] = 0.0f; + quatsStore[i * 4 + 3] = std::cos(bgHalf); + } + // Pair A,B: identity quat at (0,0,0) and (1,1,1) + for(usize idx : {static_cast(0), static_cast(1 * 9 + 1 * 3 + 1)}) + { + quatsStore[idx * 4 + 0] = 0.0f; + quatsStore[idx * 4 + 1] = 0.0f; + quatsStore[idx * 4 + 2] = 0.0f; + quatsStore[idx * 4 + 3] = 1.0f; + } + // Pair C,D: 30° X-rotation at (2,0,0) and (2,1,1) + const float32 pairHalf = 30.0f * k_DegToRad * 0.5f; + for(usize idx : {static_cast(0 * 9 + 0 * 3 + 2), static_cast(1 * 9 + 1 * 3 + 2)}) + { + quatsStore[idx * 4 + 0] = std::sin(pairHalf); + quatsStore[idx * 4 + 1] = 0.0f; + quatsStore[idx * 4 + 2] = 0.0f; + quatsStore[idx * 4 + 3] = std::cos(pairHalf); + } + + // Phases: all phase 1 + auto phasesDS = DataStoreUtilities::CreateDataStore(cellShape, {1}, IDataAction::Mode::Execute); + auto* phasesArr = DataArray::Create(ds, "Phases", phasesDS, am.getId()); + phasesArr->fill(1); + + // CrystalStructures: phase 0 = unknown, phase 1 = Cubic_High + const ShapeType ensShape = {2}; + auto* ensAM = AttributeMatrix::Create(ds, "CellEnsembleData", ensShape, geom.getId()); + auto crystDS = DataStoreUtilities::CreateDataStore(ensShape, {1}, IDataAction::Mode::Execute); + auto* crystArr = DataArray::Create(ds, "CrystalStructures", crystDS, ensAM->getId()); + auto& crystStore = crystArr->getDataStoreRef(); + crystStore[0] = 999; + crystStore[1] = 1; // Cubic_High + + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0f)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(neighborScheme)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(DataPath{})); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_IsPeriodic_Key, std::make_any(false)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(geomPath)); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(cellDataPath.createChildPath("Quats"))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(cellDataPath.createChildPath("Phases"))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(DataPath({"Geom", "CellEnsembleData", "CrystalStructures"}))); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any("CellFeatureData")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(EBSDSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + }; - UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); + RunFaceEdgeVertexConnectivityTest([&](Arguments& args, DataStructure& ds, const DataPath& gp, const DataPath& cp) { setupEBSD(args, ds, gp, cp, 0); }, + [&](Arguments& args, DataStructure& ds, const DataPath& gp, const DataPath& cp) { setupEBSD(args, ds, gp, cp, 1); }); } -TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures:MaskAll", "[OrientationAnalysis][EBSDSegmentFeatures]") +TEST_CASE("OrientationAnalysis::EBSDSegmentFeatures: Generate Test Data", "[OrientationAnalysis][EBSDSegmentFeatures][.GenerateTestData]") { UnitTest::LoadPlugins(); - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_test_data.tar.gz", "segment_features_test_data"); - // Read Exemplar DREAM3D File Filter - auto exemplarFilePath = fs::path(fmt::format("{}/segment_features_test_data/segment_features_test_data.dream3d", unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(exemplarFilePath); + const auto outputDir = fs::path(fmt::format("{}/generated_test_data/ebsd_segment_features", unit_test::k_BinaryTestOutputDir)); + fs::create_directories(outputDir); - // EBSD Segment Features/Semgent Features (Misorientation) Filter + // Small input data (15^3) — one geometry per test variant { - EBSDSegmentFeaturesFilter filter; - Arguments args; - - // Create default Parameters for the filter. - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MisorientationTolerance_Key, std::make_any(5.0F)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(1)); + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + const std::array dims = {k_SmallDim, k_SmallDim, k_SmallDim}; - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_UseMask_Key, std::make_any(true)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_MaskArrayPath)); + DataStructure ds; - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_SelectedImageGeometryPath_Key, std::make_any(ebsd_segment_features_constants::k_InputGeometryPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_QuatsArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_QuatsArrayPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellPhasesArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_PhasesArrayPath)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CrystalStructuresArrayPath_Key, std::make_any(ebsd_segment_features_constants::k_CrystalStructuresArrayPath)); + auto* amBase = BuildSegmentFeaturesTestGeometry(ds, dims, "Base", std::string(k_CellDataName)); + auto& geomBase = ds.getDataRefAs(DataPath({"Base"})); + BuildOrientationTestData(ds, cellShape, geomBase.getId(), amBase->getId(), 1, k_SmallBlockSize); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_FeatureIdsArrayName_Key, std::make_any(k_FeatureIds)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_CellFeatureAttributeMatrixName_Key, std::make_any(k_Grain_Data)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); - args.insertOrAssign(EBSDSegmentFeaturesFilter::k_RandomizeFeatureIds_Key, std::make_any(false)); + auto* amMasked = BuildSegmentFeaturesTestGeometry(ds, dims, "Masked", std::string(k_CellDataName)); + auto& geomMasked = ds.getDataRefAs(DataPath({"Masked"})); + BuildOrientationTestData(ds, cellShape, geomMasked.getId(), amMasked->getId(), 1, k_SmallBlockSize); + BuildSphericalMask(ds, cellShape, amMasked->getId()); - // Preflight the filter and check result - auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto* amPeriodic = BuildSegmentFeaturesTestGeometry(ds, dims, "Periodic", std::string(k_CellDataName)); + auto& geomPeriodic = ds.getDataRefAs(DataPath({"Periodic"})); + BuildOrientationTestData(ds, cellShape, geomPeriodic.getId(), amPeriodic->getId(), 1, k_SmallBlockSize, true); // wrapBoundary - // Execute the filter and check the result - auto executeResult = filter.execute(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + UnitTest::WriteTestDataStructure(ds, outputDir / "small_input.dream3d"); } + // Large input data (200^3) — periodic=true, no mask (sphere mask would eliminate boundary voxels, defeating periodic) { - UInt8Array& actives = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_ActivesArrayPath); - size_t numFeatures = actives.getNumberOfTuples(); - REQUIRE(numFeatures == 32); - } + const ShapeType cellShape = {k_LargeDim, k_LargeDim, k_LargeDim}; + const std::array dims = {k_LargeDim, k_LargeDim, k_LargeDim}; - // Loop and compare each array from the 'Exemplar Data / CellData' to the 'Data Container / CellData' group - { - const auto& generatedDataArray = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_FeatureIdsArrayPath); - const auto& exemplarDataArray = dataStructure.getDataRefAs(ebsd_segment_features_constants::k_FeatureIdsMaskAllPath); + DataStructure ds; + auto* am = BuildSegmentFeaturesTestGeometry(ds, dims, std::string(k_GeomName), std::string(k_CellDataName)); + auto& geom = ds.getDataRefAs(k_GeomPath); + BuildOrientationTestData(ds, cellShape, geom.getId(), am->getId(), 1, k_LargeBlockSize, true); // wrapBoundary - UnitTest::CompareDataArrays(generatedDataArray, exemplarDataArray); + UnitTest::WriteTestDataStructure(ds, outputDir / "large_input.dream3d"); } - - UnitTest::CheckArraysInheritTupleDims(dataStructure, SmallIn100::k_TupleCheckIgnoredPaths); } diff --git a/src/Plugins/SimplnxCore/CMakeLists.txt b/src/Plugins/SimplnxCore/CMakeLists.txt index 8175d378fe..5227fdfc73 100644 --- a/src/Plugins/SimplnxCore/CMakeLists.txt +++ b/src/Plugins/SimplnxCore/CMakeLists.txt @@ -247,11 +247,15 @@ set(AlgorithmList ExtractVertexGeometry FeatureFaceCurvature FillBadData + FillBadDataBFS + FillBadDataCCL FindNRingNeighbors FlyingEdges3D HierarchicalSmooth IdentifyDuplicateVertices IdentifySample + IdentifySampleBFS + IdentifySampleCCL InitializeData InitializeImageGeomCellData InterpolatePointCloudToRegularGrid diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp index 5cd59ae953..e9f4521ff0 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.cpp @@ -1,265 +1,15 @@ #include "FillBadData.hpp" -#include "simplnx/DataStructure/DataArray.hpp" -#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" -#include "simplnx/Utilities/DataGroupUtilities.hpp" -#include "simplnx/Utilities/FilterUtilities.hpp" -#include "simplnx/Utilities/MessageHelper.hpp" -#include "simplnx/Utilities/NeighborUtilities.hpp" +#include "FillBadDataBFS.hpp" +#include "FillBadDataCCL.hpp" -#include -#include +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" using namespace nx::core; -// ============================================================================= -// FillBadData Algorithm Overview -// ============================================================================= -// -// This file implements an optimized algorithm for filling bad data (voxels with -// FeatureId == 0) in image geometries. The algorithm handles out-of-core datasets -// efficiently by processing data in chunks and uses a four-phase approach: -// -// Phase 1: Chunk-Sequential Connected Component Labeling (CCL) -// - Process chunks sequentially, assigning provisional labels to bad data regions -// - Use Union-Find to track equivalences between labels across chunk boundaries -// - Track size of each connected component -// -// Phase 2: Global Resolution -// - Flatten Union-Find structure to resolve all equivalences -// - Accumulate region sizes to root labels -// -// Phase 3: Region Classification and Relabeling -// - Classify regions as "small" (below threshold) or "large" (above threshold) -// - Small regions: mark with -1 for filling in Phase 4 -// - Large regions: keep as 0 or assign to new phase (if requested) -// -// Phase 4: Iterative Morphological Fill -// - Iteratively fill -1 voxels by assigning them to the most common neighbor -// - Update all cell data arrays to match the filled voxels -// -// ============================================================================= - -namespace -{ -// ----------------------------------------------------------------------------- -// Helper function: Update data array tuples based on neighbor assignments -// ----------------------------------------------------------------------------- -// Copies data from neighbor voxels to fill bad data voxels (-1 values) -// This is used to propagate cell data attributes during the filling process -// -// @param featureIds The feature IDs array indicating which voxels are bad data -// @param outputDataStore The data array to update -// @param neighbors The neighbor assignments (index of the neighbor to copy from) -template -void FillBadDataUpdateTuples(const Int32AbstractDataStore& featureIds, AbstractDataStore& outputDataStore, const std::vector& neighbors) -{ - usize start = 0; - usize stop = outputDataStore.getNumberOfTuples(); - const usize numComponents = outputDataStore.getNumberOfComponents(); - - // Loop through all tuples in the data array - for(usize tupleIndex = start; tupleIndex < stop; tupleIndex++) - { - const int32 featureName = featureIds[tupleIndex]; - const int32 neighbor = neighbors[tupleIndex]; - - // Skip if no neighbor assignment - if(neighbor == tupleIndex) - { - continue; - } - - // Copy data from the valid neighbor to bad data voxel - // Only copy if the current voxel is bad data (-1) and the neighbor is valid (>0) - if(featureName < 0 && neighbor != -1 && featureIds[static_cast(neighbor)] > 0) - { - // Copy all components from neighbor tuple to current tuple - for(usize i = 0; i < numComponents; i++) - { - auto value = outputDataStore[neighbor * numComponents + i]; - outputDataStore[tupleIndex * numComponents + i] = value; - } - } - } -} - -// ----------------------------------------------------------------------------- -// Functor for type-dispatched tuple updates -// ----------------------------------------------------------------------------- -// Allows the FillBadDataUpdateTuples function to be called with runtime type dispatch -struct FillBadDataUpdateTuplesFunctor -{ - template - void operator()(const Int32AbstractDataStore& featureIds, IDataArray* outputIDataArray, const std::vector& neighbors) - { - auto& outputStore = outputIDataArray->template getIDataStoreRefAs>(); - FillBadDataUpdateTuples(featureIds, outputStore, neighbors); - } -}; -} // namespace - -// ============================================================================= -// ChunkAwareUnionFind Implementation -// ============================================================================= -// -// A Union-Find (Disjoint Set) data structure optimized for tracking connected -// component equivalences during chunk-sequential processing. Uses union-by-rank -// for efficient merging and defers path compression to a single flatten() pass -// to avoid redundant updates during construction. -// -// Key features: -// - Lazily creates entries as labels are encountered -// - Tracks rank for balanced union operations -// - Accumulates sizes at each label (not root) during construction -// - Single-pass path compression and size accumulation in flatten() -// ============================================================================= - -// ----------------------------------------------------------------------------- -// Find the root representative of a label's equivalence class -// ----------------------------------------------------------------------------- -// This performs a simple root lookup without path compression. Path compression -// is deferred to the flatten() method to avoid wasting cycles updating paths -// that will be modified again during later merges. -// -// @param x The label to find the root for -// @return The root label of the equivalence class -int64 ChunkAwareUnionFind::find(int64 x) -{ - // Create a parent entry if it doesn't exist (lazy initialization) - if(!m_Parent.contains(x)) - { - m_Parent[x] = x; - m_Rank[x] = 0; - m_Size[x] = 0; - } - - // Find root iteratively without using the path compression algorithm - // Path compression is deferred to flatten() to avoid wasting cycles - // during frequent merges where paths would be updated repeatedly - int64 root = x; - while(m_Parent[root] != root) - { - root = m_Parent[root]; - } - - return root; -} - -// ----------------------------------------------------------------------------- -// Unite two labels into the same equivalence class -// ----------------------------------------------------------------------------- -// Merges the sets containing labels a and b using union-by-rank heuristic. -// This keeps the tree balanced for better performance. -// -// @param a First label -// @param b Second label -void ChunkAwareUnionFind::unite(int64 a, int64 b) -{ - int64 rootA = find(a); - int64 rootB = find(b); - - // Already in the same set - if(rootA == rootB) - { - return; - } - - // Union by rank: attach the smaller tree object under the root of the larger tree - // This keeps the tree height logarithmic for better find() performance - if(m_Rank[rootA] < m_Rank[rootB]) - { - m_Parent[rootA] = rootB; - } - else if(m_Rank[rootA] > m_Rank[rootB]) - { - m_Parent[rootB] = rootA; - } - else - { - // Equal rank: arbitrarily choose rootA as the parent and increment its rank - m_Parent[rootB] = rootA; - m_Rank[rootA]++; - } -} - -// ----------------------------------------------------------------------------- -// Add voxel count to a label's size -// ----------------------------------------------------------------------------- -// During construction, sizes are accumulated at each label (not root). -// This allows concurrent size updates without needing to find roots. -// All sizes will be accumulated to roots during flatten(). -// -// @param label The label to add size to -// @param count Number of voxels to add -void ChunkAwareUnionFind::addSize(int64 label, uint64 count) -{ - // Add size to the label itself, not the root - // Sizes will be accumulated to roots during flatten() - m_Size[label] += count; -} - // ----------------------------------------------------------------------------- -// Get the total size of a label's equivalence class -// ----------------------------------------------------------------------------- -// Returns the accumulated size for a label's root. Should only be called -// after flatten() has been executed to get accurate totals. -// -// @param label The label to query -// @return Total number of voxels in the equivalence class -uint64 ChunkAwareUnionFind::getSize(int64 label) -{ - int64 root = find(label); - auto it = m_Size.find(root); - if(it == m_Size.end()) - { - return 0; - } - return it->second; -} - -// ----------------------------------------------------------------------------- -// Flatten the Union-Find structure with path compression -// ----------------------------------------------------------------------------- -// Performs a single-pass path compression and size accumulation after all -// merges are complete. This is more efficient than doing path compression -// during every find() operation when there are frequent merges. -// -// After flatten(): -// - Every label points directly to its root (fully compressed paths) -// - All sizes are accumulated at root labels -// - Subsequent find() and getSize() operations are O(1) -void ChunkAwareUnionFind::flatten() -{ - // First pass: flatten all parents with path compression - // Make every label point directly to its root for O(1) lookups - // This is done in a single pass after all merges to avoid wasting - // cycles updating paths repeatedly during construction - std::unordered_map finalRoots; - for(auto& [label, parent] : m_Parent) - { - int64 root = find(label); - finalRoots[label] = root; - } - - // Second pass: accumulate sizes to roots - // Sum up all the sizes from individual labels to their root representatives - std::unordered_map rootSizes; - for(const auto& [label, root] : finalRoots) - { - rootSizes[root] += m_Size[label]; - } - - // Replace maps with flattened versions for O(1) access - m_Parent = finalRoots; - m_Size = rootSizes; -} - -// ============================================================================= -// FillBadData Implementation -// ============================================================================= - -FillBadData::FillBadData(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, FillBadDataInputValues* inputValues) +FillBadData::FillBadData(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues) : m_DataStructure(dataStructure) , m_InputValues(inputValues) , m_ShouldCancel(shouldCancel) @@ -271,493 +21,9 @@ FillBadData::FillBadData(DataStructure& dataStructure, const IFilter::MessageHan FillBadData::~FillBadData() noexcept = default; // ----------------------------------------------------------------------------- -const std::atomic_bool& FillBadData::getCancel() const -{ - return m_ShouldCancel; -} - -// ============================================================================= -// PHASE 1: Chunk-Sequential Connected Component Labeling (CCL) -// ============================================================================= -// -// Performs connected component labeling on bad data voxels (FeatureId == 0) -// using a chunk-sequential scanline algorithm. This approach is optimized for -// out-of-core datasets where data is stored in chunks on the disk. -// -// Algorithm: -// 1. Process chunks sequentially, loading one chunk at a time -// 2. For each bad data voxel, check already-processed neighbors (-X, -Y, -Z) -// 3. If neighbors exist, reuse their label; otherwise assign new label -// 4. Track label equivalences in Union-Find structure -// 5. Track size of each connected component -// -// The scanline order ensures we only need to check 3 neighbors (previous in -// X, Y, and Z directions) instead of all 6 face neighbors, because later -// neighbors haven't been processed yet. -// -// @param featureIdsStore The feature IDs data store (maybe out-of-core) -// @param unionFind Union-Find structure for tracking label equivalences -// @param provisionalLabels Map from voxel index to assigned provisional label -// @param dims Image dimensions [X, Y, Z] -// ============================================================================= -void FillBadData::phaseOneCCL(Int32AbstractDataStore& featureIdsStore, ChunkAwareUnionFind& unionFind, std::unordered_map& provisionalLabels, const std::array& dims) -{ - // Use negative labels for bad data regions to distinguish from positive feature IDs - int64 nextLabel = -1; - - const uint64 numChunks = featureIdsStore.getNumberOfChunks(); - - // Process each chunk sequentially (load, process, unload) - for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) - { - // Load the current chunk into memory - featureIdsStore.loadChunk(chunkIdx); - - // Get chunk bounds (INCLUSIVE ranges in [Z, Y, X] order) - const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); - const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); - - // Process voxels in this chunk using scanline algorithm - // Iterate in Z-Y-X order (slowest to fastest) to maintain scanline consistency - // Note: chunk bounds are INCLUSIVE and in [Z, Y, X] order (slowest to fastest) - for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) - { - for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) - { - for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) - { - // Calculate linear index for current voxel - const usize index = z * dims[0] * dims[1] + y * dims[0] + x; - - // Only process bad data voxels (FeatureId == 0) - // Skip valid feature voxels (FeatureId > 0) - if(featureIdsStore[index] != 0) - { - continue; - } - - // Check already-processed neighbors (scanline order: -Z, -Y, -X) - // We only check "backward" neighbors because "forward" neighbors - // haven't been processed yet in the scanline order - std::vector neighborLabels; - - // Check -X neighbor - if(x > 0) - { - const usize neighborIdx = index - 1; - if(provisionalLabels.contains(neighborIdx) && featureIdsStore[neighborIdx] == 0) - { - neighborLabels.push_back(provisionalLabels[neighborIdx]); - } - } - - // Check -Y neighbor - if(y > 0) - { - const usize neighborIdx = index - dims[0]; - if(provisionalLabels.contains(neighborIdx) && featureIdsStore[neighborIdx] == 0) - { - neighborLabels.push_back(provisionalLabels[neighborIdx]); - } - } - - // Check -Z neighbor - if(z > 0) - { - const usize neighborIdx = index - dims[0] * dims[1]; - if(provisionalLabels.contains(neighborIdx) && featureIdsStore[neighborIdx] == 0) - { - neighborLabels.push_back(provisionalLabels[neighborIdx]); - } - } - - // Assign label based on neighbors - int64 assignedLabel; - if(neighborLabels.empty()) - { - // No labeled neighbors found - this is a new connected component - // Assign a new negative label and initialize in union-find - assignedLabel = nextLabel--; - unionFind.find(assignedLabel); // Initialize in union-find (creates entry) - } - else - { - // One or more labeled neighbors found - join their equivalence class - // Use the first neighbor's label as the representative - assignedLabel = neighborLabels[0]; - - // If multiple neighbors have different labels, unite them - // This handles the case where different regions merge at this voxel - for(usize i = 1; i < neighborLabels.size(); i++) - { - if(neighborLabels[i] != assignedLabel) - { - unionFind.unite(assignedLabel, neighborLabels[i]); - } - } - } - - // Store the assigned label for this voxel - provisionalLabels[index] = assignedLabel; - - // Increment the size count for this label (will be accumulated to root in flatten()) - unionFind.addSize(assignedLabel, 1); - } - } - } - } - - // Flush to ensure all chunks are written back to storage - featureIdsStore.flush(); -} - -// ============================================================================= -// PHASE 2: Global Resolution of Equivalences -// ============================================================================= -// -// Resolves all label equivalences from Phase 1 and accumulates region sizes. -// After this phase: -// - All labels point directly to their root representatives -// - All sizes are accumulated at root labels -// - Region sizes can be queried in O(1) time -// -// @param unionFind Union-Find structure containing label equivalences -// @param smallRegions Unused in current implementation (kept for interface compatibility) -// ============================================================================= -void FillBadData::phaseTwoGlobalResolution(ChunkAwareUnionFind& unionFind, std::unordered_set& smallRegions) -{ - // Flatten the union-find structure to: - // 1. Compress all paths (make every label point directly to root) - // 2. Accumulate all sizes to root labels - unionFind.flatten(); -} - -// ============================================================================= -// PHASE 3: Region Classification and Relabeling -// ============================================================================= -// -// Classifies bad data regions as "small" or "large" based on size threshold: -// - Small regions (< minAllowedDefectSize): marked with -1 for filling in Phase 4 -// - Large regions (>= minAllowedDefectSize): kept as 0 (or assigned new phase) -// -// This phase processes chunks to relabel voxels based on their region classification. -// Large regions may optionally be assigned to a new phase (if storeAsNewPhase is true). -// -// @param featureIdsStore The feature IDs data store -// @param cellPhasesPtr Cell phases array (maybe null) -// @param provisionalLabels Map from voxel index to provisional label (from Phase 1) -// @param smallRegions Unused in current implementation (kept for interface compatibility) -// @param unionFind Union-Find structure with resolved equivalences (from Phase 2) -// @param maxPhase Maximum existing phase value (for new phase assignment) -// ============================================================================= -void FillBadData::phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, const std::unordered_map& provisionalLabels, - const std::unordered_set& smallRegions, ChunkAwareUnionFind& unionFind, usize maxPhase) const -{ - const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); - const SizeVec3 udims = selectedImageGeom.getDimensions(); - const uint64 numChunks = featureIdsStore.getNumberOfChunks(); - - // Collect all unique root labels and their sizes - // After flatten(), all labels point to roots and sizes are accumulated - std::unordered_map rootSizes; - for(const auto& [index, label] : provisionalLabels) - { - int64 root = unionFind.find(label); - if(!rootSizes.contains(root)) - { - rootSizes[root] = unionFind.getSize(root); - } - } - - // Classify regions as small (need filling) or large (keep or assign to a new phase) - std::unordered_set localSmallRegions; - for(const auto& [root, size] : rootSizes) - { - if(static_cast(size) < m_InputValues->minAllowedDefectSizeValue) - { - localSmallRegions.insert(root); - } - } - - // Process each chunk to relabel voxels based on region classification - for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) - { - // Load chunk into memory - featureIdsStore.loadChunk(chunkIdx); - - // Get chunk bounds (INCLUSIVE ranges in [Z, Y, X] order) - const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); - const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); - - // Iterate through all voxels in this chunk - // Note: chunk bounds are INCLUSIVE and in [Z, Y, X] order (slowest to fastest) - for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) - { - for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) - { - for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) - { - const usize index = z * udims[0] * udims[1] + y * udims[0] + x; - - // Check if this voxel was labeled as bad data in Phase 1 - auto labelIter = provisionalLabels.find(index); - if(labelIter != provisionalLabels.end()) - { - // Find the root label for this voxel's connected component - int64 root = unionFind.find(labelIter->second); - - if(localSmallRegions.contains(root)) - { - // Small region - mark with -1 for filling in Phase 4 - featureIdsStore[index] = -1; - } - else - { - // Large region - keep as bad data (0) or assign to a new phase - featureIdsStore[index] = 0; - - // Optionally assign large bad data regions to a new phase - if(m_InputValues->storeAsNewPhase && cellPhasesPtr != nullptr) - { - (*cellPhasesPtr)[index] = static_cast(maxPhase) + 1; - } - } - } - } - } - } - } - - // Write all chunks back to storage - featureIdsStore.flush(); -} - -// ============================================================================= -// PHASE 4: Iterative Morphological Fill -// ============================================================================= -// -// Fills small bad data regions (marked with -1 in Phase 3) using iterative -// morphological dilation. Each iteration: -// 1. For each -1 voxel, find the most common positive feature among its neighbors -// 2. Assign that voxel to the most common neighbor's feature -// 3. Update all cell data arrays to match the filled voxels -// -// This process repeats until all -1 voxels have been filled. The algorithm -// gradually fills small defects from the edges inward, ensuring smooth boundaries. -// -// @param featureIdsStore The feature IDs data store -// @param dims Image dimensions [X, Y, Z] -// @param numFeatures Number of features in the dataset -// ============================================================================= -void FillBadData::phaseFourIterativeFill(Int32AbstractDataStore& featureIdsStore, const std::array& dims, usize numFeatures) const -{ - const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); - const usize totalPoints = featureIdsStore.getNumberOfTuples(); - - std::array neighborVoxelIndexOffsets = initializeFaceNeighborOffsets(dims); - std::array faceNeighborInternalIdx = initializeFaceNeighborInternalIdx(); - - // Neighbor assignment array: neighbors[i] = index of the neighbor to copy from - std::vector neighbors(totalPoints, -1); - - // Feature vote counter: tracks how many times each feature appears as the neighbor - std::vector featureNumber(numFeatures + 1, 0); - - // Get a list of all cell arrays that need to be updated during filling - // Exclude arrays specified in ignoredDataArrayPaths - std::optional> allChildArrays = GetAllChildDataPaths(m_DataStructure, selectedImageGeom.getCellDataPath(), DataObject::Type::DataArray, m_InputValues->ignoredDataArrayPaths); - std::vector voxelArrayNames; - if(allChildArrays.has_value()) - { - voxelArrayNames = allChildArrays.value(); - } - - // Create a message helper for throttled progress updates (1 update per second) - MessageHelper messageHelper(m_MessageHandler, std::chrono::milliseconds(1000)); - auto throttledMessenger = messageHelper.createThrottledMessenger(std::chrono::milliseconds(1000)); - - usize count = 1; // Number of voxels with -1 value that remain - usize iteration = 0; // Current iteration number - - // Iteratively fill until no voxels with -1 value remain - while(count != 0) - { - iteration++; - count = 0; // Reset count of voxels with a -1 value for this iteration - - // Pass 1: Determine neighbor assignments for all -1 voxels - // For each -1 voxel, find the most common positive feature among neighbors - for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) - { - int32 featureName = featureIdsStore[voxelIndex]; - - // Only process voxels marked for filling (-1) - if(featureName < 0) - { - count++; // Count this voxel as needing filling - int32 most = 0; // Highest vote count seen so far - - // Compute 3D position from the linear index - int64 xIdx = voxelIndex % dims[0]; - int64 yIdx = (voxelIndex / dims[0]) % dims[1]; - int64 zIdx = voxelIndex / (dims[0] * dims[1]); - - // Vote for the most common positive neighbor feature - // Loop over the 6 face neighbors of the voxel - std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); - for(const auto& faceIndex : faceNeighborInternalIdx) - { - // Skip neighbors outside image bounds - if(!isValidFaceNeighbor[faceIndex]) - { - continue; - } - - auto neighborPoint = voxelIndex + neighborVoxelIndexOffsets[faceIndex]; - int32 feature = featureIdsStore[neighborPoint]; - - // Only vote for positive features (valid data) - if(feature > 0) - { - // Increment vote count for this feature - featureNumber[feature]++; - int32 current = featureNumber[feature]; - - // Track the feature with the most votes - if(current > most) - { - most = current; - neighbors[voxelIndex] = static_cast(neighborPoint); // Store neighbor to copy from - } - } - } - - // Reset vote counters for next voxel - // Only reset features that were actually counted to save time - // Loop over the 6 face neighbors of the voxel - isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); - for(const auto& faceIndex : faceNeighborInternalIdx) - { - if(!isValidFaceNeighbor[faceIndex]) - { - continue; - } - - int64 neighborPoint = voxelIndex + neighborVoxelIndexOffsets[faceIndex]; - int32 feature = featureIdsStore[neighborPoint]; - - if(feature > 0) - { - featureNumber[feature] = 0; - } - } - } - } - - // Pass 2: Update all cell data arrays based on neighbor assignments - // This propagates all cell data attributes (not just feature IDs) to filled voxels - for(const auto& cellArrayPath : voxelArrayNames) - { - // Skip the feature IDs array (will be updated separately below) - if(cellArrayPath == m_InputValues->featureIdsArrayPath) - { - continue; - } - - auto* oldCellArray = m_DataStructure.getDataAs(cellArrayPath); - - // Use the type-dispatched update function to handle all data types - ExecuteDataFunction(FillBadDataUpdateTuplesFunctor{}, oldCellArray->getDataType(), featureIdsStore, oldCellArray, neighbors); - } - - // Update FeatureIds array last to finalize the iteration - FillBadDataUpdateTuples(featureIdsStore, featureIdsStore, neighbors); - - // Send throttled progress update (max 1 per second) - throttledMessenger.sendThrottledMessage([iteration, count]() { return fmt::format(" Iteration {}: {} voxels remaining to fill", iteration, count); }); - } - - // Send final completion summary - m_MessageHandler({IFilter::Message::Type::Info, fmt::format(" Completed in {} iteration{}", iteration, iteration == 1 ? "" : "s")}); -} - -// ============================================================================= -// Main Algorithm Entry Point -// ============================================================================= -// -// Executes the four-phase bad data filling algorithm: -// 1. Chunk-Sequential CCL: Label connected components of bad data -// 2. Global Resolution: Resolve equivalences and accumulate sizes -// 3. Region Classification: Classify regions as small or large -// 4. Iterative Fill: Fill small regions using morphological dilation -// -// @return Result indicating success or failure -// ============================================================================= -Result<> FillBadData::operator()() const +Result<> FillBadData::operator()() { - // Get feature IDs array and image geometry - auto& featureIdsStore = m_DataStructure.getDataAs(m_InputValues->featureIdsArrayPath)->getDataStoreRef(); - const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); - const SizeVec3 udims = selectedImageGeom.getDimensions(); - - // Convert dimensions to signed integers for offset calculations - std::array dims = { - static_cast(udims[0]), - static_cast(udims[1]), - static_cast(udims[2]), - }; - - const usize totalPoints = featureIdsStore.getNumberOfTuples(); - - // Get cell phases array if we need to assign large regions to a new phase - Int32Array* cellPhasesPtr = nullptr; - usize maxPhase = 0; - - if(m_InputValues->storeAsNewPhase) - { - cellPhasesPtr = m_DataStructure.getDataAs(m_InputValues->cellPhasesArrayPath); - - // Find the maximum existing phase value - for(usize i = 0; i < totalPoints; i++) - { - if((*cellPhasesPtr)[i] > maxPhase) - { - maxPhase = (*cellPhasesPtr)[i]; - } - } - } - - // Count the number of existing features for array sizing - usize numFeatures = 0; - for(usize i = 0; i < totalPoints; i++) - { - int32 featureName = featureIdsStore[i]; - if(featureName > numFeatures) - { - numFeatures = featureName; - } - } - - // Initialize data structures for chunk-aware connected component labeling - ChunkAwareUnionFind unionFind; // Tracks label equivalences and sizes - std::unordered_map provisionalLabels; // Maps voxel index to provisional label - std::unordered_set smallRegions; // Set of small region roots (unused currently) - - // Phase 1: Chunk-Sequential Connected Component Labeling - m_MessageHandler({IFilter::Message::Type::Info, "Phase 1/4: Labeling connected components..."}); - phaseOneCCL(featureIdsStore, unionFind, provisionalLabels, dims); - - // Phase 2: Global Resolution of equivalences - m_MessageHandler({IFilter::Message::Type::Info, "Phase 2/4: Resolving region equivalences..."}); - phaseTwoGlobalResolution(unionFind, smallRegions); - - // Phase 3: Relabeling based on region size classification - m_MessageHandler({IFilter::Message::Type::Info, "Phase 3/4: Classifying region sizes..."}); - phaseThreeRelabeling(featureIdsStore, cellPhasesPtr, provisionalLabels, smallRegions, unionFind, maxPhase); - - // Phase 4: Iterative morphological fill - m_MessageHandler({IFilter::Message::Type::Info, "Phase 4/4: Filling small defects..."}); - phaseFourIterativeFill(featureIdsStore, dims, numFeatures); + auto* featureIdsArray = m_DataStructure.getDataAs(m_InputValues->featureIdsArrayPath); - return {}; + return DispatchAlgorithm({featureIdsArray}, m_DataStructure, m_MessageHandler, m_ShouldCancel, m_InputValues); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp index 1e994f2948..db21d91419 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadData.hpp @@ -1,4 +1,3 @@ - #pragma once #include "SimplnxCore/SimplnxCore_export.hpp" @@ -7,71 +6,11 @@ #include "simplnx/DataStructure/DataStructure.hpp" #include "simplnx/Filter/IFilter.hpp" -#include -#include #include namespace nx::core { -// Forward declarations -template -class DataArray; -using Int32Array = DataArray; - -template -class AbstractDataStore; -using Int32AbstractDataStore = AbstractDataStore; - -/** - * @class ChunkAwareUnionFind - * @brief Union-Find data structure for tracking connected component equivalences across chunks - */ -class SIMPLNXCORE_EXPORT ChunkAwareUnionFind -{ -public: - ChunkAwareUnionFind() = default; - ~ChunkAwareUnionFind() = default; - - /** - * @brief Find the root label with path compression - * @param x Label to find - * @return Root label - */ - int64 find(int64 x); - - /** - * @brief Unite two labels into the same equivalence class - * @param a First label - * @param b Second label - */ - void unite(int64 a, int64 b); - - /** - * @brief Add to the size count for a label - * @param label Label to update - * @param count Number of voxels to add - */ - void addSize(int64 label, uint64 count); - - /** - * @brief Get the total size of a label's equivalence class - * @param label Label to query - * @return Total number of voxels in the equivalence class - */ - uint64 getSize(int64 label); - - /** - * @brief Flatten the union-find structure and sum sizes to roots - */ - void flatten(); - -private: - std::unordered_map m_Parent; - std::unordered_map m_Rank; - std::unordered_map m_Size; -}; - struct SIMPLNXCORE_EXPORT FillBadDataInputValues { int32 minAllowedDefectSizeValue; @@ -84,12 +23,23 @@ struct SIMPLNXCORE_EXPORT FillBadDataInputValues /** * @class FillBadData - + * @brief Dispatcher that selects between BFS (in-core) and CCL (out-of-core) algorithms. + * + * @see FillBadDataBFS for the in-core-optimized implementation. + * @see FillBadDataCCL for the out-of-core-optimized implementation. + * @see AlgorithmDispatch.hpp for the dispatch mechanism. */ class SIMPLNXCORE_EXPORT FillBadData { public: - FillBadData(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, FillBadDataInputValues* inputValues); + /** + * @brief Constructs the dispatcher with the required context for algorithm selection. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling fill behavior. + */ + FillBadData(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues); ~FillBadData() noexcept; FillBadData(const FillBadData&) = delete; @@ -97,47 +47,13 @@ class SIMPLNXCORE_EXPORT FillBadData FillBadData& operator=(const FillBadData&) = delete; FillBadData& operator=(FillBadData&&) noexcept = delete; - Result<> operator()() const; - - const std::atomic_bool& getCancel() const; - -private: /** - * @brief Phase 1: Chunk-sequential connected component labeling - * @param featureIdsStore Feature IDs data store - * @param unionFind Union-find structure for tracking equivalences - * @param provisionalLabels Map from voxel index to provisional label - * @param dims Image geometry dimensions + * @brief Dispatches to either BFS or CCL algorithm based on data residency. + * @return Result indicating success or an error with a descriptive message. */ - static void phaseOneCCL(Int32AbstractDataStore& featureIdsStore, ChunkAwareUnionFind& unionFind, std::unordered_map& provisionalLabels, const std::array& dims); - - /** - * @brief Phase 2: Global resolution of equivalences and region classification - * @param unionFind Union-find structure to flatten - * @param smallRegions Output set of labels for small regions that need filling - */ - static void phaseTwoGlobalResolution(ChunkAwareUnionFind& unionFind, std::unordered_set& smallRegions); - - /** - * @brief Phase 3: Relabel voxels based on region classification - * @param featureIdsStore Feature IDs data store - * @param cellPhasesPtr Cell phases array (could be null) - * @param provisionalLabels Map from voxel index to provisional label - * @param smallRegions Set of labels for small regions - * @param unionFind Union-find for looking up equivalences - * @param maxPhase Maximum phase value (for new phase assignment) - */ - void phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, const std::unordered_map& provisionalLabels, - const std::unordered_set& smallRegions, ChunkAwareUnionFind& unionFind, size_t maxPhase) const; - - /** - * @brief Phase 4: Iterative morphological fill - * @param featureIdsStore Feature IDs data store - * @param dims Image geometry dimensions - * @param numFeatures Number of features - */ - void phaseFourIterativeFill(Int32AbstractDataStore& featureIdsStore, const std::array& dims, size_t numFeatures) const; + Result<> operator()(); +private: DataStructure& m_DataStructure; const FillBadDataInputValues* m_InputValues = nullptr; const std::atomic_bool& m_ShouldCancel; diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.cpp new file mode 100644 index 0000000000..5c2f72b6a3 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.cpp @@ -0,0 +1,386 @@ +#include "FillBadDataBFS.hpp" + +#include "FillBadData.hpp" + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/DataGroupUtilities.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" + +using namespace nx::core; + +namespace +{ +// ----------------------------------------------------------------------------- +// FillBadDataUpdateTuples +// ----------------------------------------------------------------------------- +// Copies cell data array values from a good neighbor voxel to each bad data +// voxel. The `neighbors` vector maps each voxel index to the index of its best +// source neighbor (determined by majority vote in the iterative fill loop). +// +// Only voxels satisfying ALL of the following conditions are updated: +// - featureId < 0 (marked as small bad-data region needing fill) +// - neighbor != -1 (a valid source neighbor was found) +// - neighbor != tupleIndex (not self-referencing; default sentinel) +// - featureIds[neighbor] > 0 (the source is a real feature, not bad data) +// +// All components of the tuple are copied (e.g., 3-component RGB, 6-component +// tensor, etc.), preserving multi-component array semantics. +// ----------------------------------------------------------------------------- +template +void FillBadDataUpdateTuples(const Int32AbstractDataStore& featureIds, AbstractDataStore& outputDataStore, const std::vector& neighbors) +{ + usize start = 0; + usize stop = outputDataStore.getNumberOfTuples(); + const usize numComponents = outputDataStore.getNumberOfComponents(); + for(usize tupleIndex = start; tupleIndex < stop; tupleIndex++) + { + const int32 featureName = featureIds[tupleIndex]; + const int32 neighbor = neighbors[tupleIndex]; + if(neighbor == tupleIndex) + { + continue; + } + + if(featureName < 0 && neighbor != -1 && featureIds[static_cast(neighbor)] > 0) + { + for(usize i = 0; i < numComponents; i++) + { + auto value = outputDataStore[neighbor * numComponents + i]; + outputDataStore[tupleIndex * numComponents + i] = value; + } + } + } +} + +struct FillBadDataUpdateTuplesFunctor +{ + template + void operator()(const Int32AbstractDataStore& featureIds, IDataArray* outputIDataArray, const std::vector& neighbors) + { + auto& outputStore = outputIDataArray->template getIDataStoreRefAs>(); + FillBadDataUpdateTuples(featureIds, outputStore, neighbors); + } +}; +} // namespace + +// ============================================================================= +FillBadDataBFS::FillBadDataBFS(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues) +: m_DataStructure(dataStructure) +, m_InputValues(inputValues) +, m_ShouldCancel(shouldCancel) +, m_MessageHandler(mesgHandler) +{ +} + +// ----------------------------------------------------------------------------- +FillBadDataBFS::~FillBadDataBFS() noexcept = default; + +// ============================================================================= +// FillBadDataBFS::operator() +// ============================================================================= +// BFS-based flood-fill algorithm for replacing bad data voxels with values +// from neighboring good features. The algorithm has three main steps: +// +// Step 1: Find the maximum feature ID (and optionally maximum phase). +// +// Step 2: BFS flood-fill to discover connected regions of bad data +// (featureId == 0). Each region is classified by size: +// - Large regions (>= minAllowedDefectSize): kept as voids (featureId +// stays 0, optionally assigned a new phase). +// - Small regions (< threshold): marked with featureId = -1 for filling. +// +// Step 3: Iterative morphological dilation. Each iteration scans all -1 +// voxels, finds the neighboring good feature with the most face-adjacent +// votes (majority vote), and records the best neighbor. Then copies all +// cell data components from that neighbor to the -1 voxel. Repeats until +// no -1 voxels remain. FeatureIds are updated LAST to avoid changing the +// vote source mid-iteration. +// +// NOTE: This algorithm uses O(N) memory (neighbors + alreadyChecked + +// featureNumber vectors), making it unsuitable for very large OOC datasets. +// Use FillBadDataCCL for out-of-core compatible processing. +// ============================================================================= +Result<> FillBadDataBFS::operator()() +{ + auto& featureIdsStore = m_DataStructure.getDataAs(m_InputValues->featureIdsArrayPath)->getDataStoreRef(); + const size_t totalPoints = featureIdsStore.getNumberOfTuples(); + + // O(N) allocations: one int32 per voxel for neighbor mapping, one bit per + // voxel for BFS visited tracking + std::vector neighbors(totalPoints, -1); + std::vector alreadyChecked(totalPoints, false); + + const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); + const SizeVec3 udims = selectedImageGeom.getDimensions(); + + Int32Array* cellPhasesPtr = nullptr; + + if(m_InputValues->storeAsNewPhase) + { + cellPhasesPtr = m_DataStructure.getDataAs(m_InputValues->cellPhasesArrayPath); + } + + std::array dims = { + static_cast(udims[0]), + static_cast(udims[1]), + static_cast(udims[2]), + }; + + size_t count = 1; + size_t numFeatures = 0; + size_t maxPhase = 0; + + // --- Step 1: Find the maximum feature ID across all voxels ---------------- + // This value is used to size the featureNumber vote counter in Step 3. + for(size_t i = 0; i < totalPoints; i++) + { + int32 featureName = featureIdsStore[i]; + if(featureName > numFeatures) + { + numFeatures = featureName; + } + } + + // Optionally find the maximum phase so large void regions can be assigned + // to (maxPhase + 1), creating a distinct phase for visualization. + if(m_InputValues->storeAsNewPhase) + { + for(size_t i = 0; i < totalPoints; i++) + { + if((*cellPhasesPtr)[i] > maxPhase) + { + maxPhase = (*cellPhasesPtr)[i]; + } + } + } + + // Face-neighbor offsets in flat index space: -Z, -Y, -X, +X, +Y, +Z + std::array neighborPoints = {-dims[0] * dims[1], -dims[0], -1, 1, dims[0], dims[0] * dims[1]}; + std::vector currentVisitedList; + + // --- Step 2: BFS flood-fill to classify bad data regions ------------------ + // Mark all non-zero voxels as already checked (they are good features). + // Then BFS from each unchecked voxel with featureId == 0 to discover + // contiguous bad data regions. + for(size_t iter = 0; iter < totalPoints; iter++) + { + alreadyChecked[iter] = false; + if(featureIdsStore[iter] != 0) + { + alreadyChecked[iter] = true; + } + } + + for(size_t i = 0; i < totalPoints; i++) + { + if(!alreadyChecked[i] && featureIdsStore[i] == 0) + { + // Start a new BFS from this seed voxel to discover all connected + // bad-data voxels in this region + currentVisitedList.push_back(static_cast(i)); + count = 0; + while(count < currentVisitedList.size()) + { + int64_t index = currentVisitedList[count]; + int64 column = index % dims[0]; + int64 row = (index / dims[0]) % dims[1]; + int64 plane = index / (dims[0] * dims[1]); + // Check all 6 face-adjacent neighbors, with boundary guard checks + for(int32_t j = 0; j < 6; j++) + { + int64_t neighbor = index + neighborPoints[j]; + if(j == 0 && plane == 0) + { + continue; + } + if(j == 5 && plane == (dims[2] - 1)) + { + continue; + } + if(j == 1 && row == 0) + { + continue; + } + if(j == 4 && row == (dims[1] - 1)) + { + continue; + } + if(j == 2 && column == 0) + { + continue; + } + if(j == 3 && column == (dims[0] - 1)) + { + continue; + } + if(featureIdsStore[neighbor] == 0 && !alreadyChecked[neighbor]) + { + currentVisitedList.push_back(neighbor); + alreadyChecked[neighbor] = true; + } + } + count++; + } + // Classify this region by size: + // Large regions (>= threshold): keep as voids (featureId = 0), + // optionally assign to a new phase for visualization. + if((int32_t)currentVisitedList.size() >= m_InputValues->minAllowedDefectSizeValue) + { + for(const auto& currentIndex : currentVisitedList) + { + featureIdsStore[currentIndex] = 0; + if(m_InputValues->storeAsNewPhase) + { + (*cellPhasesPtr)[currentIndex] = static_cast(maxPhase) + 1; + } + } + } + // Small regions (< threshold): mark with -1 to indicate they should + // be filled in Step 3 by copying data from neighboring good features. + if((int32_t)currentVisitedList.size() < m_InputValues->minAllowedDefectSizeValue) + { + for(const auto& currentIndex : currentVisitedList) + { + featureIdsStore[currentIndex] = -1; + } + } + currentVisitedList.clear(); + } + } + + // --- Step 3: Iterative morphological dilation ----------------------------- + // Vote counter indexed by feature ID. O(numFeatures) memory. + std::vector featureNumber(numFeatures + 1, 0); + + // Collect all cell data arrays that need updating when a voxel is filled + // (excludes user-specified ignored arrays) + std::optional> allChildArrays = GetAllChildDataPaths(m_DataStructure, selectedImageGeom.getCellDataPath(), DataObject::Type::DataArray, m_InputValues->ignoredDataArrayPaths); + std::vector voxelArrayNames; + if(allChildArrays.has_value()) + { + voxelArrayNames = allChildArrays.value(); + } + + // Iterate until no -1 voxels remain. Each iteration grows the good-data + // boundary inward by one voxel layer (morphological dilation). + while(count != 0) + { + count = 0; + for(size_t i = 0; i < totalPoints; i++) + { + int32 featureName = featureIdsStore[i]; + if(featureName < 0) + { + count++; + int32 most = 0; + int64 xIndex = static_cast(i % dims[0]); + int64 yIndex = static_cast((i / dims[0]) % dims[1]); + int64 zIndex = static_cast(i / (dims[0] * dims[1])); + + // First neighbor loop: tally votes from face-adjacent good features. + // Each good neighbor increments featureNumber[its featureId]. The + // feature with the highest vote count wins (majority vote), and + // neighbors[i] records the winning neighbor's voxel index. + for(int32_t j = 0; j < 6; j++) + { + auto neighborPoint = static_cast(i + neighborPoints[j]); + if(j == 0 && zIndex == 0) + { + continue; + } + if(j == 5 && zIndex == (dims[2] - 1)) + { + continue; + } + if(j == 1 && yIndex == 0) + { + continue; + } + if(j == 4 && yIndex == (dims[1] - 1)) + { + continue; + } + if(j == 2 && xIndex == 0) + { + continue; + } + if(j == 3 && xIndex == (dims[0] - 1)) + { + continue; + } + + int32 feature = featureIdsStore[neighborPoint]; + if(feature > 0) + { + featureNumber[feature]++; + int32 current = featureNumber[feature]; + if(current > most) + { + most = current; + neighbors[i] = static_cast(neighborPoint); + } + } + } + // Second neighbor loop: reset the vote counters for only the features + // that were incremented above. This avoids zeroing the entire + // featureNumber vector (which would be O(numFeatures) per voxel). + for(int32_t j = 0; j < 6; j++) + { + int64 neighborPoint = static_cast(i) + neighborPoints[j]; + if(j == 0 && zIndex == 0) + { + continue; + } + if(j == 5 && zIndex == (dims[2] - 1)) + { + continue; + } + if(j == 1 && yIndex == 0) + { + continue; + } + if(j == 4 && yIndex == (dims[1] - 1)) + { + continue; + } + if(j == 2 && xIndex == 0) + { + continue; + } + if(j == 3 && xIndex == (dims[0] - 1)) + { + continue; + } + + int32 feature = featureIdsStore[neighborPoint]; + if(feature > 0) + { + featureNumber[feature] = 0; + } + } + } + } + + // Apply fills: update all non-featureIds cell arrays first by copying + // all components from the winning neighbor to the bad voxel. + for(const auto& cellArrayPath : voxelArrayNames) + { + if(cellArrayPath == m_InputValues->featureIdsArrayPath) + { + continue; + } + auto* oldCellArray = m_DataStructure.getDataAs(cellArrayPath); + + ExecuteDataFunction(FillBadDataUpdateTuplesFunctor{}, oldCellArray->getDataType(), featureIdsStore, oldCellArray, neighbors); + } + + // Update FeatureIds LAST: the FillBadDataUpdateTuples calls above rely + // on featureIds to check that the source neighbor is still a valid good + // feature (featureId > 0). If featureIds were updated first, a freshly + // filled voxel could become a vote source before its other arrays were + // copied, leading to inconsistent data. + FillBadDataUpdateTuples(featureIdsStore, featureIdsStore, neighbors); + } + return {}; +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.hpp new file mode 100644 index 0000000000..6291ce0e3c --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataBFS.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include "SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Filter/IFilter.hpp" + +namespace nx::core +{ + +struct FillBadDataInputValues; + +/** + * @class FillBadDataBFS + * @brief BFS flood-fill algorithm for filling bad data regions. + * + * This is the in-core-optimized implementation. It uses BFS (breadth-first search) + * to identify connected components of bad data, then iteratively fills small regions + * by voting among face neighbors. Uses O(N) temporary buffers (neighbors, alreadyChecked) + * which is efficient when data fits in RAM. + * + * @see FillBadDataCCL for the out-of-core-optimized alternative. + * @see AlgorithmDispatch.hpp for the dispatch mechanism that selects between them. + */ +class SIMPLNXCORE_EXPORT FillBadDataBFS +{ +public: + /** + * @brief Constructs the BFS fill algorithm with the required context. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling fill behavior. + */ + FillBadDataBFS(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues); + ~FillBadDataBFS() noexcept; + + FillBadDataBFS(const FillBadDataBFS&) = delete; + FillBadDataBFS(FillBadDataBFS&&) noexcept = delete; + FillBadDataBFS& operator=(const FillBadDataBFS&) = delete; + FillBadDataBFS& operator=(FillBadDataBFS&&) noexcept = delete; + + /** + * @brief Executes the BFS flood-fill algorithm to identify and fill bad data regions. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> operator()(); + +private: + DataStructure& m_DataStructure; + const FillBadDataInputValues* m_InputValues = nullptr; + const std::atomic_bool& m_ShouldCancel; + const IFilter::MessageHandler& m_MessageHandler; +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.cpp new file mode 100644 index 0000000000..fdf843ecc1 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.cpp @@ -0,0 +1,605 @@ +#include "FillBadDataCCL.hpp" + +#include "FillBadData.hpp" + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/DataGroupUtilities.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" +#include "simplnx/Utilities/MessageHelper.hpp" +#include "simplnx/Utilities/NeighborUtilities.hpp" + +#include + +using namespace nx::core; + +// ============================================================================= +// FillBadData Algorithm Overview +// ============================================================================= +// +// This file implements an optimized algorithm for filling bad data (voxels with +// FeatureId == 0) in image geometries. The algorithm handles out-of-core datasets +// efficiently by processing data in chunks and uses a four-phase approach: +// +// Phase 1: Chunk-Sequential Connected Component Labeling (CCL) +// - Process chunks sequentially, assigning provisional labels to bad data regions +// - Use Union-Find to track equivalences between labels across chunk boundaries +// - Track size of each connected component +// +// Phase 2: Global Resolution +// - Flatten Union-Find structure to resolve all equivalences +// - Accumulate region sizes to root labels +// +// Phase 3: Region Classification and Relabeling +// - Classify regions as "small" (below threshold) or "large" (above threshold) +// - Small regions: mark with -1 for filling in Phase 4 +// - Large regions: keep as 0 or assign to new phase (if requested) +// +// Phase 4: Iterative Morphological Fill (On-Disk Deferred) +// - Uses a temporary file to defer fills: Pass 1 writes (dest, src) pairs, +// Pass 2 reads them back and applies fills. +// - No O(N) memory allocations — uses O(features) vote counters + temp file I/O. +// +// ============================================================================= + +namespace +{ +// ----------------------------------------------------------------------------- +// Helper: Copy all components of a single tuple from src to dest in a data store. +// ----------------------------------------------------------------------------- +template +void copyTuple(AbstractDataStore& store, int64 dest, int64 src) +{ + const usize numComp = store.getNumberOfComponents(); + for(usize c = 0; c < numComp; c++) + { + store[dest * numComp + c] = store[src * numComp + c]; + } +} + +// Functor for type-dispatched single-tuple copy +struct CopyTupleFunctor +{ + template + void operator()(IDataArray* dataArray, int64 dest, int64 src) + { + auto& store = dataArray->template getIDataStoreRefAs>(); + copyTuple(store, dest, src); + } +}; + +// RAII wrapper for std::FILE* that guarantees cleanup of the temporary file +// on destruction. This ensures the temp file is closed (and thus deleted by +// the OS, since std::tmpfile creates an anonymous file) even if Phase 4 +// returns early due to cancellation or error. Copy/assignment are deleted +// to enforce single-ownership semantics. +struct TempFileGuard +{ + std::FILE* file = nullptr; + + TempFileGuard() = default; + ~TempFileGuard() + { + if(file != nullptr) + { + std::fclose(file); + } + } + + TempFileGuard(const TempFileGuard&) = delete; + TempFileGuard& operator=(const TempFileGuard&) = delete; +}; +} // namespace + +// ============================================================================= +// FillBadData Implementation +// ============================================================================= + +FillBadDataCCL::FillBadDataCCL(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues) +: m_DataStructure(dataStructure) +, m_InputValues(inputValues) +, m_ShouldCancel(shouldCancel) +, m_MessageHandler(mesgHandler) +{ +} + +// ----------------------------------------------------------------------------- +FillBadDataCCL::~FillBadDataCCL() noexcept = default; + +// ----------------------------------------------------------------------------- +const std::atomic_bool& FillBadDataCCL::getCancel() const +{ + return m_ShouldCancel; +} + +// ============================================================================= +// PHASE 1: Chunk-Sequential Connected Component Labeling (CCL) +// ============================================================================= +// +// Performs connected component labeling on bad data voxels (FeatureId == 0) +// using a chunk-sequential scanline algorithm. Uses positive labels and an +// in-memory provisional labels buffer to avoid cross-chunk OOC reads. +// +// @param featureIdsStore The feature IDs data store (maybe out-of-core) +// @param unionFind Union-Find structure for tracking label equivalences +// @param nextLabel Next label to assign (incremented as new labels are created) +// @param dims Image dimensions [X, Y, Z] +// ============================================================================= +void FillBadDataCCL::phaseOneCCL(Int32AbstractDataStore& featureIdsStore, UnionFind& unionFind, int32& nextLabel, const std::array& dims) +{ + const uint64 numChunks = featureIdsStore.getNumberOfChunks(); + const usize sliceSize = static_cast(dims[0]) * static_cast(dims[1]); + + // Rolling 2-slice buffer for backward neighbor label reads. + // The scanline CCL algorithm only needs to look at three backward neighbors: + // x-1 (same slice), y-1 (same slice), and z-1 (previous slice). So we only + // need the current and immediately previous Z-slice labels in memory. The + // buffer alternates between even/odd Z indices via (z % 2) indexing. + // This gives O(dimX * dimY) memory instead of O(volume). + std::vector labelBuffer(2 * sliceSize, 0); + + // Track the last Z-slice index whose buffer region was cleared. This is + // an optimization for the case where a single Z-slice spans multiple OOC + // chunks (e.g., chunk shape that splits within a Z-plane). Without this + // guard, re-entering the same Z from the next chunk would zero out labels + // already written by the previous chunk for that same Z-slice. + int64 lastClearedZ = -1; + + // Process each chunk sequentially + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + featureIdsStore.loadChunk(chunkIdx); + + const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); + const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); + + for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) + { + // Clear current slice in rolling buffer only when entering a NEW z value. + // A single Z-slice may span multiple chunks, so we must not re-clear + // data written by a previous chunk for the same z. + const usize curOff = (z % 2) * sliceSize; + if(static_cast(z) != lastClearedZ) + { + std::fill(labelBuffer.begin() + curOff, labelBuffer.begin() + curOff + sliceSize, 0); + lastClearedZ = static_cast(z); + } + const usize prevOff = ((z + 1) % 2) * sliceSize; + + for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) + { + for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) + { + const usize index = z * sliceSize + y * static_cast(dims[0]) + x; + const usize inSlice = y * static_cast(dims[0]) + x; + + // Only process bad data voxels (FeatureId == 0) + if(featureIdsStore[index] != 0) + { + continue; + } + + // Check backward neighbors using rolling buffer + int32 assignedLabel = 0; + + if(x > 0) + { + int32 neighLabel = labelBuffer[curOff + inSlice - 1]; + if(neighLabel > 0) + { + assignedLabel = neighLabel; + } + } + + if(y > 0) + { + int32 neighLabel = labelBuffer[curOff + inSlice - static_cast(dims[0])]; + if(neighLabel > 0) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + + if(z > 0) + { + int32 neighLabel = labelBuffer[prevOff + inSlice]; + if(neighLabel > 0) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + + if(assignedLabel == 0) + { + assignedLabel = nextLabel++; + unionFind.find(assignedLabel); + } + + // Write the provisional label to both the rolling buffer (for + // backward neighbor reads by subsequent voxels) and the featureIds + // store (persisted for Phases 2-3 to read back). + labelBuffer[curOff + inSlice] = assignedLabel; + featureIdsStore[index] = assignedLabel; + + // Accumulate region size: each voxel contributes 1 to its label. + // After Phase 2 flattening, sizes are aggregated to root labels + // so we can classify regions by total voxel count. + unionFind.addSize(assignedLabel, 1); + } + } + } + } + + featureIdsStore.flush(); +} + +// ============================================================================= +// PHASE 2: Global Resolution of Equivalences +// ============================================================================= +void FillBadDataCCL::phaseTwoGlobalResolution(UnionFind& unionFind) +{ + unionFind.flatten(); +} + +// ============================================================================= +// PHASE 3: Region Classification and Relabeling +// ============================================================================= +// +// Classifies bad data regions as "small" or "large" based on size threshold: +// - Small regions (< minAllowedDefectSize): marked with -1 for filling in Phase 4 +// - Large regions (>= minAllowedDefectSize): kept as 0 (or assigned new phase) +// ============================================================================= +void FillBadDataCCL::phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, int32 startLabel, int32 nextLabel, UnionFind& unionFind, usize maxPhase) const +{ + const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); + const SizeVec3 udims = selectedImageGeom.getDimensions(); + const uint64 numChunks = featureIdsStore.getNumberOfChunks(); + + // Build a vector-based classification: isSmallRoot[label] = 1 if small, 0 if large. + // + // The startLabel boundary is critical: provisional CCL labels were assigned + // starting at (maxExistingFeatureId + 1) during Phase 1, so labels in the + // range [1, startLabel) are original good feature IDs that must NOT be + // touched. Only labels in [startLabel, nextLabel) are CCL-assigned bad-data + // region labels that need classification and relabeling. + std::vector isSmallRoot(static_cast(nextLabel), 0); + for(int32 label = startLabel; label < nextLabel; label++) + { + int64 root = unionFind.find(label); + if(root == label) + { + uint64 regionSize = unionFind.getSize(root); + if(regionSize < static_cast(m_InputValues->minAllowedDefectSizeValue)) + { + isSmallRoot[root] = 1; + } + } + } + + // Read provisional labels from featureIds store (written during Phase 1) + // and relabel based on region classification. + // Only voxels with label >= startLabel are provisional CCL labels (bad data). + // Voxels with label in [1, startLabel) are original good feature IDs — leave them alone. + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + featureIdsStore.loadChunk(chunkIdx); + + const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); + const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); + + for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) + { + for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) + { + for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) + { + const usize index = z * udims[0] * udims[1] + y * udims[0] + x; + + int32 label = featureIdsStore[index]; + if(label >= startLabel) + { + int64 root = unionFind.find(label); + + if(isSmallRoot[root] != 0) + { + featureIdsStore[index] = -1; + } + else + { + featureIdsStore[index] = 0; + + if(m_InputValues->storeAsNewPhase && cellPhasesPtr != nullptr) + { + (*cellPhasesPtr)[index] = static_cast(maxPhase) + 1; + } + } + } + } + } + } + } + + featureIdsStore.flush(); +} + +// ============================================================================= +// PHASE 4: Iterative Morphological Fill (On-Disk Deferred) +// ============================================================================= +// +// Uses a temporary file to avoid O(N) memory allocations. Each iteration: +// Pass 1 (Vote): Scan voxels chunk-sequentially. For each -1 voxel, find the +// best positive-featureId neighbor via majority vote. Write (dest, src) pairs +// to a temp file. featureIds is read-only during this pass. +// Pass 2 (Apply): Read pairs back from the temp file. Copy all cell data array +// components from src to dest. Update featureIds last. +// ============================================================================= +Result<> FillBadDataCCL::phaseFourIterativeFill(Int32AbstractDataStore& featureIdsStore, const std::array& dims, usize numFeatures) const +{ + const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); + + std::array neighborVoxelIndexOffsets = initializeFaceNeighborOffsets(dims); + std::array faceNeighborInternalIdx = initializeFaceNeighborInternalIdx(); + + // Feature vote counter: O(features) not O(voxels) + std::vector featureNumber(numFeatures + 1, 0); + + // Get cell arrays that need updating during filling + std::optional> allChildArrays = GetAllChildDataPaths(m_DataStructure, selectedImageGeom.getCellDataPath(), DataObject::Type::DataArray, m_InputValues->ignoredDataArrayPaths); + std::vector voxelArrayNames; + if(allChildArrays.has_value()) + { + voxelArrayNames = allChildArrays.value(); + } + + // Open a temporary file for deferred fill pairs. We use a temp file instead + // of an O(N) in-memory neighbors vector so that Phase 4 stays OOC-friendly. + // Pass 1 writes (dest, src) index pairs to the file; Pass 2 reads them back + // and applies the fills. This two-pass approach ensures that featureIds are + // read-only during the vote scan (Pass 1), so all votes see the pre-iteration + // state. The TempFileGuard RAII wrapper guarantees the file is closed even + // if an early return or error occurs, preventing temp file leaks. + TempFileGuard tmpGuard; + tmpGuard.file = std::tmpfile(); + if(tmpGuard.file == nullptr) + { + return MakeErrorResult(-87010, "Phase 4/4: Failed to create temporary file for deferred fill"); + } + + MessageHelper messageHelper(m_MessageHandler, std::chrono::milliseconds(1000)); + auto throttledMessenger = messageHelper.createThrottledMessenger(std::chrono::milliseconds(1000)); + + usize count = 1; + usize iteration = 0; + usize pairsWritten = 0; + const uint64 numChunks = featureIdsStore.getNumberOfChunks(); + + while(count != 0) + { + iteration++; + count = 0; + + // Rewind for this iteration's writes + std::rewind(tmpGuard.file); + pairsWritten = 0; + + // Pass 1 (Vote): Chunk-sequential scan writing (dest, src) pairs to temp file. + // featureIds is read-only during this pass — two-pass semantics are automatic. + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + if(m_ShouldCancel) + { + return {}; + } + + featureIdsStore.loadChunk(chunkIdx); + const auto lower = featureIdsStore.getChunkLowerBounds(chunkIdx); + const auto upper = featureIdsStore.getChunkUpperBounds(chunkIdx); + + for(usize z = lower[0]; z <= upper[0]; z++) + { + for(usize y = lower[1]; y <= upper[1]; y++) + { + for(usize x = lower[2]; x <= upper[2]; x++) + { + const int64 voxelIndex = static_cast(z) * dims[0] * dims[1] + static_cast(y) * dims[0] + static_cast(x); + int32 featureName = featureIdsStore[voxelIndex]; + + if(featureName < 0) + { + count++; + int32 most = 0; + int64 bestNeighbor = -1; + + std::array isValidFaceNeighbor = computeValidFaceNeighbors(static_cast(x), static_cast(y), static_cast(z), dims); + for(const auto& faceIndex : faceNeighborInternalIdx) + { + if(!isValidFaceNeighbor[faceIndex]) + { + continue; + } + + auto neighborPoint = voxelIndex + neighborVoxelIndexOffsets[faceIndex]; + int32 feature = featureIdsStore[neighborPoint]; + + if(feature > 0) + { + featureNumber[feature]++; + int32 current = featureNumber[feature]; + if(current > most) + { + most = current; + bestNeighbor = neighborPoint; + } + } + } + + // Reset vote counters by re-visiting only the neighbors that + // were actually incremented above. This sets featureNumber[feature] + // back to 0 for each neighbor's feature, avoiding the need to zero + // the entire featureNumber vector (which would be O(numFeatures) + // per voxel). Since at most 6 neighbors are visited, this reset + // is O(1) per voxel. + for(const auto& faceIndex : faceNeighborInternalIdx) + { + if(!isValidFaceNeighbor[faceIndex]) + { + continue; + } + auto neighborPoint = voxelIndex + neighborVoxelIndexOffsets[faceIndex]; + int32 feature = featureIdsStore[neighborPoint]; + if(feature > 0) + { + featureNumber[feature] = 0; + } + } + + // Write (dest, src) pair to temp file if a valid neighbor was found + if(bestNeighbor >= 0) + { + std::array pair = {voxelIndex, bestNeighbor}; + if(std::fwrite(pair.data(), sizeof(int64), 2, tmpGuard.file) != 2) + { + return MakeErrorResult(-87012, "Phase 4/4: Failed to write fill pair to temporary file"); + } + pairsWritten++; + } + } + } + } + } + } + + if(count == 0) + { + break; + } + + // Pass 2 (Apply): Read (dest, src) pairs from temp file and apply fills. + // This is a two-pass apply strategy: + // - First pass: update all non-featureIds cell arrays (phases, orientations, + // etc.) by copying all components from src to dest. + // - Second pass: update featureIds last. + // The reason featureIds must be updated LAST is that during the first pass, + // the src voxel's featureId must remain valid (> 0) so that subsequent + // iterations correctly identify it as a good-data source. If featureIds were + // updated alongside other arrays, a dest voxel that just received a new + // featureId could be read as a source before its other arrays were copied. + std::rewind(tmpGuard.file); + std::array pair; + + // First pass over pairs: update all non-featureIds cell arrays + for(usize pairIdx = 0; pairIdx < pairsWritten && std::fread(pair.data(), sizeof(int64), 2, tmpGuard.file) == 2; pairIdx++) + { + int64 dest = pair[0]; + int64 src = pair[1]; + + for(const auto& cellArrayPath : voxelArrayNames) + { + if(cellArrayPath == m_InputValues->featureIdsArrayPath) + { + continue; + } + auto* cellArray = m_DataStructure.getDataAs(cellArrayPath); + ExecuteDataFunction(CopyTupleFunctor{}, cellArray->getDataType(), cellArray, dest, src); + } + } + + // Second pass over pairs: update featureIds last (see explanation above) + std::rewind(tmpGuard.file); + for(usize pairIdx = 0; pairIdx < pairsWritten && std::fread(pair.data(), sizeof(int64), 2, tmpGuard.file) == 2; pairIdx++) + { + int64 dest = pair[0]; + int64 src = pair[1]; + featureIdsStore[dest] = featureIdsStore[src]; + } + + featureIdsStore.flush(); + + throttledMessenger.sendThrottledMessage([iteration, count]() { return fmt::format(" Iteration {}: {} voxels remaining to fill", iteration, count); }); + } + + m_MessageHandler({IFilter::Message::Type::Info, fmt::format(" Completed in {} iteration{}", iteration, iteration == 1 ? "" : "s")}); + return {}; +} + +// ============================================================================= +// Main Algorithm Entry Point +// ============================================================================= +Result<> FillBadDataCCL::operator()() +{ + auto& featureIdsStore = m_DataStructure.getDataAs(m_InputValues->featureIdsArrayPath)->getDataStoreRef(); + const auto& selectedImageGeom = m_DataStructure.getDataRefAs(m_InputValues->inputImageGeometry); + const SizeVec3 udims = selectedImageGeom.getDimensions(); + + std::array dims = { + static_cast(udims[0]), + static_cast(udims[1]), + static_cast(udims[2]), + }; + + const usize totalPoints = featureIdsStore.getNumberOfTuples(); + + // Get cell phases array if we need to assign large regions to a new phase + Int32Array* cellPhasesPtr = nullptr; + usize maxPhase = 0; + + if(m_InputValues->storeAsNewPhase) + { + cellPhasesPtr = m_DataStructure.getDataAs(m_InputValues->cellPhasesArrayPath); + } + + // Single pass: find max feature ID and optionally max phase + usize numFeatures = 0; + for(usize i = 0; i < totalPoints; i++) + { + int32 featureName = featureIdsStore[i]; + if(featureName > numFeatures) + { + numFeatures = featureName; + } + if(cellPhasesPtr != nullptr && (*cellPhasesPtr)[i] > maxPhase) + { + maxPhase = (*cellPhasesPtr)[i]; + } + } + + // Initialize data structures for connected component labeling. + // Start provisional labels AFTER the max existing feature ID to avoid collisions. + // Existing feature IDs are in [1, numFeatures], so provisional labels start at numFeatures+1. + UnionFind unionFind; + const int32 startLabel = static_cast(numFeatures) + 1; + int32 nextLabel = startLabel; + + // Phase 1: Chunk-Sequential Connected Component Labeling + // Uses a 2-slice rolling buffer (O(slice) memory) for backward neighbor reads. + // Writes provisional labels to featureIds store for Phases 2-3. + m_MessageHandler({IFilter::Message::Type::Info, "Phase 1/4: Labeling connected components..."}); + phaseOneCCL(featureIdsStore, unionFind, nextLabel, dims); + + // Phase 2: Global Resolution of equivalences + m_MessageHandler({IFilter::Message::Type::Info, "Phase 2/4: Resolving region equivalences..."}); + phaseTwoGlobalResolution(unionFind); + + // Phase 3: Relabeling based on region size classification + // Reads provisional labels from featureIds store (written during Phase 1) + m_MessageHandler({IFilter::Message::Type::Info, "Phase 3/4: Classifying region sizes..."}); + phaseThreeRelabeling(featureIdsStore, cellPhasesPtr, startLabel, nextLabel, unionFind, maxPhase); + + // Phase 4: Iterative morphological fill + m_MessageHandler({IFilter::Message::Type::Info, "Phase 4/4: Filling small defects..."}); + return phaseFourIterativeFill(featureIdsStore, dims, numFeatures); +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.hpp new file mode 100644 index 0000000000..4822e180a7 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/FillBadDataCCL.hpp @@ -0,0 +1,76 @@ +#pragma once + +#include "SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Filter/IFilter.hpp" +#include "simplnx/Utilities/UnionFind.hpp" + +namespace nx::core +{ + +// Forward declarations +template +class DataArray; +using Int32Array = DataArray; + +template +class AbstractDataStore; +using Int32AbstractDataStore = AbstractDataStore; + +struct FillBadDataInputValues; + +/** + * @class FillBadDataCCL + * @brief CCL-based algorithm for filling bad data regions, optimized for out-of-core. + * + * Uses chunk-sequential connected component labeling with a 2-slice rolling buffer + * to avoid O(N) memory allocations. Designed for datasets that may exceed available RAM. + * + * @see FillBadDataBFS for the in-core-optimized alternative. + * @see AlgorithmDispatch.hpp for the dispatch mechanism that selects between them. + */ +class SIMPLNXCORE_EXPORT FillBadDataCCL +{ +public: + /** + * @brief Constructs the CCL fill algorithm with the required context. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling fill behavior. + */ + FillBadDataCCL(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const FillBadDataInputValues* inputValues); + ~FillBadDataCCL() noexcept; + + FillBadDataCCL(const FillBadDataCCL&) = delete; + FillBadDataCCL(FillBadDataCCL&&) noexcept = delete; + FillBadDataCCL& operator=(const FillBadDataCCL&) = delete; + FillBadDataCCL& operator=(FillBadDataCCL&&) noexcept = delete; + + /** + * @brief Executes the CCL-based algorithm to identify and fill bad data regions. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> operator()(); + + /** + * @brief Returns the cancellation flag reference. + * @return Reference to the atomic cancellation flag. + */ + const std::atomic_bool& getCancel() const; + +private: + static void phaseOneCCL(Int32AbstractDataStore& featureIdsStore, UnionFind& unionFind, int32& nextLabel, const std::array& dims); + static void phaseTwoGlobalResolution(UnionFind& unionFind); + void phaseThreeRelabeling(Int32AbstractDataStore& featureIdsStore, Int32Array* cellPhasesPtr, int32 startLabel, int32 nextLabel, UnionFind& unionFind, usize maxPhase) const; + Result<> phaseFourIterativeFill(Int32AbstractDataStore& featureIdsStore, const std::array& dims, usize numFeatures) const; + + DataStructure& m_DataStructure; + const FillBadDataInputValues* m_InputValues = nullptr; + const std::atomic_bool& m_ShouldCancel; + const IFilter::MessageHandler& m_MessageHandler; +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySample.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySample.cpp index 00ee05cc31..624d7b60b2 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySample.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySample.cpp @@ -1,394 +1,13 @@ #include "IdentifySample.hpp" +#include "IdentifySampleBFS.hpp" +#include "IdentifySampleCCL.hpp" + #include "simplnx/DataStructure/DataArray.hpp" -#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" -#include "simplnx/Utilities/FilterUtilities.hpp" -#include "simplnx/Utilities/NeighborUtilities.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" using namespace nx::core; -namespace -{ -struct IdentifySampleFunctor -{ - template - void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) - { - ShapeType cDims = {1}; - auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); - - const auto totalPoints = static_cast(goodVoxelsPtr->getNumberOfTuples()); - - SizeVec3 udims = imageGeom->getDimensions(); - - std::array dims = { - static_cast(udims[0]), - static_cast(udims[1]), - static_cast(udims[2]), - }; - - int64_t neighborPoint = 0; - std::array neighborVoxelIndexOffsets = initializeFaceNeighborOffsets(dims); - std::array faceNeighborInternalIdx = initializeFaceNeighborInternalIdx(); - - std::vector currentVList; - std::vector checked(totalPoints, false); - std::vector sample(totalPoints, false); - int64 biggestBlock = 0; - - // In this loop over the data we are finding the biggest contiguous set of GoodVoxels and calling that the 'sample' All GoodVoxels that do not touch the 'sample' - // are flipped to be called 'bad' voxels or 'not sample' - float threshold = 0.0f; - for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) - { - if(shouldCancel) - { - return; - } - const float percentIncrement = static_cast(voxelIndex) / static_cast(totalPoints) * 100.0f; - if(percentIncrement > threshold) - { - messageHandler(IFilter::Message::Type::Info, fmt::format("Completed: {}", percentIncrement)); - threshold = threshold + 5.0f; - if(threshold < percentIncrement) - { - threshold = percentIncrement; - } - } - - if(!checked[voxelIndex] && goodVoxels.getValue(voxelIndex)) - { - currentVList.push_back(voxelIndex); - usize count = 0; - while(count < currentVList.size()) - { - int64 index = currentVList[count]; - int64 xIdx = index % dims[0]; - int64 yIdx = (index / dims[0]) % dims[1]; - int64 zIdx = index / (dims[0] * dims[1]); - std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); - for(const auto& faceIndex : faceNeighborInternalIdx) - { - if(!isValidFaceNeighbor[faceIndex]) - { - continue; - } - neighborPoint = index + neighborVoxelIndexOffsets[faceIndex]; - - if(!checked[neighborPoint] && goodVoxels.getValue(neighborPoint)) - { - currentVList.push_back(neighborPoint); - checked[neighborPoint] = true; - } - } - count++; - } - if(static_cast(currentVList.size()) >= biggestBlock) - { - biggestBlock = currentVList.size(); - sample.assign(totalPoints, false); - for(int64 j = 0; j < biggestBlock; j++) - { - sample[currentVList[j]] = true; - } - } - currentVList.clear(); - } - } - for(int64 i = 0; i < totalPoints; i++) - { - if(!sample[i] && goodVoxels.getValue(i)) - { - goodVoxels.setValue(i, false); - } - } - sample.clear(); - checked.assign(totalPoints, false); - - // In this loop we are going to 'close' all the 'holes' inside the region already identified as the 'sample' if the user chose to do so. - // This is done by flipping all 'bad' voxel features that do not touch the outside of the sample (i.e. they are fully contained inside the 'sample'). - threshold = 0.0F; - if(fillHoles) - { - messageHandler(IFilter::Message::Type::Info, fmt::format("Filling holes in sample...")); - - bool touchesBoundary = false; - for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) - { - if(shouldCancel) - { - return; - } - const float percentIncrement = static_cast(voxelIndex) / static_cast(totalPoints) * 100.0f; - if(percentIncrement > threshold) - { - threshold = threshold + 5.0f; - if(threshold < percentIncrement) - { - threshold = percentIncrement; - } - } - - if(!checked[voxelIndex] && !goodVoxels.getValue(voxelIndex)) - { - currentVList.push_back(voxelIndex); - usize count = 0; - touchesBoundary = false; - while(count < currentVList.size()) - { - int64 index = currentVList[count]; - int64 xIdx = index % dims[0]; - int64 yIdx = (index / dims[0]) % dims[1]; - int64 zIdx = index / (dims[0] * dims[1]); - if(xIdx == 0 || xIdx == (dims[0] - 1) || yIdx == 0 || yIdx == (dims[1] - 1) || zIdx == 0 || zIdx == (dims[2] - 1)) - { - touchesBoundary = true; - } - // Loop over the 6 face neighbors of the voxel - std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); - for(const auto& faceIndex : faceNeighborInternalIdx) - { - if(!isValidFaceNeighbor[faceIndex]) - { - continue; - } - neighborPoint = index + neighborVoxelIndexOffsets[faceIndex]; - - if(!checked[neighborPoint] && !goodVoxels.getValue(neighborPoint)) - { - currentVList.push_back(neighborPoint); - checked[neighborPoint] = true; - } - } - count++; - } - if(!touchesBoundary) - { - for(int64_t j : currentVList) - { - goodVoxels.setValue(j, true); - } - } - currentVList.clear(); - } - } - } - checked.clear(); - } -}; - -struct IdentifySampleSliceBySliceFunctor -{ - enum class Plane - { - XY, - XZ, - YZ - }; - - template - void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, Plane plane, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) - { - auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); - - SizeVec3 uDims = imageGeom->getDimensions(); - const int64 dimX = static_cast(uDims[0]); - const int64 dimY = static_cast(uDims[1]); - const int64 dimZ = static_cast(uDims[2]); - - int64 planeDim1, planeDim2, fixedDim; - int64 stride1, stride2, fixedStride; - - switch(plane) - { - case Plane::XY: - planeDim1 = dimX; - planeDim2 = dimY; - fixedDim = dimZ; - stride1 = 1; - stride2 = dimX; - fixedStride = dimX * dimY; - break; - - case Plane::XZ: - planeDim1 = dimX; - planeDim2 = dimZ; - fixedDim = dimY; - stride1 = 1; - stride2 = dimX * dimY; - fixedStride = dimX; - break; - - case Plane::YZ: - planeDim1 = dimY; - planeDim2 = dimZ; - fixedDim = dimX; - stride1 = dimX; - stride2 = dimX * dimY; - fixedStride = 1; - break; - } - - for(int64 fixedIdx = 0; fixedIdx < fixedDim; ++fixedIdx) // Process each slice - { - if(shouldCancel) - { - return; - } - messageHandler(IFilter::Message::Type::Info, fmt::format("Slice {}", fixedIdx)); - - std::vector checked(planeDim1 * planeDim2, false); - std::vector sample(planeDim1 * planeDim2, false); - std::vector currentVList; - int64 biggestBlock = 0; - - // Identify the largest contiguous set of good voxels in the slice - for(int64 p2 = 0; p2 < planeDim2; ++p2) - { - for(int64 p1 = 0; p1 < planeDim1; ++p1) - { - int64 planeIndex = p2 * planeDim1 + p1; - int64 globalIndex = fixedIdx * fixedStride + p2 * stride2 + p1 * stride1; - - if(!checked[planeIndex] && goodVoxels.getValue(globalIndex)) - { - currentVList.push_back(planeIndex); - int64 count = 0; - - while(count < currentVList.size()) - { - int64 localIdx = currentVList[count]; - int64 localP1 = localIdx % planeDim1; - int64 localP2 = localIdx / planeDim1; - - for(int j = 0; j < 4; ++j) - { - int64 dp1[4] = {0, 0, -1, 1}; - int64 dp2[4] = {-1, 1, 0, 0}; - - int64 neighborP1 = localP1 + dp1[j]; - int64 neighborP2 = localP2 + dp2[j]; - - if(neighborP1 >= 0 && neighborP1 < planeDim1 && neighborP2 >= 0 && neighborP2 < planeDim2) - { - int64 neighborIdx = neighborP2 * planeDim1 + neighborP1; - int64 globalNeighborIdx = fixedIdx * fixedStride + neighborP2 * stride2 + neighborP1 * stride1; - - if(!checked[neighborIdx] && goodVoxels.getValue(globalNeighborIdx)) - { - currentVList.push_back(neighborIdx); - checked[neighborIdx] = true; - } - } - } - count++; - } - - if(static_cast(currentVList.size()) > biggestBlock) - { - biggestBlock = currentVList.size(); - sample.assign(planeDim1 * planeDim2, false); - for(int64 idx : currentVList) - { - sample[idx] = true; - } - } - currentVList.clear(); - } - } - } - if(shouldCancel) - { - return; - } - - for(int64 p2 = 0; p2 < planeDim2; ++p2) - { - for(int64 p1 = 0; p1 < planeDim1; ++p1) - { - int64 planeIndex = p2 * planeDim1 + p1; - int64 globalIndex = fixedIdx * fixedStride + p2 * stride2 + p1 * stride1; - - if(!sample[planeIndex]) - { - goodVoxels.setValue(globalIndex, false); - } - } - } - if(shouldCancel) - { - return; - } - - checked.assign(planeDim1 * planeDim2, false); - if(fillHoles) - { - for(int64 p2 = 0; p2 < planeDim2; ++p2) - { - for(int64 p1 = 0; p1 < planeDim1; ++p1) - { - int64 planeIndex = p2 * planeDim1 + p1; - int64 globalIndex = fixedIdx * fixedStride + p2 * stride2 + p1 * stride1; - - if(!checked[planeIndex] && !goodVoxels.getValue(globalIndex)) - { - currentVList.push_back(planeIndex); - int64 count = 0; - bool touchesBoundary = false; - - while(count < currentVList.size()) - { - int64 localIdx = currentVList[count]; - int64 localP1 = localIdx % planeDim1; - int64 localP2 = localIdx / planeDim1; - - if(localP1 == 0 || localP1 == planeDim1 - 1 || localP2 == 0 || localP2 == planeDim2 - 1) - { - touchesBoundary = true; - } - - for(int j = 0; j < 4; ++j) - { - int64 dp1[4] = {0, 0, -1, 1}; - int64 dp2[4] = {-1, 1, 0, 0}; - - int64 neighborP1 = localP1 + dp1[j]; - int64 neighborP2 = localP2 + dp2[j]; - - if(neighborP1 >= 0 && neighborP1 < planeDim1 && neighborP2 >= 0 && neighborP2 < planeDim2) - { - int64 neighborIdx = neighborP2 * planeDim1 + neighborP1; - int64 globalNeighborIdx = fixedIdx * fixedStride + neighborP2 * stride2 + neighborP1 * stride1; - - if(!checked[neighborIdx] && !goodVoxels.getValue(globalNeighborIdx)) - { - currentVList.push_back(neighborIdx); - checked[neighborIdx] = true; - } - } - } - count++; - } - - if(!touchesBoundary) - { - for(int64 idx : currentVList) - { - int64 globalP1 = idx % planeDim1; - int64 globalP2 = idx / planeDim1; - goodVoxels.setValue(fixedIdx * fixedStride + globalP2 * stride2 + globalP1 * stride1, true); - } - } - currentVList.clear(); - } - } - } - } - } - } -}; -} // namespace - // ----------------------------------------------------------------------------- IdentifySample::IdentifySample(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, IdentifySampleInputValues* inputValues) : m_DataStructure(dataStructure) @@ -404,18 +23,7 @@ IdentifySample::~IdentifySample() noexcept = default; // ----------------------------------------------------------------------------- Result<> IdentifySample::operator()() { - auto* inputData = m_DataStructure.getDataAs(m_InputValues->MaskArrayPath); - const auto* imageGeom = m_DataStructure.getDataAs(m_InputValues->InputImageGeometryPath); - - if(m_InputValues->SliceBySlice) - { - ExecuteDataFunction(IdentifySampleSliceBySliceFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, - static_cast(m_InputValues->SliceBySlicePlaneIndex), m_MessageHandler, m_ShouldCancel); - } - else - { - ExecuteDataFunction(IdentifySampleFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, m_MessageHandler, m_ShouldCancel); - } + auto* maskArray = m_DataStructure.getDataAs(m_InputValues->MaskArrayPath); - return {}; + return DispatchAlgorithm({maskArray}, m_DataStructure, m_MessageHandler, m_ShouldCancel, m_InputValues); } diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.cpp new file mode 100644 index 0000000000..6648cb3ee6 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.cpp @@ -0,0 +1,258 @@ +#include "IdentifySampleBFS.hpp" + +#include "IdentifySample.hpp" +#include "IdentifySampleCommon.hpp" + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" +#include "simplnx/Utilities/NeighborUtilities.hpp" + +using namespace nx::core; + +namespace +{ +// ============================================================================= +// IdentifySampleBFSFunctor +// ============================================================================= +// BFS flood-fill algorithm for identifying the largest connected component of +// "good" voxels in an image geometry, then optionally filling interior holes. +// +// The algorithm has two phases: +// +// Phase 1 (Find Largest Component): +// BFS flood-fill discovers all connected components of good voxels +// (goodVoxels == true). Each component is found by starting BFS from an +// unchecked good voxel and expanding to all face-adjacent good neighbors. +// The largest component by voxel count is tracked as "the sample". After +// all components are found, any good voxels NOT in the largest component +// are set to false (they are noise or satellite regions). +// Uses O(N) memory: checked + sample vectors (std::vector, 1 bit each). +// +// Phase 2 (Hole Fill, optional): +// If fillHoles is true, a second BFS pass runs on bad voxels +// (goodVoxels == false). Each connected component of bad voxels is +// discovered via BFS. During BFS, a `touchesBoundary` flag tracks whether +// any voxel in the component lies on the domain boundary (x/y/z == 0 or +// max). If the component does NOT touch the boundary, it is fully enclosed +// by the sample and is an interior hole -- all its voxels are set to true. +// If it touches the boundary, it is external empty space and left as-is. +// +// NOTE: Uses std::vector (1 bit per voxel) for minimal memory overhead. +// Fast for in-core data where random access is O(1), but causes chunk +// thrashing in OOC mode due to BFS visiting neighbors across chunk boundaries. +// Use IdentifySampleCCL for out-of-core compatible processing. +// ============================================================================= +struct IdentifySampleBFSFunctor +{ + template + void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) + { + auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); + + const auto totalPoints = static_cast(goodVoxelsPtr->getNumberOfTuples()); + + SizeVec3 udims = imageGeom->getDimensions(); + + std::array dims = { + static_cast(udims[0]), + static_cast(udims[1]), + static_cast(udims[2]), + }; + + int64 neighborPoint = 0; + std::array neighborVoxelIndexOffsets = initializeFaceNeighborOffsets(dims); + std::array faceNeighborInternalIdx = initializeFaceNeighborInternalIdx(); + + std::vector currentVList; + std::vector checked(totalPoints, false); // O(N) bits: tracks visited voxels + std::vector sample(totalPoints, false); // O(N) bits: marks voxels in the largest component + int64 biggestBlock = 0; + + // --- Phase 1: Find the largest contiguous set of good voxels ------------ + // BFS flood-fill from each unvisited good voxel. Track the largest + // connected component found so far. + float threshold = 0.0f; + for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) + { + if(shouldCancel) + { + return; + } + const float percentIncrement = static_cast(voxelIndex) / static_cast(totalPoints) * 100.0f; + if(percentIncrement > threshold) + { + messageHandler(IFilter::Message::Type::Info, fmt::format("Completed: {}", percentIncrement)); + threshold = threshold + 5.0f; + if(threshold < percentIncrement) + { + threshold = percentIncrement; + } + } + + if(!checked[voxelIndex] && goodVoxels.getValue(voxelIndex)) + { + // Start BFS from this seed voxel to discover one connected component + currentVList.push_back(voxelIndex); + usize count = 0; + while(count < currentVList.size()) + { + int64 index = currentVList[count]; + int64 xIdx = index % dims[0]; + int64 yIdx = (index / dims[0]) % dims[1]; + int64 zIdx = index / (dims[0] * dims[1]); + std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); + for(const auto& faceIndex : faceNeighborInternalIdx) + { + if(!isValidFaceNeighbor[faceIndex]) + { + continue; + } + neighborPoint = index + neighborVoxelIndexOffsets[faceIndex]; + + if(!checked[neighborPoint] && goodVoxels.getValue(neighborPoint)) + { + currentVList.push_back(neighborPoint); + checked[neighborPoint] = true; + } + } + count++; + } + // If this component is the largest found so far, record it as the sample + if(static_cast(currentVList.size()) >= biggestBlock) + { + biggestBlock = currentVList.size(); + sample.assign(totalPoints, false); + for(int64 j = 0; j < biggestBlock; j++) + { + sample[currentVList[j]] = true; + } + } + currentVList.clear(); + } + } + // Any good voxels NOT in the largest component are noise/satellites -- + // set them to false so only the primary sample remains. + for(int64 i = 0; i < totalPoints; i++) + { + if(!sample[i] && goodVoxels.getValue(i)) + { + goodVoxels.setValue(i, false); + } + } + sample.clear(); + checked.assign(totalPoints, false); + + // --- Phase 2: Hole fill (optional) ---------------------------------------- + // BFS on bad voxels (goodVoxels == false). Each connected component of + // bad voxels is checked: if any voxel in the component touches a domain + // boundary face (x/y/z == 0 or max), the component is external empty + // space and is left as-is. If the component is fully enclosed by the + // sample (touchesBoundary == false), it is an interior hole and all + // its voxels are set to true. + threshold = 0.0F; + if(fillHoles) + { + messageHandler(IFilter::Message::Type::Info, fmt::format("Filling holes in sample...")); + + bool touchesBoundary = false; + for(int64 voxelIndex = 0; voxelIndex < totalPoints; voxelIndex++) + { + if(shouldCancel) + { + return; + } + const float percentIncrement = static_cast(voxelIndex) / static_cast(totalPoints) * 100.0f; + if(percentIncrement > threshold) + { + threshold = threshold + 5.0f; + if(threshold < percentIncrement) + { + threshold = percentIncrement; + } + } + + if(!checked[voxelIndex] && !goodVoxels.getValue(voxelIndex)) + { + // BFS from this bad voxel to discover one connected component of + // bad data. Track whether any voxel in the component is on a + // domain boundary face. + currentVList.push_back(voxelIndex); + usize count = 0; + touchesBoundary = false; + while(count < currentVList.size()) + { + int64 index = currentVList[count]; + int64 xIdx = index % dims[0]; + int64 yIdx = (index / dims[0]) % dims[1]; + int64 zIdx = index / (dims[0] * dims[1]); + // Check if this voxel lies on any domain boundary face + if(xIdx == 0 || xIdx == (dims[0] - 1) || yIdx == 0 || yIdx == (dims[1] - 1) || zIdx == 0 || zIdx == (dims[2] - 1)) + { + touchesBoundary = true; + } + std::array isValidFaceNeighbor = computeValidFaceNeighbors(xIdx, yIdx, zIdx, dims); + for(const auto& faceIndex : faceNeighborInternalIdx) + { + if(!isValidFaceNeighbor[faceIndex]) + { + continue; + } + neighborPoint = index + neighborVoxelIndexOffsets[faceIndex]; + + if(!checked[neighborPoint] && !goodVoxels.getValue(neighborPoint)) + { + currentVList.push_back(neighborPoint); + checked[neighborPoint] = true; + } + } + count++; + } + // If this bad-data component does not touch any boundary, it is + // an interior hole -- fill it by setting all voxels to true. + if(!touchesBoundary) + { + for(int64 j : currentVList) + { + goodVoxels.setValue(j, true); + } + } + currentVList.clear(); + } + } + } + checked.clear(); + } +}; +} // namespace + +// ----------------------------------------------------------------------------- +IdentifySampleBFS::IdentifySampleBFS(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const IdentifySampleInputValues* inputValues) +: m_DataStructure(dataStructure) +, m_InputValues(inputValues) +, m_ShouldCancel(shouldCancel) +, m_MessageHandler(mesgHandler) +{ +} + +// ----------------------------------------------------------------------------- +IdentifySampleBFS::~IdentifySampleBFS() noexcept = default; + +// ----------------------------------------------------------------------------- +Result<> IdentifySampleBFS::operator()() +{ + auto* inputData = m_DataStructure.getDataAs(m_InputValues->MaskArrayPath); + const auto* imageGeom = m_DataStructure.getDataAs(m_InputValues->InputImageGeometryPath); + + if(m_InputValues->SliceBySlice) + { + ExecuteDataFunction(IdentifySampleSliceBySliceFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, + static_cast(m_InputValues->SliceBySlicePlaneIndex), m_MessageHandler, m_ShouldCancel); + } + else + { + ExecuteDataFunction(IdentifySampleBFSFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, m_MessageHandler, m_ShouldCancel); + } + + return {}; +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.hpp new file mode 100644 index 0000000000..e506bcb158 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleBFS.hpp @@ -0,0 +1,57 @@ +#pragma once + +#include "SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Filter/IFilter.hpp" + +namespace nx::core +{ + +struct IdentifySampleInputValues; + +/** + * @class IdentifySampleBFS + * @brief BFS flood-fill algorithm for identifying the largest sample region. + * + * This is the in-core-optimized implementation. It uses BFS (breadth-first search) + * with std::vector for tracking visited voxels, which is memory-efficient + * (1 bit per voxel) and fast when data is in contiguous memory. However, the random + * access pattern of BFS causes severe chunk thrashing in out-of-core mode. + * + * @see IdentifySampleCCL for the out-of-core-optimized alternative. + * @see AlgorithmDispatch.hpp for the dispatch mechanism that selects between them. + */ +class SIMPLNXCORE_EXPORT IdentifySampleBFS +{ +public: + /** + * @brief Constructs the BFS sample identification algorithm with the required context. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling identification behavior. + */ + IdentifySampleBFS(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const IdentifySampleInputValues* inputValues); + ~IdentifySampleBFS() noexcept; + + IdentifySampleBFS(const IdentifySampleBFS&) = delete; + IdentifySampleBFS(IdentifySampleBFS&&) noexcept = delete; + IdentifySampleBFS& operator=(const IdentifySampleBFS&) = delete; + IdentifySampleBFS& operator=(IdentifySampleBFS&&) noexcept = delete; + + /** + * @brief Executes the BFS flood-fill algorithm to identify the largest sample region. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> operator()(); + +private: + DataStructure& m_DataStructure; + const IdentifySampleInputValues* m_InputValues = nullptr; + const std::atomic_bool& m_ShouldCancel; + const IFilter::MessageHandler& m_MessageHandler; +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.cpp new file mode 100644 index 0000000000..03ba807781 --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.cpp @@ -0,0 +1,462 @@ +#include "IdentifySampleCCL.hpp" + +#include "IdentifySample.hpp" +#include "IdentifySampleCommon.hpp" + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" + +using namespace nx::core; + +namespace +{ +// ============================================================================= +// runForwardCCL +// ============================================================================= +// Generic chunk-sequential Connected Component Labeling function that works on +// any boolean condition. It processes the volume in chunk order (OOC-friendly) +// using a rolling 2-slice label buffer instead of storing labels for the +// entire volume. +// +// How it works: +// - Scans voxels in chunk order (z, y, x innermost). For each voxel where +// `condition(store, index)` returns true, checks three backward neighbors +// (x-1, y-1, z-1) for existing labels. +// - If no labeled neighbor exists, assigns a new provisional label. +// - If multiple differently-labeled neighbors exist, unites them in the +// union-find structure. +// - Tracks per-label voxel counts (labelSizes) so the largest root can be +// identified after flattening, without a separate counting pass. +// +// The `condition` lambda determines which voxels to label. For example: +// - `store[idx] == true` labels good voxels (sample identification) +// - `!store[idx]` labels bad voxels (hole detection) +// +// The lastClearedZ optimization prevents re-clearing the rolling buffer when +// a Z-slice spans multiple OOC chunks (e.g., chunk shape that splits within +// a Z-plane). Without it, entering the same Z from the next chunk would +// zero out labels already written by the previous chunk. +// +// Returns a CCLResult containing the union-find, accumulated root sizes, +// the next available label, and the largest root/size. +// ============================================================================= +struct CCLResult +{ + VectorUnionFind unionFind; + std::vector rootSizes; + int64 nextLabel = 1; + int64 largestRoot = -1; + uint64 largestSize = 0; +}; + +template +CCLResult runForwardCCL(AbstractDataStore& store, int64 dimX, int64 dimY, int64 dimZ, ConditionFn condition, const std::atomic_bool& shouldCancel) +{ + CCLResult result; + const usize sliceSize = static_cast(dimX * dimY); + + // Rolling 2-slice buffer: only the current and previous Z-slice labels are + // kept in memory. The scanline CCL only looks at backward neighbors (x-1, + // y-1, z-1), so two slices suffice. This gives O(dimX * dimY) memory + // instead of O(volume). + std::vector labelBuffer(2 * sliceSize, 0); + // Per-label voxel count, accumulated during the forward scan so we can + // find the largest component after flattening without a separate pass. + std::vector labelSizes; + labelSizes.push_back(0); // index 0 unused (labels start at 1) + + const uint64 numChunks = store.getNumberOfChunks(); + // Track last cleared Z-slice to avoid re-clearing when a Z-slice spans + // multiple chunks (see algorithm overview comment above). + int64 lastClearedZ = -1; + + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + if(shouldCancel) + { + return result; + } + store.loadChunk(chunkIdx); + const auto lower = store.getChunkLowerBounds(chunkIdx); + const auto upper = store.getChunkUpperBounds(chunkIdx); + + for(usize z = lower[0]; z <= upper[0]; z++) + { + // Clear current slice in rolling buffer only when entering a NEW z value. + // A single Z-slice may span multiple chunks (e.g., chunk shape 1x3x25 with dimY=5), + // so we must not re-clear data written by a previous chunk for the same z. + const usize curOff = (z % 2) * sliceSize; + if(static_cast(z) != lastClearedZ) + { + std::fill(labelBuffer.begin() + curOff, labelBuffer.begin() + curOff + sliceSize, 0); + lastClearedZ = static_cast(z); + } + const usize prevOff = ((z + 1) % 2) * sliceSize; + + for(usize y = lower[1]; y <= upper[1]; y++) + { + for(usize x = lower[2]; x <= upper[2]; x++) + { + const usize index = z * sliceSize + y * static_cast(dimX) + x; + + if(!condition(store, index)) + { + continue; + } + + const usize inSlice = y * static_cast(dimX) + x; + int64 nbrA = 0, nbrB = 0, nbrC = 0; + + if(x > 0) + { + nbrA = labelBuffer[curOff + inSlice - 1]; + } + if(y > 0) + { + nbrB = labelBuffer[curOff + inSlice - static_cast(dimX)]; + } + if(z > 0) + { + nbrC = labelBuffer[prevOff + inSlice]; + } + + int64 minLabel = 0; + if(nbrA > 0) + { + minLabel = nbrA; + } + if(nbrB > 0 && (minLabel == 0 || nbrB < minLabel)) + { + minLabel = nbrB; + } + if(nbrC > 0 && (minLabel == 0 || nbrC < minLabel)) + { + minLabel = nbrC; + } + + int64 assignedLabel; + if(minLabel == 0) + { + assignedLabel = result.nextLabel++; + result.unionFind.makeSet(assignedLabel); + labelSizes.resize(result.nextLabel, 0); + } + else + { + assignedLabel = minLabel; + if(nbrA > 0 && nbrA != assignedLabel) + { + result.unionFind.unite(assignedLabel, nbrA); + } + if(nbrB > 0 && nbrB != assignedLabel) + { + result.unionFind.unite(assignedLabel, nbrB); + } + if(nbrC > 0 && nbrC != assignedLabel) + { + result.unionFind.unite(assignedLabel, nbrC); + } + } + + labelBuffer[curOff + inSlice] = assignedLabel; + labelSizes[assignedLabel]++; + } + } + } + } + + // Flatten union-find and accumulate sizes to roots + result.rootSizes.resize(result.nextLabel, 0); + for(int64 lbl = 1; lbl < result.nextLabel; lbl++) + { + int64 root = result.unionFind.find(lbl); + result.rootSizes[root] += labelSizes[lbl]; + } + + // Find largest root + for(int64 r = 1; r < result.nextLabel; r++) + { + if(result.rootSizes[r] >= result.largestSize) + { + result.largestSize = result.rootSizes[r]; + result.largestRoot = r; + } + } + + return result; +} + +// ============================================================================= +// replayForwardCCL +// ============================================================================= +// Re-derives labels by running the exact same forward CCL scan a second time +// (same chunk order, same scanline traversal, same union-find). Since CCL +// label assignment is fully deterministic given the same scan order and +// condition, the re-derived provisional labels match the original ones from +// runForwardCCL exactly. The union-find (already flattened) is then used to +// resolve each provisional label to its root. +// +// The `action` lambda is called for each labeled voxel with its resolved root +// label, the store, and the voxel's (x, y, z) coordinates. This allows +// per-voxel decisions (e.g., "mask out if root != largestRoot", or "fill if +// root is an interior hole") without ever storing labels for the entire volume. +// +// This is the key OOC trick: by re-computing labels on the fly using only a +// 2-slice rolling buffer, we avoid O(volume) label storage. The trade-off is +// reading the data twice, but for OOC datasets the memory savings are critical. +// +// Note: the union-find unite() calls from the first pass are not repeated here +// because the union-find is already flattened. We only need the label +// assignment logic to re-derive the same provisional labels. +// ============================================================================= +template +void replayForwardCCL(AbstractDataStore& store, int64 dimX, int64 dimY, int64 dimZ, VectorUnionFind& unionFind, ConditionFn condition, ActionFn action, const std::atomic_bool& shouldCancel) +{ + const usize sliceSize = static_cast(dimX * dimY); + std::vector labelBuffer(2 * sliceSize, 0); + int64 nextLabel = 1; + + const uint64 numChunks = store.getNumberOfChunks(); + int64 lastClearedZ = -1; + + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + if(shouldCancel) + { + return; + } + store.loadChunk(chunkIdx); + const auto lower = store.getChunkLowerBounds(chunkIdx); + const auto upper = store.getChunkUpperBounds(chunkIdx); + + for(usize z = lower[0]; z <= upper[0]; z++) + { + const usize curOff = (z % 2) * sliceSize; + if(static_cast(z) != lastClearedZ) + { + std::fill(labelBuffer.begin() + curOff, labelBuffer.begin() + curOff + sliceSize, 0); + lastClearedZ = static_cast(z); + } + const usize prevOff = ((z + 1) % 2) * sliceSize; + + for(usize y = lower[1]; y <= upper[1]; y++) + { + for(usize x = lower[2]; x <= upper[2]; x++) + { + const usize index = z * sliceSize + y * static_cast(dimX) + x; + + if(!condition(store, index)) + { + continue; + } + + const usize inSlice = y * static_cast(dimX) + x; + int64 nbrA = 0, nbrB = 0, nbrC = 0; + + if(x > 0) + { + nbrA = labelBuffer[curOff + inSlice - 1]; + } + if(y > 0) + { + nbrB = labelBuffer[curOff + inSlice - static_cast(dimX)]; + } + if(z > 0) + { + nbrC = labelBuffer[prevOff + inSlice]; + } + + int64 minLabel = 0; + if(nbrA > 0) + { + minLabel = nbrA; + } + if(nbrB > 0 && (minLabel == 0 || nbrB < minLabel)) + { + minLabel = nbrB; + } + if(nbrC > 0 && (minLabel == 0 || nbrC < minLabel)) + { + minLabel = nbrC; + } + + int64 assignedLabel; + if(minLabel == 0) + { + assignedLabel = nextLabel++; + } + else + { + assignedLabel = minLabel; + } + + labelBuffer[curOff + inSlice] = assignedLabel; + + // Apply the action with the re-derived label + int64 root = unionFind.find(assignedLabel); + action(store, index, root, x, y, z); + } + } + } + } +} + +// ============================================================================= +// IdentifySampleCCLFunctor +// ============================================================================= +// Chunk-sequential scanline CCL implementation for identifying the largest +// connected component of good voxels in a 3D image geometry, then optionally +// filling interior holes. Processes data in chunk order to avoid random chunk +// access in OOC mode, using a 2-slice rolling buffer (O(slice) memory) instead +// of O(volume). +// +// The algorithm has up to four phases: +// +// Phase 1: Forward CCL on good voxels +// Run runForwardCCL with condition = (goodVoxels[idx] == true) to discover +// all connected components and find the largest one by voxel count. +// +// Phase 2: Replay CCL to mask non-sample voxels +// Run replayForwardCCL with the same good-voxel condition. For each voxel +// whose resolved root != largestRoot, set goodVoxels to false. This removes +// satellite regions and noise without storing the full label volume. +// +// Phase 3 (if fillHoles): Forward CCL on bad voxels +// Run runForwardCCL with condition = (!goodVoxels[idx]) to discover all +// connected components of non-sample space (potential holes + exterior). +// +// Phase 4 (if fillHoles): Replay CCL to identify and fill interior holes +// First replay: for each bad-voxel component, check if any voxel lies on +// a domain boundary. Mark boundary-touching roots in a boolean vector. +// Second replay: for each bad voxel whose root is NOT boundary-touching, +// set goodVoxels to true (filling the interior hole). +// ============================================================================= +struct IdentifySampleCCLFunctor +{ + template + void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) + { + auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); + + SizeVec3 udims = imageGeom->getDimensions(); + const int64 dimX = static_cast(udims[0]); + const int64 dimY = static_cast(udims[1]); + const int64 dimZ = static_cast(udims[2]); + + const uint64 numChunks = goodVoxels.getNumberOfChunks(); + + // --- Phase 1: Forward CCL on good voxels ---------------------------------- + // Discover all connected components of good voxels and find the largest one. + // The condition lambda selects voxels where goodVoxels[idx] is true. + messageHandler(IFilter::Message::Type::Info, "Identifying sample regions..."); + auto goodCondition = [](const AbstractDataStore& s, usize idx) -> bool { return static_cast(s[idx]); }; + auto cclResult = runForwardCCL(goodVoxels, dimX, dimY, dimZ, goodCondition, shouldCancel); + + if(shouldCancel || cclResult.largestRoot < 0) + { + return; + } + + // --- Phase 2: Replay CCL to mask non-sample voxels ---------------------- + // Re-derive labels using a second forward pass with the same scan order + // and condition. For each voxel whose resolved root is not the largest + // component, set goodVoxels to false (removing satellite regions/noise). + // No O(volume) label storage is needed -- labels are recomputed on the fly. + messageHandler(IFilter::Message::Type::Info, "Masking non-sample voxels..."); + const int64 largestRoot = cclResult.largestRoot; + replayForwardCCL( + goodVoxels, dimX, dimY, dimZ, cclResult.unionFind, goodCondition, + [&largestRoot](AbstractDataStore& s, usize idx, int64 root, usize /*x*/, usize /*y*/, usize /*z*/) { + if(root != largestRoot) + { + s.setValue(idx, static_cast(false)); + } + }, + shouldCancel); + goodVoxels.flush(); + + // --- Phase 3: Forward CCL on bad voxels (hole detection) ----------------- + // Only runs if fillHoles is true. Discovers connected components of + // non-good voxels (the complement of the sample). These include both + // exterior empty space and interior holes. + if(fillHoles) + { + messageHandler(IFilter::Message::Type::Info, "Filling holes in sample..."); + + // Condition selects voxels where goodVoxels[idx] is false (bad data) + auto holeCondition = [](const AbstractDataStore& s, usize idx) -> bool { return !static_cast(s[idx]); }; + auto holeCCL = runForwardCCL(goodVoxels, dimX, dimY, dimZ, holeCondition, shouldCancel); + + if(shouldCancel) + { + return; + } + + // --- Phase 4a: Replay CCL to identify boundary-touching roots --------- + // Replay the hole CCL to re-derive labels. For each labeled voxel, + // check if it lies on a domain boundary face. If so, mark its resolved + // root as boundary-touching. Components that touch the boundary are + // exterior space (not holes). This avoids O(volume) label storage by + // re-computing labels on the fly. + std::vector boundaryRoots(holeCCL.nextLabel, false); + replayForwardCCL( + goodVoxels, dimX, dimY, dimZ, holeCCL.unionFind, holeCondition, + [&boundaryRoots, dimX, dimY, dimZ](AbstractDataStore& /*s*/, usize /*idx*/, int64 root, usize x, usize y, usize z) { + if(x == 0 || x == static_cast(dimX - 1) || y == 0 || y == static_cast(dimY - 1) || z == 0 || z == static_cast(dimZ - 1)) + { + boundaryRoots[root] = true; + } + }, + shouldCancel); + + // --- Phase 4b: Replay CCL again to fill interior holes ---------------- + // A third replay of the same CCL (same condition, same union-find) to + // apply the fill. For each bad voxel whose root is NOT boundary-touching, + // it must be an interior hole fully enclosed by the sample -- set it to + // true. Boundary-touching components are exterior and left as-is. + replayForwardCCL( + goodVoxels, dimX, dimY, dimZ, holeCCL.unionFind, holeCondition, + [&boundaryRoots](AbstractDataStore& s, usize idx, int64 root, usize /*x*/, usize /*y*/, usize /*z*/) { + if(!boundaryRoots[root]) + { + s.setValue(idx, static_cast(true)); + } + }, + shouldCancel); + goodVoxels.flush(); + } + } +}; +} // namespace + +// ----------------------------------------------------------------------------- +IdentifySampleCCL::IdentifySampleCCL(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const IdentifySampleInputValues* inputValues) +: m_DataStructure(dataStructure) +, m_InputValues(inputValues) +, m_ShouldCancel(shouldCancel) +, m_MessageHandler(mesgHandler) +{ +} + +// ----------------------------------------------------------------------------- +IdentifySampleCCL::~IdentifySampleCCL() noexcept = default; + +// ----------------------------------------------------------------------------- +Result<> IdentifySampleCCL::operator()() +{ + auto* inputData = m_DataStructure.getDataAs(m_InputValues->MaskArrayPath); + const auto* imageGeom = m_DataStructure.getDataAs(m_InputValues->InputImageGeometryPath); + + if(m_InputValues->SliceBySlice) + { + ExecuteDataFunction(IdentifySampleSliceBySliceFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, + static_cast(m_InputValues->SliceBySlicePlaneIndex), m_MessageHandler, m_ShouldCancel); + } + else + { + ExecuteDataFunction(IdentifySampleCCLFunctor{}, inputData->getDataType(), imageGeom, inputData, m_InputValues->FillHoles, m_MessageHandler, m_ShouldCancel); + } + + return {}; +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.hpp new file mode 100644 index 0000000000..cba07dface --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCCL.hpp @@ -0,0 +1,61 @@ +#pragma once + +#include "SimplnxCore/SimplnxCore_export.hpp" + +#include "simplnx/DataStructure/DataPath.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/Filter/IFilter.hpp" + +namespace nx::core +{ + +struct IdentifySampleInputValues; + +/** + * @class IdentifySampleCCL + * @brief Chunk-sequential CCL algorithm for identifying the largest sample region. + * + * This is the out-of-core-optimized implementation. It uses scanline Connected + * Component Labeling (CCL) with a union-find structure, processing data in chunk + * order to minimize disk I/O. The algorithm only accesses backward neighbors + * (-X, -Y, -Z) during labeling, ensuring sequential chunk access. + * + * Trade-off: Uses a std::vector label array (8 bytes per voxel) which is + * more memory than the BFS approach (1 bit per voxel), but avoids the random + * access pattern that causes chunk thrashing in OOC mode. + * + * @see IdentifySampleBFS for the in-core-optimized alternative. + * @see AlgorithmDispatch.hpp for the dispatch mechanism that selects between them. + */ +class SIMPLNXCORE_EXPORT IdentifySampleCCL +{ +public: + /** + * @brief Constructs the CCL sample identification algorithm with the required context. + * @param dataStructure The data structure containing the arrays to process. + * @param mesgHandler Handler for progress and informational messages. + * @param shouldCancel Cancellation flag checked during execution. + * @param inputValues Filter parameter values controlling identification behavior. + */ + IdentifySampleCCL(DataStructure& dataStructure, const IFilter::MessageHandler& mesgHandler, const std::atomic_bool& shouldCancel, const IdentifySampleInputValues* inputValues); + ~IdentifySampleCCL() noexcept; + + IdentifySampleCCL(const IdentifySampleCCL&) = delete; + IdentifySampleCCL(IdentifySampleCCL&&) noexcept = delete; + IdentifySampleCCL& operator=(const IdentifySampleCCL&) = delete; + IdentifySampleCCL& operator=(IdentifySampleCCL&&) noexcept = delete; + + /** + * @brief Executes the CCL-based algorithm to identify the largest sample region. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> operator()(); + +private: + DataStructure& m_DataStructure; + const IdentifySampleInputValues* m_InputValues = nullptr; + const std::atomic_bool& m_ShouldCancel; + const IFilter::MessageHandler& m_MessageHandler; +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCommon.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCommon.hpp new file mode 100644 index 0000000000..9bc48cea9d --- /dev/null +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/IdentifySampleCommon.hpp @@ -0,0 +1,340 @@ +#pragma once + +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Filter/IFilter.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" + +#include +#include +#include + +namespace nx::core +{ + +/** + * @class VectorUnionFind + * @brief Vector-based union-find for dense label sets (labels 1..N). + * + * Uses flat vectors instead of hash maps for O(1) access. Suitable for + * connected component labeling where labels are assigned sequentially. + */ +class VectorUnionFind +{ +public: + VectorUnionFind() = default; + + /** + * @brief Pre-allocates internal storage for the expected number of labels. + * @param capacity Maximum expected label value. + */ + void reserve(usize capacity) + { + m_Parent.reserve(capacity + 1); + m_Rank.reserve(capacity + 1); + } + + /** + * @brief Creates a new singleton set for label x if it does not already exist. + * @param x Label to initialize. + */ + void makeSet(int64 x) + { + if(static_cast(x) >= m_Parent.size()) + { + m_Parent.resize(x + 1, 0); + m_Rank.resize(x + 1, 0); + } + if(m_Parent[x] == 0) + { + m_Parent[x] = x; + } + } + + /** + * @brief Finds the root label with path-halving compression. + * @param x Label to find the root for. + * @return Root label of the equivalence class. + */ + int64 find(int64 x) + { + while(m_Parent[x] != x) + { + m_Parent[x] = m_Parent[m_Parent[x]]; // path halving + x = m_Parent[x]; + } + return x; + } + + /** + * @brief Merges the equivalence classes of two labels using union-by-rank. + * @param a First label. + * @param b Second label. + */ + void unite(int64 a, int64 b) + { + a = find(a); + b = find(b); + if(a == b) + { + return; + } + if(m_Rank[a] < m_Rank[b]) + { + std::swap(a, b); + } + m_Parent[b] = a; + if(m_Rank[a] == m_Rank[b]) + { + m_Rank[a]++; + } + } + +private: + std::vector m_Parent; + std::vector m_Rank; +}; + +/** + * @struct IdentifySampleSliceBySliceFunctor + * @brief BFS-based implementation for slice-by-slice mode. + * + * Slices are 2D and small relative to the full volume, so OOC chunk + * thrashing is not a concern. This functor is used by both the in-core + * and OOC algorithm classes when slice-by-slice mode is enabled. + */ +struct IdentifySampleSliceBySliceFunctor +{ + /** + * @brief Enumerates the three orthogonal slice planes. + */ + enum class Plane + { + XY, + XZ, + YZ + }; + + static constexpr int64 k_Dp1[4] = {0, 0, -1, 1}; + static constexpr int64 k_Dp2[4] = {-1, 1, 0, 0}; + + /** + * @brief Performs BFS-based sample identification on each 2D slice of the given plane. + * @param imageGeom The image geometry providing dimensions. + * @param goodVoxelsPtr The mask array marking sample vs. non-sample voxels. + * @param fillHoles Whether to fill interior holes in each slice. + * @param plane Which orthogonal plane to slice along. + * @param messageHandler Handler for progress messages. + * @param shouldCancel Cancellation flag checked between slices. + */ + template + void operator()(const ImageGeom* imageGeom, IDataArray* goodVoxelsPtr, bool fillHoles, Plane plane, const IFilter::MessageHandler& messageHandler, const std::atomic_bool& shouldCancel) + { + auto& goodVoxels = goodVoxelsPtr->template getIDataStoreRefAs>(); + + SizeVec3 uDims = imageGeom->getDimensions(); + const int64 dimX = static_cast(uDims[0]); + const int64 dimY = static_cast(uDims[1]); + const int64 dimZ = static_cast(uDims[2]); + + int64 planeDim1, planeDim2, fixedDim; + int64 stride1, stride2, fixedStride; + + switch(plane) + { + case Plane::XY: + planeDim1 = dimX; + planeDim2 = dimY; + fixedDim = dimZ; + stride1 = 1; + stride2 = dimX; + fixedStride = dimX * dimY; + break; + + case Plane::XZ: + planeDim1 = dimX; + planeDim2 = dimZ; + fixedDim = dimY; + stride1 = 1; + stride2 = dimX * dimY; + fixedStride = dimX; + break; + + case Plane::YZ: + planeDim1 = dimY; + planeDim2 = dimZ; + fixedDim = dimX; + stride1 = dimX; + stride2 = dimX * dimY; + fixedStride = 1; + break; + } + + const usize sliceSize = static_cast(planeDim1 * planeDim2); + auto sliceBuffer = std::make_unique(sliceSize); + + for(int64 fixedIdx = 0; fixedIdx < fixedDim; ++fixedIdx) + { + if(shouldCancel) + { + return; + } + messageHandler(IFilter::Message::Type::Info, fmt::format("Slice {}", fixedIdx)); + + // Read the 2D slice into a local buffer with sequential DataStore access. + // This avoids chunk thrashing during BFS for non-XY planes where stride2 + // jumps across chunks (e.g., XZ/YZ planes stride by dimX*dimY per Z-step). + for(int64 p2 = 0; p2 < planeDim2; ++p2) + { + for(int64 p1 = 0; p1 < planeDim1; ++p1) + { + sliceBuffer[static_cast(p2 * planeDim1 + p1)] = goodVoxels.getValue(static_cast(fixedIdx * fixedStride + p2 * stride2 + p1 * stride1)); + } + } + + // BFS for sample identification — operates entirely on local sliceBuffer + std::vector checked(sliceSize, false); + std::vector sample(sliceSize, false); + std::vector currentVList; + int64 biggestBlock = 0; + + for(int64 p2 = 0; p2 < planeDim2; ++p2) + { + for(int64 p1 = 0; p1 < planeDim1; ++p1) + { + int64 planeIndex = p2 * planeDim1 + p1; + + if(!checked[planeIndex] && static_cast(sliceBuffer[planeIndex])) + { + currentVList.push_back(planeIndex); + int64 count = 0; + + while(count < static_cast(currentVList.size())) + { + int64 localIdx = currentVList[count]; + int64 localP1 = localIdx % planeDim1; + int64 localP2 = localIdx / planeDim1; + + for(int j = 0; j < 4; ++j) + { + int64 neighborP1 = localP1 + k_Dp1[j]; + int64 neighborP2 = localP2 + k_Dp2[j]; + + if(neighborP1 >= 0 && neighborP1 < planeDim1 && neighborP2 >= 0 && neighborP2 < planeDim2) + { + int64 neighborIdx = neighborP2 * planeDim1 + neighborP1; + + if(!checked[neighborIdx] && static_cast(sliceBuffer[neighborIdx])) + { + currentVList.push_back(neighborIdx); + checked[neighborIdx] = true; + } + } + } + count++; + } + + if(static_cast(currentVList.size()) > biggestBlock) + { + biggestBlock = currentVList.size(); + sample.assign(sliceSize, false); + for(int64 idx : currentVList) + { + sample[idx] = true; + } + } + currentVList.clear(); + } + } + } + if(shouldCancel) + { + return; + } + + // Mark non-sample voxels as false in the local buffer + for(usize i = 0; i < sliceSize; ++i) + { + if(!sample[i]) + { + sliceBuffer[i] = static_cast(false); + } + } + + if(shouldCancel) + { + return; + } + + // BFS for hole filling — operates entirely on local sliceBuffer + checked.assign(sliceSize, false); + if(fillHoles) + { + for(int64 p2 = 0; p2 < planeDim2; ++p2) + { + for(int64 p1 = 0; p1 < planeDim1; ++p1) + { + int64 planeIndex = p2 * planeDim1 + p1; + + if(!checked[planeIndex] && !static_cast(sliceBuffer[planeIndex])) + { + currentVList.push_back(planeIndex); + int64 count = 0; + bool touchesBoundary = false; + + while(count < static_cast(currentVList.size())) + { + int64 localIdx = currentVList[count]; + int64 localP1 = localIdx % planeDim1; + int64 localP2 = localIdx / planeDim1; + + if(localP1 == 0 || localP1 == planeDim1 - 1 || localP2 == 0 || localP2 == planeDim2 - 1) + { + touchesBoundary = true; + } + + for(int j = 0; j < 4; ++j) + { + int64 neighborP1 = localP1 + k_Dp1[j]; + int64 neighborP2 = localP2 + k_Dp2[j]; + + if(neighborP1 >= 0 && neighborP1 < planeDim1 && neighborP2 >= 0 && neighborP2 < planeDim2) + { + int64 neighborIdx = neighborP2 * planeDim1 + neighborP1; + + if(!checked[neighborIdx] && !static_cast(sliceBuffer[neighborIdx])) + { + currentVList.push_back(neighborIdx); + checked[neighborIdx] = true; + } + } + } + count++; + } + + if(!touchesBoundary) + { + for(int64 idx : currentVList) + { + sliceBuffer[idx] = static_cast(true); + } + } + currentVList.clear(); + } + } + } + } + + // Write the modified slice back to the DataStore + for(int64 p2 = 0; p2 < planeDim2; ++p2) + { + for(int64 p1 = 0; p1 < planeDim1; ++p1) + { + goodVoxels.setValue(static_cast(fixedIdx * fixedStride + p2 * stride2 + p1 * stride1), sliceBuffer[static_cast(p2 * planeDim1 + p1)]); + } + } + } + } +}; + +} // namespace nx::core diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.cpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.cpp index 51805d1765..ea64a61388 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.cpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.cpp @@ -1,10 +1,13 @@ #include "ScalarSegmentFeatures.hpp" +#include #include #include "simplnx/DataStructure/DataStore.hpp" #include "simplnx/DataStructure/Geometry/IGridGeometry.hpp" #include "simplnx/Filter/Actions/CreateArrayAction.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" +#include "simplnx/Utilities/FilterUtilities.hpp" using namespace nx::core; @@ -54,6 +57,15 @@ class TSpecificCompareFunctorBool : public SegmentFeatures::CompareFunctor return false; } + bool compare(int64 index, int64 neighIndex) override + { + if(index >= m_Length || neighIndex >= m_Length) + { + return false; + } + return (*m_Data)[neighIndex] == (*m_Data)[index]; + } + private: int64 m_Length = 0; // Length of the Data Array AbstractDataStore* m_FeatureIdsArray = nullptr; // The Feature Ids @@ -109,12 +121,43 @@ class TSpecificCompareFunctor : public SegmentFeatures::CompareFunctor return false; } + bool compare(int64 index, int64 neighIndex) override + { + if(index >= m_Length || neighIndex >= m_Length) + { + return false; + } + + if(m_Data[index] >= m_Data[neighIndex]) + { + return (m_Data[index] - m_Data[neighIndex]) <= m_Tolerance; + } + return (m_Data[neighIndex] - m_Data[index]) <= m_Tolerance; + } + private: int64 m_Length = 0; // Length of the Data Array T m_Tolerance = static_cast(0); // The tolerance of the comparison AbstractDataStore* m_FeatureIdsArray = nullptr; // The Feature Ids DataStoreType& m_Data; // The data that is being compared }; + +/** + * @brief Functor for type-dispatched filling of a scalar slice buffer. + * Converts typed data store values to float64 for uniform comparison. + */ +struct FillScalarSliceBufferFunctor +{ + template + void operator()(IDataArray* dataArray, int64 baseIndex, usize sliceSize, std::vector& buffer, usize bufferOffset) + { + auto& store = dataArray->template getIDataStoreRefAs>(); + for(usize i = 0; i < sliceSize; i++) + { + buffer[bufferOffset + i] = static_cast(store[static_cast(baseIndex) + i]); + } + } +}; } // namespace ScalarSegmentFeatures::ScalarSegmentFeatures(DataStructure& dataStructure, ScalarSegmentFeaturesInputValues* inputValues, const std::atomic_bool& shouldCancel, @@ -127,6 +170,39 @@ ScalarSegmentFeatures::ScalarSegmentFeatures(DataStructure& dataStructure, Scala ScalarSegmentFeatures::~ScalarSegmentFeatures() noexcept = default; +// ----------------------------------------------------------------------------- +// Segments an image/rectilinear grid into features (regions) by flood-filling +// contiguous voxels whose scalar values differ by no more than a user-specified +// tolerance. This is a general-purpose segmentation: it works on any single- +// component scalar array (int8 through float64, plus boolean), unlike the +// orientation-based EBSD and CAxis segment filters. +// +// Comparator setup: +// A type-dispatched CompareFunctor is instantiated via a switch on the input +// array's DataType. Each TSpecificCompareFunctor stores the tolerance cast +// to the native type and performs |a - b| <= tolerance using unsigned-safe +// subtraction. Boolean arrays use a dedicated TSpecificCompareFunctorBool +// that checks for exact equality (no tolerance concept). If the input array +// has more than one component, a default CompareFunctor that always returns +// false is used, effectively preventing any grouping. +// +// Algorithm dispatch: +// - In-core data -> execute() : classic depth-first-search (DFS) flood fill +// - Out-of-core -> executeCCL() : connected-component labeling that streams +// data slice-by-slice to limit memory usage +// The choice is made by checking IsOutOfCore() on the FeatureIds array (i.e., +// whether the backing DataStore lives on disk) or if ForceOocAlgorithm() is +// set (used for testing). +// +// Post-processing after either algorithm: +// 1. Validate that at least one feature was found (error if not). +// 2. Resize the Feature AttributeMatrix to (m_FoundFeatures + 1) tuples so +// that all per-feature arrays (Active, etc.) have the correct size. +// Index 0 is reserved as an invalid/background feature. +// 3. Initialize the Active array: fill with 1 (active), then set index 0 +// to 0 to mark it as the reserved background slot. +// 4. Optionally randomize FeatureIds so that spatially adjacent features get +// non-sequential IDs, improving visual contrast in color-mapped renders. // ----------------------------------------------------------------------------- Result<> ScalarSegmentFeatures::operator()() { @@ -150,6 +226,7 @@ Result<> ScalarSegmentFeatures::operator()() m_FeatureIdsArray = m_DataStructure.getDataAs(m_InputValues->FeatureIdsArrayPath); auto* inputDataArray = m_DataStructure.getDataAs(m_InputValues->InputDataPath); + m_InputDataArray = inputDataArray; size_t inDataPoints = inputDataArray->getNumberOfTuples(); nx::core::DataType dataType = inputDataArray->getDataType(); @@ -209,8 +286,21 @@ Result<> ScalarSegmentFeatures::operator()() m_CompareFunctor = std::make_shared(); // The default CompareFunctor which ALWAYS returns false for the comparison } - // Run the segmentation algorithm - execute(gridGeom); + // Dispatch between DFS (in-core) and CCL (OOC) algorithms + if(IsOutOfCore(*m_FeatureIdsArray) || ForceOocAlgorithm()) + { + SizeVec3 udims = gridGeom->getDimensions(); + allocateSliceBuffers(static_cast(udims[0]), static_cast(udims[1])); + + auto& featureIdsStore = m_FeatureIdsArray->getDataStoreRef(); + executeCCL(gridGeom, featureIdsStore); + + deallocateSliceBuffers(); + } + else + { + execute(gridGeom); + } // Sanity check the result. if(this->m_FoundFeatures < 1) { @@ -223,8 +313,9 @@ Result<> ScalarSegmentFeatures::operator()() cellFeaturesAM.resizeTuples(tDims); // This will resize the active array // make sure all values are initialized and "re-reserve" index 0 - auto& activeStore = m_DataStructure.getDataAs(m_InputValues->ActiveArrayPath)->getDataStoreRef(); - activeStore[0] = 0; + auto* activeArray = m_DataStructure.getDataAs(m_InputValues->ActiveArrayPath); + activeArray->getDataStore()->fill(1); + (*activeArray)[0] = 0; // Randomize the feature Ids for purely visual clarify. Having random Feature Ids // allows users visualizing the data to better discern each grain otherwise the coloring @@ -237,6 +328,24 @@ Result<> ScalarSegmentFeatures::operator()() return {}; } +// ----------------------------------------------------------------------------- +// Finds the next unassigned voxel that can serve as the seed for a new feature. +// The scan is a simple linear walk starting from `nextSeed`, which is the index +// immediately after the last seed found. This avoids rescanning already-assigned +// voxels at the beginning of the array. +// +// A voxel is eligible to become a seed when both conditions are met: +// 1. featureId == 0 : the voxel has not yet been assigned to any feature. +// 2. Passes the mask: if masking is enabled, the voxel must be flagged as +// "good" (e.g., not a bad scan point). +// +// Note: Unlike EBSD and CAxis segmentation, there is no phase > 0 requirement +// because scalar segmentation is phase-agnostic -- it operates on arbitrary +// numeric data that has no concept of crystallographic phase. +// +// When a valid seed is found, its featureId is immediately set to `gnum` +// (the new feature number) so that subsequent calls will skip it. +// Returns the linear index of the seed, or -1 if no more seeds exist. // ----------------------------------------------------------------------------- int64_t ScalarSegmentFeatures::getSeed(int32 gnum, int64 nextSeed) const { @@ -271,6 +380,19 @@ int64_t ScalarSegmentFeatures::getSeed(int32 gnum, int64 nextSeed) const return seed; } +// ----------------------------------------------------------------------------- +// Determines whether a neighboring voxel should be merged into the current +// feature during the DFS flood fill (execute() path). This is NOT used by +// the CCL path, which calls areNeighborsSimilar() instead. +// +// The method checks two conditions before grouping: +// 1. The neighbor's featureId must be 0 (unassigned). +// 2. The neighbor must pass the mask (if masking is enabled). +// +// If both conditions pass, the type-dispatched CompareFunctor is invoked. +// The functor checks whether |scalar[reference] - scalar[neighbor]| <= tolerance +// (for numeric types) or exact equality (for booleans). As a side effect, the +// functor assigns featureId = gnum to the neighbor if the comparison succeeds. // ----------------------------------------------------------------------------- bool ScalarSegmentFeatures::determineGrouping(int64 referencepoint, int64 neighborpoint, int32 gnum) const { @@ -283,3 +405,236 @@ bool ScalarSegmentFeatures::determineGrouping(int64 referencepoint, int64 neighb return false; } + +// ----------------------------------------------------------------------------- +// Checks whether a single voxel is eligible for segmentation (used by the CCL +// path in executeCCL()). For scalar segmentation, validity only depends on the +// mask -- there is no phase check because scalar data is phase-agnostic. +// +// Slice buffer fast path: +// When m_UseSliceBuffers is true (OOC mode), the method first checks whether +// the voxel's Z-slice is currently loaded in the rolling 2-slot buffer. The +// slot is determined by (iz % 2). If the voxel's slice matches the buffered +// slice index, the mask value is read directly from the in-memory m_MaskBuffer, +// avoiding an on-disk I/O round-trip. +// +// OOC fallback: +// If slice buffers are not active, or if the voxel's slice is not currently +// buffered (which can happen during Phase 1b of CCL when periodic boundary +// merging accesses non-adjacent slices), the method falls back to direct +// MaskCompare access, which may trigger on-disk I/O for out-of-core data. +// ----------------------------------------------------------------------------- +bool ScalarSegmentFeatures::isValidVoxel(int64 point) const +{ + if(m_UseSliceBuffers) + { + const int64 iz = point / m_BufSliceSize; + const int slot = static_cast(iz % 2); + if(m_BufferedSliceZ[slot] == iz) + { + const usize off = static_cast(slot) * static_cast(m_BufSliceSize) + static_cast(point - iz * m_BufSliceSize); + if(m_InputValues->UseMask && m_MaskBuffer[off] == 0) + { + return false; + } + return true; + } + } + + // Fallback: direct OOC access + if(m_InputValues->UseMask && !m_GoodVoxels->isTrue(point)) + { + return false; + } + return true; +} + +// ----------------------------------------------------------------------------- +// Determines whether two neighboring voxels have sufficiently similar scalar +// values to belong to the same feature. Used exclusively by the CCL path +// (executeCCL()), whereas the DFS path uses determineGrouping() instead. +// +// Slice buffer fast path: +// When both voxels' Z-slices are present in the rolling 2-slot buffer, all +// data is read from the in-memory buffers (m_ScalarBuffer, m_MaskBuffer). +// The buffer offset for each point is computed as: +// slot * sliceSize + (point - iz * sliceSize) +// The method then: +// 1. Checks point2's mask validity. +// 2. Reads both scalar values from m_ScalarBuffer as float64. +// 3. Computes |val1 - val2| and returns true if <= ScalarTolerance. +// All scalar types are stored as float64 in the buffer so that a single +// comparison path works regardless of the original data type. The tolerance +// is also cast to float64 for the comparison. +// +// OOC fallback: +// If either voxel's slice is not buffered (e.g., during Phase 1b periodic +// merge), falls back to direct DataStore access: validates point2 via +// isValidVoxel(), then delegates to m_CompareFunctor->compare() which +// reads from the original typed array on disk. +// ----------------------------------------------------------------------------- +bool ScalarSegmentFeatures::areNeighborsSimilar(int64 point1, int64 point2) const +{ + if(m_UseSliceBuffers) + { + const int64 iz1 = point1 / m_BufSliceSize; + const int slot1 = static_cast(iz1 % 2); + const int64 iz2 = point2 / m_BufSliceSize; + const int slot2 = static_cast(iz2 % 2); + + if(m_BufferedSliceZ[slot1] == iz1 && m_BufferedSliceZ[slot2] == iz2) + { + const usize sliceSize = static_cast(m_BufSliceSize); + const usize off1 = static_cast(slot1) * sliceSize + static_cast(point1 - iz1 * m_BufSliceSize); + const usize off2 = static_cast(slot2) * sliceSize + static_cast(point2 - iz2 * m_BufSliceSize); + + // Check point2 validity + if(m_InputValues->UseMask && m_MaskBuffer[off2] == 0) + { + return false; + } + + // Compare scalar values from the pre-loaded buffer + float64 val1 = m_ScalarBuffer[off1]; + float64 val2 = m_ScalarBuffer[off2]; + float64 diff = val1 >= val2 ? (val1 - val2) : (val2 - val1); + return diff <= static_cast(m_InputValues->ScalarTolerance); + } + } + + // Fallback: direct OOC access + if(!isValidVoxel(point2)) + { + return false; + } + return m_CompareFunctor->compare(point1, point2); +} + +// ----------------------------------------------------------------------------- +// Allocates the rolling 2-slot slice buffers used by the CCL (OOC) algorithm. +// Called once at the start of the OOC branch in operator(), before executeCCL(). +// +// Each slot holds one full XY slice (dimX * dimY voxels). Two slots are needed +// because the CCL algorithm compares the current slice (iz) with the previous +// slice (iz-1), so both must be in memory simultaneously. +// +// Buffers allocated: +// - m_ScalarBuffer : 2 * sliceSize float64 values (one scalar per voxel, +// stored as float64 regardless of the original data type +// so that a single comparison path works for all types) +// - m_MaskBuffer : 2 * sliceSize uint8 values (one mask flag per voxel) +// +// Both m_BufferedSliceZ slots are initialized to -1 (no slice loaded). +// m_UseSliceBuffers is set to true so that isValidVoxel() and +// areNeighborsSimilar() will use the fast buffer path. +// ----------------------------------------------------------------------------- +void ScalarSegmentFeatures::allocateSliceBuffers(int64 dimX, int64 dimY) +{ + m_BufSliceSize = dimX * dimY; + const usize sliceSize = static_cast(m_BufSliceSize); + m_ScalarBuffer.resize(2 * sliceSize); + m_MaskBuffer.resize(2 * sliceSize); + m_BufferedSliceZ[0] = -1; + m_BufferedSliceZ[1] = -1; + m_UseSliceBuffers = true; +} + +// ----------------------------------------------------------------------------- +// Releases the slice buffers after executeCCL() completes, freeing the memory +// back to the system. Called in the OOC branch of operator() after the CCL +// algorithm finishes. Resets m_UseSliceBuffers to false and both +// m_BufferedSliceZ slots to -1. The vectors are replaced with default- +// constructed (empty) instances to guarantee memory deallocation. +// ----------------------------------------------------------------------------- +void ScalarSegmentFeatures::deallocateSliceBuffers() +{ + m_UseSliceBuffers = false; + m_ScalarBuffer = std::vector(); + m_MaskBuffer = std::vector(); + m_BufferedSliceZ[0] = -1; + m_BufferedSliceZ[1] = -1; +} + +// ----------------------------------------------------------------------------- +// Pre-loads voxel data for a single Z-slice into the rolling 2-slot buffer, +// called by executeCCL() before processing each slice. +// +// Rolling buffer design: +// The target slot is determined by (iz % 2), so even slices go to slot 0 and +// odd slices go to slot 1. Because the CCL algorithm processes slices in +// order (0, 1, 2, ...), at any given slice iz the previous slice (iz-1) is +// always in the other slot, keeping both the current and previous slice data +// available in memory. +// +// Sentinel behavior: +// If iz < 0, slice buffering is disabled (m_UseSliceBuffers = false). The +// CCL algorithm passes iz = -1 after completing the slice-by-slice sweep to +// signal that subsequent calls (e.g., during Phase 1b periodic boundary +// merging) should use direct DataStore access instead of the buffers. +// +// Skip-if-already-loaded: +// If m_BufferedSliceZ[slot] == iz, the data for this slice is already in the +// buffer (e.g., from a previous prepareForSlice call), so the method returns +// immediately without re-reading. +// +// Data loaded per slice: +// - Scalar values (1 float64 per voxel) into m_ScalarBuffer. The type +// dispatch uses ExecuteDataFunctionNoBool with FillScalarSliceBufferFunctor +// to convert the original typed data (int8..float64) to float64. Boolean +// arrays are handled separately because ExecuteDataFunctionNoBool excludes +// bool; they are converted to 0.0/1.0 manually. +// - Mask flags (1 uint8 per voxel) into m_MaskBuffer; if masking is disabled, +// all mask values are set to 1 (valid). +// ----------------------------------------------------------------------------- +void ScalarSegmentFeatures::prepareForSlice(int64 iz, int64 dimX, int64 dimY, int64 dimZ) +{ + if(iz < 0) + { + m_UseSliceBuffers = false; + return; + } + if(!m_UseSliceBuffers) + { + return; + } + + const int slot = static_cast(iz % 2); + if(m_BufferedSliceZ[slot] == iz) + { + return; + } + + const usize sliceSize = static_cast(m_BufSliceSize); + const usize slotOffset = static_cast(slot) * sliceSize; + const int64 baseIndex = iz * m_BufSliceSize; + + // Fill scalar data buffer using type dispatch + DataType dataType = m_InputDataArray->getDataType(); + if(dataType == DataType::boolean) + { + auto& store = m_InputDataArray->template getIDataStoreRefAs>(); + for(usize i = 0; i < sliceSize; i++) + { + m_ScalarBuffer[slotOffset + i] = store[static_cast(baseIndex) + i] ? 1.0 : 0.0; + } + } + else + { + ExecuteDataFunctionNoBool(FillScalarSliceBufferFunctor{}, dataType, m_InputDataArray, baseIndex, sliceSize, m_ScalarBuffer, slotOffset); + } + + // Fill mask buffer + if(m_InputValues->UseMask && m_GoodVoxels != nullptr) + { + for(usize i = 0; i < sliceSize; i++) + { + m_MaskBuffer[slotOffset + i] = m_GoodVoxels->isTrue(static_cast(baseIndex) + i) ? 1 : 0; + } + } + else + { + std::fill(m_MaskBuffer.begin() + slotOffset, m_MaskBuffer.begin() + slotOffset + sliceSize, static_cast(1)); + } + + m_BufferedSliceZ[slot] = iz; +} diff --git a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.hpp b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.hpp index 6a1393eeae..2d0b70a177 100644 --- a/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.hpp +++ b/src/Plugins/SimplnxCore/src/SimplnxCore/Filters/Algorithms/ScalarSegmentFeatures.hpp @@ -51,32 +51,50 @@ class SIMPLNXCORE_EXPORT ScalarSegmentFeatures : public SegmentFeatures Result<> operator()(); protected: + int64_t getSeed(int32 gnum, int64 nextSeed) const override; + bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + /** - * @brief - * @param data - * @param args - * @param gnum - * @param nextSeed - * @return int64 + * @brief Checks whether a voxel can participate in scalar segmentation based on the mask. + * @param point Linear voxel index. + * @return true if the voxel passes the mask check (or no mask is used). */ - int64_t getSeed(int32 gnum, int64 nextSeed) const override; + bool isValidVoxel(int64 point) const override; /** - * @brief - * @param data - * @param args - * @param referencePoint - * @param neighborPoint - * @param gnum - * @return bool + * @brief Determines whether two neighboring voxels belong to the same scalar segment. + * @param point1 First voxel index. + * @param point2 Second (neighbor) voxel index. + * @return true if both voxels are valid and their scalar values are within tolerance. */ - bool determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const override; + bool areNeighborsSimilar(int64 point1, int64 point2) const override; + + /** + * @brief Pre-loads input scalar and mask data for the given Z-slice into + * rolling buffers, eliminating per-element OOC overhead during CCL. + * @param iz Current Z-slice index, or -1 to disable buffering. + * @param dimX X dimension of the grid. + * @param dimY Y dimension of the grid. + * @param dimZ Z dimension of the grid. + */ + void prepareForSlice(int64 iz, int64 dimX, int64 dimY, int64 dimZ) override; private: + void allocateSliceBuffers(int64 dimX, int64 dimY); + void deallocateSliceBuffers(); + const ScalarSegmentFeaturesInputValues* m_InputValues = nullptr; FeatureIdsArrayType* m_FeatureIdsArray = nullptr; GoodVoxelsArrayType* m_GoodVoxelsArray = nullptr; std::shared_ptr m_CompareFunctor; std::unique_ptr m_GoodVoxels = nullptr; + IDataArray* m_InputDataArray = nullptr; + + // Rolling 2-slot input buffers for OOC optimization. + std::vector m_ScalarBuffer; + std::vector m_MaskBuffer; + int64 m_BufSliceSize = 0; + int64 m_BufferedSliceZ[2] = {-1, -1}; + bool m_UseSliceBuffers = false; }; } // namespace nx::core diff --git a/src/Plugins/SimplnxCore/test/CMakeLists.txt b/src/Plugins/SimplnxCore/test/CMakeLists.txt index 148a53daa6..27dc58bb53 100644 --- a/src/Plugins/SimplnxCore/test/CMakeLists.txt +++ b/src/Plugins/SimplnxCore/test/CMakeLists.txt @@ -283,6 +283,9 @@ if(EXISTS "${DREAM3D_DATA_DIR}" AND SIMPLNX_DOWNLOAD_TEST_FILES) download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME compute_grouping_densities.tar.gz SHA512 96066196d6aa5f87cc7b717f959848c2f3025b7129589abe1eded2a8d725c539a89b0a6290a388a56b5a401e0bd3041698fbd8e8cf37a1f18fdd937debd21531) download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME hierarchical_smoothing.tar.gz SHA512 47217ee420d9438c3d36a195c06ae060917f5fb7ee295feffdabf05741bec87bf29c3b44016b744930cda7383cd05e0d58df7e7776a7732dc46c12b780e51398) download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME identify_sample_v2.tar.gz SHA512 a7ffac3eaad479c07215c1dd16274c45a52466708a9d27b5f85a29b0eba3b6705b627e1052a7a27e9bfe89cd6e7df673beb7a1e98b262b6c52ea383b4848ac31) + download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME segment_features_exemplars.tar.gz SHA512 004fdccf1d2af6dbea8690a9213cf485ddd8c3afb90416895de7d8f52c51f4c2e4b73edb335b01572934ad4d2ff188edc047b3a64f76beff8d280e55ef08e2ab) + download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME fill_bad_data_exemplars.tar.gz SHA512 4a3504dd5173d5ef7c37dff68b6119f65aef24a8bb893ab9777d13dc993955c39bb39ff1771ec0dcc65db9cb9a0ee249b8eca4f15726a5db1743f41bcf20cab8) + download_test_data(DREAM3D_DATA_DIR ${DREAM3D_DATA_DIR} ARCHIVE_NAME identify_sample_exemplars.tar.gz SHA512 f9433c0e7810d9acdb706ac61eca2134bdbacd10983f2d090e5bdfb0bb6a44dbc2ebaa7d8f4bfef30e4cd813dcf3d2590cb185a1957654afff17e88a5b19783c) endif() diff --git a/src/Plugins/SimplnxCore/test/FillBadDataTest.cpp b/src/Plugins/SimplnxCore/test/FillBadDataTest.cpp index fc47364d82..f1e976139a 100644 --- a/src/Plugins/SimplnxCore/test/FillBadDataTest.cpp +++ b/src/Plugins/SimplnxCore/test/FillBadDataTest.cpp @@ -1,12 +1,14 @@ #include +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" #include "simplnx/Parameters/MultiArraySelectionParameter.hpp" -#include "simplnx/Pipeline/AbstractPipelineNode.hpp" -#include "simplnx/Pipeline/Pipeline.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" +#include "simplnx/Utilities/DataStoreUtilities.hpp" #include "SimplnxCore/Filters/FillBadDataFilter.hpp" -#include "SimplnxCore/Filters/ReadDREAM3DFilter.hpp" #include "SimplnxCore/SimplnxCore_test_dirs.hpp" #include @@ -16,56 +18,83 @@ using namespace nx::core; using namespace nx::core::Constants; using namespace nx::core::UnitTest; -TEST_CASE("SimplnxCore::FillBadData_SmallIN100", "[Core][FillBadDataFilter]") +namespace { - // Load the Simplnx Application instance and load the plugins - UnitTest::LoadPlugins(); - - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "6_5_fill_bad_data.tar.gz", "6_5_fill_bad_data"); - // Read Exemplar DREAM3D File Filter - auto exemplarFilePath = fs::path(fmt::format("{}/6_5_fill_bad_data/6_5_exemplar.dream3d", unit_test::k_TestFilesDir)); - DataStructure exemplarDataStructure = UnitTest::LoadDataStructure(exemplarFilePath); - - // Read the Small IN100 Data set - auto baseDataFilePath = fs::path(fmt::format("{}/6_5_fill_bad_data/6_5_input.dream3d", unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(baseDataFilePath); - - { - // Instantiate the filter, a DataStructure object and an Arguments Object - FillBadDataFilter filter; - Arguments args; +/** + * @brief Builds a FillBadData test dataset with block-patterned FeatureIds + * and ~10% scattered bad voxels (FeatureId=0) using a deterministic pattern. + */ +void BuildFillBadDataTestData(DataStructure& ds, usize dimX, usize dimY, usize dimZ, usize blockSize, bool addLargeDefect = false) +{ + const ShapeType cellShape = {dimZ, dimY, dimX}; + auto* imageGeom = ImageGeom::Create(ds, "DataContainer"); + imageGeom->setDimensions({dimX, dimY, dimZ}); + imageGeom->setSpacing({1.0f, 1.0f, 1.0f}); + imageGeom->setOrigin({0.0f, 0.0f, 0.0f}); - // Create default Parameters for the filter. - args.insertOrAssign(FillBadDataFilter::k_MinAllowedDefectSize_Key, std::make_any(1000)); - args.insertOrAssign(FillBadDataFilter::k_StoreAsNewPhase_Key, std::make_any(false)); - args.insertOrAssign(FillBadDataFilter::k_CellFeatureIdsArrayPath_Key, std::make_any(k_FeatureIdsArrayPath)); - args.insertOrAssign(FillBadDataFilter::k_CellPhasesArrayPath_Key, std::make_any(k_PhasesArrayPath)); + auto* cellAM = AttributeMatrix::Create(ds, "CellData", cellShape, imageGeom->getId()); + imageGeom->setCellData(*cellAM); - args.insertOrAssign(FillBadDataFilter::k_IgnoredDataArrayPaths_Key, std::make_any(MultiArraySelectionParameter::ValueType{})); - args.insertOrAssign(FillBadDataFilter::k_SelectedImageGeometryPath_Key, std::make_any(k_DataContainerPath)); + auto featureIdsDataStore = DataStoreUtilities::CreateDataStore(cellShape, {1}, IDataAction::Mode::Execute); + auto* featureIdsArray = DataArray::Create(ds, "FeatureIds", featureIdsDataStore, cellAM->getId()); + auto& featureIdsStore = featureIdsArray->getDataStoreRef(); - // Preflight the filter and check the result - auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions) + auto phasesDataStore = DataStoreUtilities::CreateDataStore(cellShape, {1}, IDataAction::Mode::Execute); + auto* phasesArray = DataArray::Create(ds, "Phases", phasesDataStore, cellAM->getId()); + auto& phasesStore = phasesArray->getDataStoreRef(); - // Execute the filter and check the result - auto executeResult = filter.execute(dataStructure, args); //, nullptr, IFilter::MessageHandler{[](const IFilter::Message& message) { fmt::print("{}\n", message.message); }}); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result) + const usize blocksPerDim = dimX / blockSize; + for(usize z = 0; z < dimZ; z++) + { + for(usize y = 0; y < dimY; y++) + { + for(usize x = 0; x < dimX; x++) + { + const usize idx = z * dimX * dimY + y * dimX + x; + phasesStore[idx] = 1; + + usize bx = x / blockSize; + usize by = y / blockSize; + usize bz = z / blockSize; + int32 blockFeatureId = static_cast(bz * blocksPerDim * blocksPerDim + by * blocksPerDim + bx + 1); + + // Scatter bad voxels: ~10% of voxels become bad (FeatureId=0) + bool isBad = ((x * 7 + y * 13 + z * 29) % 10 == 0); + featureIdsStore[idx] = isBad ? 0 : blockFeatureId; + } + } } - UnitTest::CompareExemplarToGeneratedData(dataStructure, exemplarDataStructure, k_CellAttributeMatrix, k_DataContainer); - - // Write the DataStructure out to the file system - // #ifdef SIMPLNX_WRITE_TEST_OUTPUT - WriteTestDataStructure(dataStructure, fs::path(fmt::format("{}/7_0_fill_bad_data.dream3d", unit_test::k_BinaryTestOutputDir))); - // #endif - - UnitTest::CheckArraysInheritTupleDims(dataStructure); + // Add a contiguous large defect: entire z=dimZ/2 plane set to FeatureId=0 + if(addLargeDefect) + { + const usize z = dimZ / 2; + for(usize y = 0; y < dimY; y++) + { + for(usize x = 0; x < dimX; x++) + { + featureIdsStore[z * dimX * dimY + y * dimX + x] = 0; + } + } + } } +// Exemplar archive +const std::string k_ArchiveName = "fill_bad_data_exemplars.tar.gz"; +const std::string k_DataDirName = "fill_bad_data_exemplars"; +const fs::path k_DataDir = fs::path(unit_test::k_TestFilesDir.view()) / k_DataDirName; +const fs::path k_ExemplarFile = k_DataDir / "fill_bad_data.dream3d"; + +// Test dimensions for 200^3 tests +constexpr usize k_Dim = 200; +constexpr usize k_BlockSize = 25; +constexpr int32 k_MinDefectSize = 50; +} // namespace TEST_CASE("SimplnxCore::FillBadData::Test01_SingleSmallDefect", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); // 100 bytes - force very small arrays to OOC @@ -104,6 +133,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test01_SingleSmallDefect", "[Core][FillBadD TEST_CASE("SimplnxCore::FillBadData::Test02_SingleLargeDefect", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -143,6 +174,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test02_SingleLargeDefect", "[Core][FillBadD TEST_CASE("SimplnxCore::FillBadData::Test03_ThresholdBoundary", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -177,6 +210,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test03_ThresholdBoundary", "[Core][FillBadD TEST_CASE("SimplnxCore::FillBadData::Test04_MultipleSmallDefects", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 500, true); // Slightly larger for 10x10x10 @@ -211,6 +246,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test04_MultipleSmallDefects", "[Core][FillB TEST_CASE("SimplnxCore::FillBadData::Test05_MixedSmallAndLarge", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 500, true); @@ -245,6 +282,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test05_MixedSmallAndLarge", "[Core][FillBad TEST_CASE("SimplnxCore::FillBadData::Test06_SingleVoxelDefects", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -279,6 +318,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test06_SingleVoxelDefects", "[Core][FillBad TEST_CASE("SimplnxCore::FillBadData::Test07_DefectsAtBoundaries", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -313,6 +354,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test07_DefectsAtBoundaries", "[Core][FillBa TEST_CASE("SimplnxCore::FillBadData::Test11_NeighborTieBreaking", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 50, true); // Very small for 3x3x3 @@ -352,6 +395,8 @@ TEST_CASE("SimplnxCore::FillBadData::Test11_NeighborTieBreaking", "[Core][FillBa TEST_CASE("SimplnxCore::FillBadData::Test13_StoreAsNewPhase", "[Core][FillBadDataFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); // Configure out-of-core settings (automatically restored on scope exit) const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 100, true); @@ -387,3 +432,141 @@ TEST_CASE("SimplnxCore::FillBadData::Test13_StoreAsNewPhase", "[Core][FillBadDat UnitTest::CheckArraysInheritTupleDims(dataStructure); } + +TEST_CASE("SimplnxCore::FillBadData: 200x200x200 Correctness", "[Core][FillBadDataFilter]") +{ + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // int32 1-comp => 200*200*4 = 160,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 160000, true); + + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, k_ArchiveName, k_DataDirName); + DataStructure exemplarDS = UnitTest::LoadDataStructure(k_ExemplarFile); + + std::string testName = GENERATE("NoNewPhase", "NewPhase"); + DYNAMIC_SECTION("Variant: " << testName) + { + const bool storeAsNewPhase = (testName == "NewPhase"); + + DataStructure dataStructure; + BuildFillBadDataTestData(dataStructure, k_Dim, k_Dim, k_Dim, k_BlockSize, true); + + FillBadDataFilter filter; + Arguments args; + args.insertOrAssign(FillBadDataFilter::k_MinAllowedDefectSize_Key, std::make_any(k_MinDefectSize)); + args.insertOrAssign(FillBadDataFilter::k_StoreAsNewPhase_Key, std::make_any(storeAsNewPhase)); + args.insertOrAssign(FillBadDataFilter::k_CellFeatureIdsArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "FeatureIds"}))); + args.insertOrAssign(FillBadDataFilter::k_CellPhasesArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "Phases"}))); + args.insertOrAssign(FillBadDataFilter::k_IgnoredDataArrayPaths_Key, std::make_any(MultiArraySelectionParameter::ValueType{})); + args.insertOrAssign(FillBadDataFilter::k_SelectedImageGeometryPath_Key, std::make_any(DataPath({"DataContainer"}))); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + + // Compare against exemplar + const std::string exemplarGeomName = "DataContainer_" + testName + "_Exemplar"; + const DataPath exemplarFeatureIdsPath({exemplarGeomName, "CellData", "FeatureIds"}); + const DataPath exemplarPhasesPath({exemplarGeomName, "CellData", "Phases"}); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(DataPath({"DataContainer", "CellData", "FeatureIds"}))); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarFeatureIdsPath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarFeatureIdsPath), dataStructure.getDataRefAs(DataPath({"DataContainer", "CellData", "FeatureIds"}))); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(DataPath({"DataContainer", "CellData", "Phases"}))); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarPhasesPath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarPhasesPath), dataStructure.getDataRefAs(DataPath({"DataContainer", "CellData", "Phases"}))); + + UnitTest::CheckArraysInheritTupleDims(dataStructure); + } +} + +TEST_CASE("SimplnxCore::FillBadData: 200x200x200 Ignored Arrays", "[Core][FillBadDataFilter]") +{ + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // int32 1-comp => 200*200*4 = 160,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 160000, true); + + constexpr int32 k_Sentinel = -999; + + DataStructure dataStructure; + BuildFillBadDataTestData(dataStructure, k_Dim, k_Dim, k_Dim, k_BlockSize, false); + + // Add an extra "IgnoredArray" filled with a sentinel value + auto& cellAM = dataStructure.getDataRefAs(DataPath({"DataContainer", "CellData"})); + auto ignoredDataStore = DataStoreUtilities::CreateDataStore(cellAM.getShape(), {1}, IDataAction::Mode::Execute); + auto* ignoredArray = DataArray::Create(dataStructure, "IgnoredArray", ignoredDataStore, cellAM.getId()); + auto& ignoredStore = ignoredArray->getDataStoreRef(); + for(usize i = 0; i < ignoredStore.getNumberOfTuples(); i++) + { + ignoredStore[i] = k_Sentinel; + } + + // Record which voxels are bad before fill + const auto& featureIdsBefore = dataStructure.getDataRefAs(DataPath({"DataContainer", "CellData", "FeatureIds"})); + std::vector wasBad(featureIdsBefore.getNumberOfTuples(), false); + usize badCount = 0; + for(usize i = 0; i < featureIdsBefore.getNumberOfTuples(); i++) + { + if(featureIdsBefore.getDataStoreRef().getValue(i) == 0) + { + wasBad[i] = true; + badCount++; + } + } + REQUIRE(badCount > 0); + + { + FillBadDataFilter filter; + Arguments args; + args.insertOrAssign(FillBadDataFilter::k_MinAllowedDefectSize_Key, std::make_any(50)); + args.insertOrAssign(FillBadDataFilter::k_StoreAsNewPhase_Key, std::make_any(false)); + args.insertOrAssign(FillBadDataFilter::k_CellFeatureIdsArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "FeatureIds"}))); + args.insertOrAssign(FillBadDataFilter::k_CellPhasesArrayPath_Key, std::make_any(DataPath({"DataContainer", "CellData", "Phases"}))); + args.insertOrAssign(FillBadDataFilter::k_SelectedImageGeometryPath_Key, std::make_any(DataPath({"DataContainer"}))); + + // Include the IgnoredArray in the ignored paths + MultiArraySelectionParameter::ValueType ignoredPaths = {DataPath({"DataContainer", "CellData", "IgnoredArray"})}; + args.insertOrAssign(FillBadDataFilter::k_IgnoredDataArrayPaths_Key, std::make_any(ignoredPaths)); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + } + + // Verify: FeatureIds has no zeros (all scattered bad voxels filled) + const auto& featureIdsAfter = dataStructure.getDataRefAs(DataPath({"DataContainer", "CellData", "FeatureIds"})); + for(usize i = 0; i < featureIdsAfter.getNumberOfTuples(); i++) + { + REQUIRE(featureIdsAfter.getDataStoreRef().getValue(i) != 0); + } + + // Verify: IgnoredArray is completely unchanged (sentinel at every voxel) + const auto& ignoredAfter = dataStructure.getDataRefAs(DataPath({"DataContainer", "CellData", "IgnoredArray"})); + for(usize i = 0; i < ignoredAfter.getNumberOfTuples(); i++) + { + REQUIRE(ignoredAfter.getDataStoreRef().getValue(i) == k_Sentinel); + } + + UnitTest::CheckArraysInheritTupleDims(dataStructure); +} + +TEST_CASE("SimplnxCore::FillBadData: Generate Test Data", "[Core][FillBadDataFilter][.GenerateTestData]") +{ + UnitTest::LoadPlugins(); + + const auto outputDir = fs::path(fmt::format("{}/generated_test_data/fill_bad_data", unit_test::k_BinaryTestOutputDir)); + fs::create_directories(outputDir); + + // 200^3 input data with large defect (full z=k_Dim/2 plane) + { + DataStructure ds; + BuildFillBadDataTestData(ds, k_Dim, k_Dim, k_Dim, k_BlockSize, true); + UnitTest::WriteTestDataStructure(ds, outputDir / "input.dream3d"); + } +} diff --git a/src/Plugins/SimplnxCore/test/IdentifySampleTest.cpp b/src/Plugins/SimplnxCore/test/IdentifySampleTest.cpp index c4dc3dc0cb..f0c1c1c085 100644 --- a/src/Plugins/SimplnxCore/test/IdentifySampleTest.cpp +++ b/src/Plugins/SimplnxCore/test/IdentifySampleTest.cpp @@ -2,90 +2,194 @@ #include "SimplnxCore/Filters/IdentifySampleFilter.hpp" #include "SimplnxCore/SimplnxCore_test_dirs.hpp" +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/DataArray.hpp" #include "simplnx/DataStructure/Geometry/ImageGeom.hpp" -#include "simplnx/DataStructure/IDataArray.hpp" #include "simplnx/Parameters/ChoicesParameter.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" +#include "simplnx/Utilities/DataStoreUtilities.hpp" #include +#include + using namespace nx::core; using namespace nx::core::UnitTest; namespace { -const DataPath k_ExemplarArrayPath = Constants::k_DataContainerPath.createChildPath(Constants::k_CellData).createChildPath("Mask Exemplar"); +// Exemplar archive +const std::string k_ArchiveName = "identify_sample_exemplars.tar.gz"; +const std::string k_DataDirName = "identify_sample_exemplars"; +const fs::path k_DataDir = fs::path(unit_test::k_TestFilesDir.view()) / k_DataDirName; +const fs::path k_ExemplarFile = k_DataDir / "identify_sample.dream3d"; + +// Geometry names +constexpr StringLiteral k_GeomName = "DataContainer"; +constexpr StringLiteral k_CellDataName = "CellData"; + +// Output array paths +const DataPath k_GeomPath({k_GeomName}); +const DataPath k_MaskPath({k_GeomName, k_CellDataName, "Mask"}); + +// Test dimensions +constexpr usize k_Dim = 200; + +/** + * @brief Builds an IdentifySample test dataset: a sphere of "good" voxels + * with interior holes and exterior noise. + */ +void BuildIdentifySampleTestData(DataStructure& ds, usize dimX, usize dimY, usize dimZ, const std::string& geomName = "DataContainer") +{ + const ShapeType cellShape = {dimZ, dimY, dimX}; + auto* imageGeom = ImageGeom::Create(ds, geomName); + imageGeom->setDimensions({dimX, dimY, dimZ}); + imageGeom->setSpacing({1.0f, 1.0f, 1.0f}); + imageGeom->setOrigin({0.0f, 0.0f, 0.0f}); + + auto* cellAM = AttributeMatrix::Create(ds, "CellData", cellShape, imageGeom->getId()); + imageGeom->setCellData(*cellAM); + + auto maskDataStore = DataStoreUtilities::CreateDataStore(cellShape, {1}, IDataAction::Mode::Execute); + auto* maskArray = DataArray::Create(ds, "Mask", maskDataStore, cellAM->getId()); + auto& maskStore = maskArray->getDataStoreRef(); + + const float cx = dimX / 2.0f; + const float cy = dimY / 2.0f; + const float cz = dimZ / 2.0f; + const float radius = dimX * 0.4f; + + for(usize z = 0; z < dimZ; z++) + { + for(usize y = 0; y < dimY; y++) + { + for(usize x = 0; x < dimX; x++) + { + const usize idx = z * dimX * dimY + y * dimX + x; + const float dx = static_cast(x) - cx; + const float dy = static_cast(y) - cy; + const float dz = static_cast(z) - cz; + const float dist = std::sqrt(dx * dx + dy * dy + dz * dz); + bool good = dist < radius; + + // Interior holes (positions relative to geometry size so they work at any dim) + const float h1cx = cx + radius * 0.3f; + const float h1cy = cy + radius * 0.3f; + const float h1cz = cz + radius * 0.3f; + const float h1r = dimX * 0.053f; // ~4 at 75, ~10.6 at 200 + const float h2cx = cx - radius * 0.3f; + const float h2cy = cy - radius * 0.3f; + const float h2cz = cz - radius * 0.3f; + const float h2r = dimX * 0.04f; // ~3 at 75, ~8 at 200 + + if(good) + { + const float h1 = std::sqrt((static_cast(x) - h1cx) * (static_cast(x) - h1cx) + (static_cast(y) - h1cy) * (static_cast(y) - h1cy) + + (static_cast(z) - h1cz) * (static_cast(z) - h1cz)); + if(h1 < h1r) + { + good = false; + } + const float h2 = std::sqrt((static_cast(x) - h2cx) * (static_cast(x) - h2cx) + (static_cast(y) - h2cy) * (static_cast(y) - h2cy) + + (static_cast(z) - h2cz) * (static_cast(z) - h2cz)); + if(h2 < h2r) + { + good = false; + } + } + + // Isolated noise outside the sphere + if(!good && dist < radius + 5.0f && dist > radius) + { + if((x + y + z) % 7 == 0) + { + good = true; + } + } + + maskStore[idx] = good ? 1 : 0; + } + } + } } -TEST_CASE("SimplnxCore::IdentifySampleFilter", "[SimplnxCore][IdentifySampleFilter]") + +/** + * @brief Populates IdentifySampleFilter arguments from a test variant name. + * + * Name convention: "whole_fill", "sliced_xy_nofill", etc. + */ +void SetupArgs(Arguments& args, const std::string& testName, const DataPath& geomPath, const DataPath& maskPath) +{ + const bool fillHoles = (testName.find("nofill") == std::string::npos); + const bool sliceBySlice = (testName.find("sliced") != std::string::npos); + ChoicesParameter::ValueType slicePlane = 0; + if(testName.find("xz") != std::string::npos) + { + slicePlane = 1; + } + else if(testName.find("yz") != std::string::npos) + { + slicePlane = 2; + } + + args.insertOrAssign(IdentifySampleFilter::k_SelectedImageGeometryPath_Key, std::make_any(geomPath)); + args.insertOrAssign(IdentifySampleFilter::k_MaskArrayPath_Key, std::make_any(maskPath)); + args.insertOrAssign(IdentifySampleFilter::k_FillHoles_Key, std::make_any(fillHoles)); + args.insertOrAssign(IdentifySampleFilter::k_SliceBySlice_Key, std::make_any(sliceBySlice)); + args.insertOrAssign(IdentifySampleFilter::k_SliceBySlicePlane_Key, std::make_any(slicePlane)); +} +} // namespace + +TEST_CASE("SimplnxCore::IdentifySampleFilter: 200x200x200 Exemplar Comparison", "[SimplnxCore][IdentifySampleFilter]") { UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // uint8 1-comp => 200*200*1 = 40,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 40000, true); + + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, k_ArchiveName, k_DataDirName); + DataStructure exemplarDS = UnitTest::LoadDataStructure(k_ExemplarFile); - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "identify_sample_v2.tar.gz", "identify_sample_v2"); - using TestArgType = std::tuple; - /* clang-format off */ - std::vector allTestParams = { - {"sliced", "xy", "fill"}, - {"sliced", "xy", "nofill"}, - {"sliced", "xz", "fill"}, - {"sliced", "xz", "nofill"}, - {"sliced", "yz", "fill"}, - {"sliced", "yz", "nofill"}, - - {"whole", "xy", "fill"}, - {"whole", "xy", "nofill"}, - {"whole", "xz", "fill"}, - {"whole", "xz", "nofill"}, - {"whole", "yz", "fill"}, - {"whole", "yz", "nofill"}, - }; - /* clang-format on */ - for(const auto& testParam : allTestParams) + std::string testName = GENERATE("whole_fill", "whole_nofill", "sliced_xy_fill", "sliced_xy_nofill", "sliced_xz_fill", "sliced_xz_nofill", "sliced_yz_fill", "sliced_yz_nofill"); + DYNAMIC_SECTION("Variant: " << testName) { - std::string slice_by_slice = std::get<0>(testParam); - bool sliceBySlice = slice_by_slice == "sliced"; + DataStructure dataStructure; + BuildIdentifySampleTestData(dataStructure, k_Dim, k_Dim, k_Dim); - std::string slice_plane = std::get<1>(testParam); + IdentifySampleFilter filter; + Arguments args; + SetupArgs(args, testName, k_GeomPath, k_MaskPath); - ChoicesParameter::ValueType sliceBySlicePlane = 0; - if(slice_plane == "xz") - sliceBySlicePlane = 1; - else if(slice_plane == "yz") - sliceBySlicePlane = 2; + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - std::string fill_holes = std::get<2>(testParam); - bool fillHoles = fill_holes == "fill"; + // Compare against exemplar + const std::string exemplarGeomName = testName + "_Exemplar"; + const DataPath exemplarMaskPath({exemplarGeomName, std::string(k_CellDataName), "Mask"}); - SECTION(fmt::format("{}_{}_{}", slice_by_slice, slice_plane, fill_holes)) - { - fs::path inputFilePath = fs::path(fmt::format("{}/identify_sample_v2/{}_{}_{}.dream3d", unit_test::k_TestFilesDir, slice_by_slice, slice_plane, fill_holes)); - std::cout << inputFilePath.string() << std::endl; - - DataStructure dataStructure = LoadDataStructure(inputFilePath); - IdentifySampleFilter filter; - Arguments args; - args.insert(IdentifySampleFilter::k_SelectedImageGeometryPath_Key, std::make_any(Constants::k_DataContainerPath)); - args.insert(IdentifySampleFilter::k_MaskArrayPath_Key, std::make_any(Constants::k_MaskArrayPath)); - args.insert(IdentifySampleFilter::k_FillHoles_Key, std::make_any(fillHoles)); - args.insert(IdentifySampleFilter::k_SliceBySlice_Key, std::make_any(sliceBySlice)); - args.insert(IdentifySampleFilter::k_SliceBySlicePlane_Key, std::make_any(sliceBySlicePlane)); - - // Preflight the filter and check result - auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions) - - // Execute the filter and check the result - auto executeResult = filter.execute(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result) - -#ifdef SIMPLNX_WRITE_TEST_OUTPUT - WriteTestDataStructure(dataStructure, fmt::format("{}/identify_sample_output_{}_{}_{}.dream3d", unit_test::k_BinaryTestOutputDir, fillHoles, sliceBySlice, sliceBySlicePlane)); -#endif - - const IDataArray& computedArray = dataStructure.getDataRefAs(Constants::k_MaskArrayPath); - const IDataArray& exemplarArray = dataStructure.getDataRefAs(k_ExemplarArrayPath); - CompareDataArrays(computedArray, exemplarArray); - - UnitTest::CheckArraysInheritTupleDims(dataStructure); - } + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_MaskPath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarMaskPath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarMaskPath), dataStructure.getDataRefAs(k_MaskPath)); + + UnitTest::CheckArraysInheritTupleDims(dataStructure); + } +} + +TEST_CASE("SimplnxCore::IdentifySampleFilter: Generate Test Data", "[SimplnxCore][IdentifySampleFilter][.GenerateTestData]") +{ + UnitTest::LoadPlugins(); + + const auto outputDir = fs::path(fmt::format("{}/generated_test_data/identify_sample", unit_test::k_BinaryTestOutputDir)); + fs::create_directories(outputDir); + + DataStructure ds; + for(const auto& name : {"whole_fill", "whole_nofill", "sliced_xy_fill", "sliced_xy_nofill", "sliced_xz_fill", "sliced_xz_nofill", "sliced_yz_fill", "sliced_yz_nofill"}) + { + BuildIdentifySampleTestData(ds, k_Dim, k_Dim, k_Dim, name); } + UnitTest::WriteTestDataStructure(ds, outputDir / "input.dream3d"); } diff --git a/src/Plugins/SimplnxCore/test/ScalarSegmentFeaturesTest.cpp b/src/Plugins/SimplnxCore/test/ScalarSegmentFeaturesTest.cpp index 00cb47718f..49c67f5bac 100644 --- a/src/Plugins/SimplnxCore/test/ScalarSegmentFeaturesTest.cpp +++ b/src/Plugins/SimplnxCore/test/ScalarSegmentFeaturesTest.cpp @@ -1,173 +1,300 @@ #include "SimplnxCore/Filters/ScalarSegmentFeaturesFilter.hpp" #include "SimplnxCore/SimplnxCore_test_dirs.hpp" -#include "simplnx/DataStructure/IO/HDF5/DataStructureWriter.hpp" -#include "simplnx/Parameters/ArrayCreationParameter.hpp" -#include "simplnx/Parameters/BoolParameter.hpp" +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" #include "simplnx/Parameters/ChoicesParameter.hpp" +#include "simplnx/UnitTest/SegmentFeaturesTestUtils.hpp" #include "simplnx/UnitTest/UnitTestCommon.hpp" -#include "simplnx/Utilities/DataArrayUtilities.hpp" -#include "simplnx/Utilities/Parsing/DREAM3D/Dream3dIO.hpp" -#include "simplnx/Utilities/Parsing/HDF5/IO/FileIO.hpp" +#include "simplnx/Utilities/AlgorithmDispatch.hpp" #include +#include + using namespace nx::core; using namespace nx::core::UnitTest; -using namespace nx::core::Constants; namespace { -const std::string k_SharedEdgesInputArrayName = "Shared Edges"; -const std::string k_SharedPointsInputArrayName = "Shared Points"; -const std::string k_NothingSharedInputArrayName = "Nothing Shared"; -const std::string k_CombinationInputArrayName = "Combination"; -const std::string k_ExemplarySharedEdgesFaceOnlyFeatureIdsName = "Exemplary Shared Edges FeatureIds - Face Only"; -const std::string k_ExemplarySharedEdgesAllConnectedFeatureIdsName = "Exemplary Shared Edges FeatureIds - All Connected"; -const std::string k_ExemplarySharedPointsFaceOnlyFeatureIdsName = "Exemplary Shared Points FeatureIds - Face Only"; -const std::string k_ExemplarySharedPointsAllConnectedFeatureIdsName = "Exemplary Shared Points FeatureIds - All Connected"; -const std::string k_ExemplaryNothingSharedFaceOnlyFeatureIdsName = "Exemplary Nothing Shared FeatureIds - Face Only"; -const std::string k_ExemplaryNothingSharedAllConnectedFeatureIdsName = "Exemplary Nothing Shared FeatureIds - All Connected"; -const std::string k_ExemplaryCombinationFaceOnlyFeatureIdsName = "Exemplary Combination FeatureIds - Face Only"; -const std::string k_ExemplaryCombinationAllConnectedFeatureIdsName = "Exemplary Combination FeatureIds - All Connected"; +// Exemplar archive +const std::string k_ArchiveName = "segment_features_exemplars.tar.gz"; +const std::string k_DataDirName = "segment_features_exemplars"; +const fs::path k_DataDir = fs::path(unit_test::k_TestFilesDir.view()) / k_DataDirName; +const fs::path k_SmallExemplarFile = k_DataDir / "scalar_small.dream3d"; +const fs::path k_LargeExemplarFile = k_DataDir / "scalar_large.dream3d"; + +// Geometry names +constexpr StringLiteral k_GeomName = "DataContainer"; +constexpr StringLiteral k_CellDataName = "CellData"; +constexpr StringLiteral k_FeatureDataName = "CellFeatureData"; + +// Output array paths +const DataPath k_GeomPath({k_GeomName}); +const DataPath k_FeatureIdsPath({k_GeomName, k_CellDataName, "FeatureIds"}); +const DataPath k_ActivePath({k_GeomName, k_FeatureDataName, "Active"}); +const DataPath k_MaskPath({k_GeomName, k_CellDataName, "Mask"}); + +// Test dimensions +constexpr usize k_SmallDim = 15; +constexpr usize k_SmallBlockSize = 5; +constexpr usize k_LargeDim = 200; +constexpr usize k_LargeBlockSize = 25; + +/** + * @brief Populates ScalarSegmentFeaturesFilter arguments. + */ +void SetupArgs(Arguments& args, bool useMask, bool isPeriodic, int tolerance, ChoicesParameter::ValueType neighborScheme = 0, bool randomize = false) +{ + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_GridGeomPath_Key, std::make_any(k_GeomPath)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_InputArrayPathKey, std::make_any(DataPath({k_GeomName, k_CellDataName, "ScalarData"}))); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ScalarToleranceKey, std::make_any(tolerance)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_UseMask_Key, std::make_any(useMask)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(useMask ? k_MaskPath : DataPath{})); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(neighborScheme)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_IsPeriodic_Key, std::make_any(isPeriodic)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_FeatureIdsName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_CellFeatureName_Key, std::make_any(std::string(k_FeatureDataName))); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_RandomizeFeatures_Key, std::make_any(randomize)); +} } // namespace -TEST_CASE("SimplnxCore::ScalarSegmentFeatures", "[SimplnxCore][ScalarSegmentFeatures]") +TEST_CASE("SimplnxCore::ScalarSegmentFeatures: Small Correctness", "[SimplnxCore][ScalarSegmentFeatures]") { - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "6_5_test_data_1_v2.tar.gz", "6_5_test_data_1_v2"); + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // int32 1-comp => 15*15*4 = 900 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 900, true); - // Read the Small IN100 Data set - auto baseDataFilePath = fs::path(fmt::format("{}/6_5_test_data_1_v2/6_5_test_data_1_v2.dream3d", nx::core::unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(baseDataFilePath); + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, k_ArchiveName, k_DataDirName); + DataStructure exemplarDS = UnitTest::LoadDataStructure(k_SmallExemplarFile); + std::string testName = GENERATE("Base", "Masked", "Periodic", "Tolerance"); + DYNAMIC_SECTION("Variant: " << testName) { - Arguments args; + const bool useMask = (testName == "Masked"); + const bool isPeriodic = (testName == "Periodic"); + const int tolerance = (testName == "Tolerance") ? 1 : 0; + + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, {k_SmallDim, k_SmallDim, k_SmallDim}, std::string(k_GeomName), std::string(k_CellDataName)); + BuildScalarTestData(dataStructure, cellShape, am->getId(), k_SmallBlockSize, "ScalarData", isPeriodic); + + if(useMask) + { + BuildSphericalMask(dataStructure, cellShape, am->getId()); + } + ScalarSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, useMask, isPeriodic, tolerance); - DataPath smallIn100Group({k_DataContainer}); - DataPath ebsdScanDataPath = smallIn100Group.createChildPath(k_CellData); - DataPath inputDataArrayPath = ebsdScanDataPath.createChildPath(k_FeatureIds); - std::string outputFeatureIdsName = "Output_Feature_Ids"; - std::string computedCellDataName = "Computed_CellData"; - DataPath outputFeatureIdsPath = ebsdScanDataPath.createChildPath(outputFeatureIdsName); - DataPath featureDataGroupPath = smallIn100Group.createChildPath(computedCellDataName); - DataPath activeArrayDataPath = featureDataGroupPath.createChildPath(k_ActiveName); - - DataPath gridGeomDataPath({k_DataContainer}); - int scalarTolerance = 0; - - // Create default Parameters for the filter. - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_GridGeomPath_Key, std::make_any(gridGeomDataPath)); - // Turn off the use of a Mask Array - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(DataPath{})); - // Set the input array and the tolerance - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_InputArrayPathKey, std::make_any(inputDataArrayPath)); - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ScalarToleranceKey, std::make_any(scalarTolerance)); - // Set the paths to the created arrays - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_FeatureIdsName_Key, std::make_any(outputFeatureIdsName)); - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_CellFeatureName_Key, std::make_any(computedCellDataName)); - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); - // Are we going to randomize the featureIds when completed. - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_RandomizeFeatures_Key, std::make_any(true)); - - // Preflight the filter and check result auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions) - - // Execute the filter and check the result + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); auto executeResult = filter.execute(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result) + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); - UInt8Array& actives = dataStructure.getDataRefAs(activeArrayDataPath); - size_t numFeatures = actives.getNumberOfTuples(); - REQUIRE(numFeatures == 847); - } + // Compare against exemplar + const std::string exemplarGeomName = testName + "_Exemplar"; + const DataPath exemplarFeatureIdsPath({exemplarGeomName, std::string(k_CellDataName), "FeatureIds"}); + const DataPath exemplarActivePath({exemplarGeomName, std::string(k_FeatureDataName), "Active"}); - { - // Write out the DataStructure for later viewing/debugging - std::string filePath = fmt::format("{}/ScalarSegmentFeatures.dream3d", unit_test::k_BinaryTestOutputDir); - // std::cout << "Writing file to: " << filePath << std::endl; - nx::core::HDF5::FileIO fileWriter = nx::core::HDF5::FileIO::WriteFile(filePath); + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarFeatureIdsPath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarFeatureIdsPath), dataStructure.getDataRefAs(k_FeatureIdsPath)); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarActivePath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarActivePath), dataStructure.getDataRefAs(k_ActivePath)); - auto resultH5 = HDF5::DataStructureWriter::WriteFile(dataStructure, fileWriter); - SIMPLNX_RESULT_REQUIRE_VALID(resultH5); + UnitTest::CheckArraysInheritTupleDims(dataStructure); } +} + +TEST_CASE("SimplnxCore::ScalarSegmentFeatures: FaceEdgeVertex Connectivity", "[SimplnxCore][ScalarSegmentFeatures]") +{ + UnitTest::LoadPlugins(); + + // Shared test: verifies vertex and edge connectivity with FaceEdgeVertex scheme. + // Setup lambda creates ScalarData with 4 isolated voxels (2 pairs) and configures args. + auto setupScalar = [](Arguments& args, DataStructure& ds, const DataPath& geomPath, const DataPath& cellDataPath, ChoicesParameter::ValueType neighborScheme) { + const ShapeType cellShape = {3, 3, 3}; + auto& am = ds.getDataRefAs(cellDataPath); + auto scalarDS = DataStoreUtilities::CreateDataStore(cellShape, {1}, IDataAction::Mode::Execute); + auto* scalarArr = DataArray::Create(ds, "ScalarData", scalarDS, am.getId()); + auto& store = scalarArr->getDataStoreRef(); + store.fill(0); + store[0 * 9 + 0 * 3 + 0] = 1; // (0,0,0) — vertex pair A + store[1 * 9 + 1 * 3 + 1] = 1; // (1,1,1) — vertex pair B + store[0 * 9 + 0 * 3 + 2] = 2; // (2,0,0) — edge pair C + store[1 * 9 + 1 * 3 + 2] = 2; // (2,1,1) — edge pair D + + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_GridGeomPath_Key, std::make_any(geomPath)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_InputArrayPathKey, std::make_any(cellDataPath.createChildPath("ScalarData"))); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ScalarToleranceKey, std::make_any(0)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(DataPath{})); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(neighborScheme)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_IsPeriodic_Key, std::make_any(false)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_FeatureIdsName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_CellFeatureName_Key, std::make_any("CellFeatureData")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_RandomizeFeatures_Key, std::make_any(false)); + }; + + RunFaceEdgeVertexConnectivityTest([&](Arguments& args, DataStructure& ds, const DataPath& gp, const DataPath& cp) { setupScalar(args, ds, gp, cp, 0); }, + [&](Arguments& args, DataStructure& ds, const DataPath& gp, const DataPath& cp) { setupScalar(args, ds, gp, cp, 1); }); +} + +TEST_CASE("SimplnxCore::ScalarSegmentFeatures: 200x200x200 Large OOC", "[SimplnxCore][ScalarSegmentFeatures]") +{ + UnitTest::LoadPlugins(); + bool forceOocAlgo = GENERATE(false, true); + const nx::core::ForceOocAlgorithmGuard guard(forceOocAlgo); + // int32 1-comp => 200*200*4 = 160,000 bytes/slice + const UnitTest::PreferencesSentinel prefsSentinel("Zarr", 160000, true); + + const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, k_ArchiveName, k_DataDirName); + DataStructure exemplarDS = UnitTest::LoadDataStructure(k_LargeExemplarFile); + + const ShapeType cellShape = {k_LargeDim, k_LargeDim, k_LargeDim}; + + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, {k_LargeDim, k_LargeDim, k_LargeDim}, std::string(k_GeomName), std::string(k_CellDataName)); + BuildScalarTestData(dataStructure, cellShape, am->getId(), k_LargeBlockSize, "ScalarData", true); + BuildSphericalMask(dataStructure, cellShape, am->getId()); + + ScalarSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, /*useMask=*/true, /*isPeriodic=*/true, /*tolerance=*/0); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + + // Compare against exemplar + const DataPath exemplarFeatureIdsPath({"DataContainer_Exemplar", std::string(k_CellDataName), "FeatureIds"}); + const DataPath exemplarActivePath({"DataContainer_Exemplar", std::string(k_FeatureDataName), "Active"}); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarFeatureIdsPath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarFeatureIdsPath), dataStructure.getDataRefAs(k_FeatureIdsPath)); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + REQUIRE_NOTHROW(exemplarDS.getDataRefAs(exemplarActivePath)); + CompareDataArrays(exemplarDS.getDataRefAs(exemplarActivePath), dataStructure.getDataRefAs(k_ActivePath)); UnitTest::CheckArraysInheritTupleDims(dataStructure); } -TEST_CASE("SimplnxCore::ScalarSegmentFeatures: Neighbor Scheme", "[Reconstruction][ScalarSegmentFeatures]") +TEST_CASE("SimplnxCore::ScalarSegmentFeatures: No Valid Voxels Returns Error", "[SimplnxCore][ScalarSegmentFeatures]") +{ + UnitTest::LoadPlugins(); + + RunNoValidVoxelsErrorTest([](Arguments& args, DataStructure& ds, const DataPath& geomPath, const DataPath& cellDataPath, const DataPath& maskPath) { + const ShapeType cellShape = {3, 3, 3}; + auto& am = ds.getDataRefAs(cellDataPath); + CreateTestDataArray(ds, "ScalarData", cellShape, {1}, am.getId()); + + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_GridGeomPath_Key, std::make_any(geomPath)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_InputArrayPathKey, std::make_any(cellDataPath.createChildPath("ScalarData"))); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ScalarToleranceKey, std::make_any(0)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_UseMask_Key, std::make_any(true)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(maskPath)); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_FeatureIdsName_Key, std::make_any("FeatureIds")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_CellFeatureName_Key, std::make_any("FeatureData")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any("Active")); + args.insertOrAssign(ScalarSegmentFeaturesFilter::k_RandomizeFeatures_Key, std::make_any(false)); + }); +} + +TEST_CASE("SimplnxCore::ScalarSegmentFeatures: Randomize Feature IDs", "[SimplnxCore][ScalarSegmentFeatures]") +{ + UnitTest::LoadPlugins(); + + constexpr usize k_ExpectedFeatures = 27; // 3^3 + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + + DataStructure dataStructure; + auto* am = BuildSegmentFeaturesTestGeometry(dataStructure, {k_SmallDim, k_SmallDim, k_SmallDim}, std::string(k_GeomName), std::string(k_CellDataName)); + BuildScalarTestData(dataStructure, cellShape, am->getId(), k_SmallBlockSize); + + ScalarSegmentFeaturesFilter filter; + Arguments args; + SetupArgs(args, /*useMask=*/false, /*isPeriodic=*/false, /*tolerance=*/0, /*neighborScheme=*/0, /*randomize=*/true); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_ActivePath)); + const auto& actives = dataStructure.getDataRefAs(k_ActivePath); + REQUIRE(actives.getNumberOfTuples() == k_ExpectedFeatures + 1); + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(k_FeatureIdsPath)); + const auto& featureIds = dataStructure.getDataRefAs(k_FeatureIdsPath); + const auto& featureStore = featureIds.getDataStoreRef(); + std::set uniqueIds; + int32 minId = std::numeric_limits::max(); + int32 maxId = std::numeric_limits::min(); + for(usize i = 0; i < featureStore.getNumberOfTuples(); i++) + { + int32 fid = featureStore.getValue(i); + uniqueIds.insert(fid); + minId = std::min(minId, fid); + maxId = std::max(maxId, fid); + } + REQUIRE(minId == 1); + REQUIRE(maxId == static_cast(k_ExpectedFeatures)); + REQUIRE(uniqueIds.size() == k_ExpectedFeatures); +} + +TEST_CASE("SimplnxCore::ScalarSegmentFeatures: Generate Test Data", "[SimplnxCore][ScalarSegmentFeatures][.GenerateTestData]") { - /** - * We are going to use Catch2's GENERATE macro to create variations of parameter values. - * EVERYTHING after the GENERATE macro will be run for each of the generated sets of values - */ - auto [sectionName, inputDataArrayName, exemplaryFeatureIdsArrayName, neighborSchemeIndex] = - GENERATE(std::make_tuple("Shared Edges - Face Only", k_SharedEdgesInputArrayName, k_ExemplarySharedEdgesFaceOnlyFeatureIdsName, 0), - std::make_tuple("Shared Edges - All Connected", k_SharedEdgesInputArrayName, k_ExemplarySharedEdgesAllConnectedFeatureIdsName, 1), - std::make_tuple("Shared Points - Face Only", k_SharedPointsInputArrayName, k_ExemplarySharedPointsFaceOnlyFeatureIdsName, 0), - std::make_tuple("Shared Points - All Connected", k_SharedPointsInputArrayName, k_ExemplarySharedPointsAllConnectedFeatureIdsName, 1), - std::make_tuple("Nothing Shared - Face Only", k_NothingSharedInputArrayName, k_ExemplaryNothingSharedFaceOnlyFeatureIdsName, 0), - std::make_tuple("Nothing Shared - All Connected", k_NothingSharedInputArrayName, k_ExemplaryNothingSharedAllConnectedFeatureIdsName, 1), - std::make_tuple("Combination - Face Only", k_CombinationInputArrayName, k_ExemplaryCombinationFaceOnlyFeatureIdsName, 0), - std::make_tuple("Combination - All Connected", k_CombinationInputArrayName, k_ExemplaryCombinationAllConnectedFeatureIdsName, 1)); - - /** - * @note EVERYTHING from here to the end of the test will be run for **each** tuple set above - */ - const nx::core::UnitTest::TestFileSentinel testDataSentinel(nx::core::unit_test::k_TestFilesDir, "segment_features_neighbor_scheme_test.tar.gz", "segment_features_neighbor_scheme_test"); - auto baseDataFilePath = fs::path(fmt::format("{}/segment_features_neighbor_scheme_test/segment_features_neighbor_scheme_test.dream3d", nx::core::unit_test::k_TestFilesDir)); - DataStructure dataStructure = UnitTest::LoadDataStructure(baseDataFilePath); + UnitTest::LoadPlugins(); + + const auto outputDir = fs::path(fmt::format("{}/generated_test_data/scalar_segment_features", unit_test::k_BinaryTestOutputDir)); + fs::create_directories(outputDir); + // Small input data (15^3) — one geometry per test variant { - Arguments args; - ScalarSegmentFeaturesFilter filter; + const ShapeType cellShape = {k_SmallDim, k_SmallDim, k_SmallDim}; + const std::array dims = {k_SmallDim, k_SmallDim, k_SmallDim}; - DataPath smallIn100Group({k_SmallIn100ImageGeom}); - DataPath ebsdScanDataPath = smallIn100Group.createChildPath(k_Cell_Data); - std::string outputFeatureIdsName = "Output_Feature_Ids"; - std::string computedCellDataName = "Computed_CellData"; - DataPath outputFeatureIdsPath = ebsdScanDataPath.createChildPath(outputFeatureIdsName); - DataPath featureDataGroupPath = smallIn100Group.createChildPath(computedCellDataName); - DataPath activeArrayDataPath = featureDataGroupPath.createChildPath(k_ActiveName); + DataStructure ds; - DataPath gridGeomDataPath({k_SmallIn100ImageGeom}); - int scalarTolerance = 0; + auto* amBase = BuildSegmentFeaturesTestGeometry(ds, dims, "Base", std::string(k_CellDataName)); + BuildScalarTestData(ds, cellShape, amBase->getId(), k_SmallBlockSize); - // Create default Parameters for the filter. - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_GridGeomPath_Key, std::make_any(gridGeomDataPath)); - // Turn off the use of a Mask Array - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_UseMask_Key, std::make_any(false)); - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_MaskArrayPath_Key, std::make_any(DataPath{})); - // Set the tolerance - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ScalarToleranceKey, std::make_any(scalarTolerance)); - // Set the paths to the created arrays - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_FeatureIdsName_Key, std::make_any(outputFeatureIdsName)); - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_CellFeatureName_Key, std::make_any(computedCellDataName)); - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_ActiveArrayName_Key, std::make_any(k_ActiveName)); - // Are we going to randomize the featureIds when completed. - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_RandomizeFeatures_Key, std::make_any(false)); + auto* amMasked = BuildSegmentFeaturesTestGeometry(ds, dims, "Masked", std::string(k_CellDataName)); + BuildScalarTestData(ds, cellShape, amMasked->getId(), k_SmallBlockSize); + BuildSphericalMask(ds, cellShape, amMasked->getId()); - SECTION(sectionName) - { - DataPath inputDataArrayPath = ebsdScanDataPath.createChildPath(inputDataArrayName); - DataPath exemplaryFeatureIdsArrayPath = ebsdScanDataPath.createChildPath(exemplaryFeatureIdsArrayName); - DataPath computedFeatureIdsPath = ebsdScanDataPath.createChildPath(outputFeatureIdsName); - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_InputArrayPathKey, std::make_any(inputDataArrayPath)); - args.insertOrAssign(ScalarSegmentFeaturesFilter::k_NeighborScheme_Key, std::make_any(neighborSchemeIndex)); + auto* amPeriodic = BuildSegmentFeaturesTestGeometry(ds, dims, "Periodic", std::string(k_CellDataName)); + BuildScalarTestData(ds, cellShape, amPeriodic->getId(), k_SmallBlockSize, "ScalarData", true); - // Preflight the filter and check result - auto preflightResult = filter.preflight(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions) + auto* amTolerance = BuildSegmentFeaturesTestGeometry(ds, dims, "Tolerance", std::string(k_CellDataName)); + BuildScalarTestData(ds, cellShape, amTolerance->getId(), k_SmallBlockSize); - // Execute the filter and check the result - auto executeResult = filter.execute(dataStructure, args); - SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result) + UnitTest::WriteTestDataStructure(ds, outputDir / "small_input.dream3d"); + } + + // Large input data (200^3) — mask=true, periodic=true + { + const ShapeType cellShape = {k_LargeDim, k_LargeDim, k_LargeDim}; + const std::array dims = {k_LargeDim, k_LargeDim, k_LargeDim}; - UnitTest::CompareArrays(dataStructure, exemplaryFeatureIdsArrayPath, computedFeatureIdsPath); + DataStructure ds; + auto* am = BuildSegmentFeaturesTestGeometry(ds, dims, std::string(k_GeomName), std::string(k_CellDataName)); + BuildScalarTestData(ds, cellShape, am->getId(), k_LargeBlockSize, "ScalarData", true); + BuildSphericalMask(ds, cellShape, am->getId()); - UnitTest::CheckArraysInheritTupleDims(dataStructure); - } + UnitTest::WriteTestDataStructure(ds, outputDir / "large_input.dream3d"); } } diff --git a/src/simplnx/Utilities/SegmentFeatures.cpp b/src/simplnx/Utilities/SegmentFeatures.cpp index 037b7175b0..4e833f023e 100644 --- a/src/simplnx/Utilities/SegmentFeatures.cpp +++ b/src/simplnx/Utilities/SegmentFeatures.cpp @@ -1,8 +1,10 @@ #include "SegmentFeatures.hpp" +#include "simplnx/DataStructure/AbstractDataStore.hpp" #include "simplnx/DataStructure/Geometry/IGridGeometry.hpp" #include "simplnx/Utilities/ClusteringUtilities.hpp" #include "simplnx/Utilities/MessageHelper.hpp" +#include "simplnx/Utilities/UnionFind.hpp" #include @@ -11,109 +13,154 @@ using namespace nx::core; namespace { /** - * @brief This will find the 6 face neighbor's indices. - * @param currentPoint - * @param width - * @param height - * @param depth - * @return Vector of indices + * @brief Returns the 6 face neighbor indices. When isPeriodic is true, + * boundary voxels wrap to the opposite face instead of being skipped. + * @param currentPoint Linear voxel index + * @param width X dimension + * @param height Y dimension + * @param depth Z dimension + * @param isPeriodic Whether to apply periodic boundary wrapping + * @return Vector of neighbor indices */ -std::vector getFaceNeighbors(const int64 currentPoint, const int64 width, const int64 height, const int64 depth) +std::vector getFaceNeighbors(const int64 currentPoint, const int64 width, const int64 height, const int64 depth, const bool isPeriodic) { std::vector neighbors; neighbors.reserve(6); - // decode currentPoint -> (col, row, plane) const int64 col = currentPoint % width; const int64 tmp = currentPoint / width; const int64 row = tmp % height; const int64 plane = tmp / height; - // stride for one z-slice const int64 slice = width * height; + // -X if(col > 0) { neighbors.push_back(currentPoint - 1); } + else if(isPeriodic) + { + neighbors.push_back(currentPoint + width - 1); + } + + // +X if(col < width - 1) { neighbors.push_back(currentPoint + 1); } + else if(isPeriodic) + { + neighbors.push_back(currentPoint - width + 1); + } + + // -Y if(row > 0) { neighbors.push_back(currentPoint - width); } + else if(isPeriodic) + { + neighbors.push_back(currentPoint + (height - 1) * width); + } + + // +Y if(row < height - 1) { neighbors.push_back(currentPoint + width); } + else if(isPeriodic) + { + neighbors.push_back(currentPoint - (height - 1) * width); + } + + // -Z if(plane > 0) { neighbors.push_back(currentPoint - slice); } + else if(isPeriodic) + { + neighbors.push_back(currentPoint + (depth - 1) * slice); + } + + // +Z if(plane < depth - 1) { neighbors.push_back(currentPoint + slice); } + else if(isPeriodic) + { + neighbors.push_back(currentPoint - (depth - 1) * slice); + } return neighbors; } /** - * @brief This will find all indices that are connected via the 26 face, edge or vertex neighbors - * @param currentPoint - * @param width - * @param height - * @param depth - * @return vector of indices + * @brief Returns up to 26 face/edge/vertex neighbor indices. When isPeriodic + * is true, boundary voxels wrap to the opposite face instead of being skipped. + * @param currentPoint Linear voxel index + * @param width X dimension + * @param height Y dimension + * @param depth Z dimension + * @param isPeriodic Whether to apply periodic boundary wrapping + * @return Vector of neighbor indices */ -std::vector getAllNeighbors(const int64 currentPoint, const int64 width, const int64 height, const int64 depth) +std::vector getAllNeighbors(const int64 currentPoint, const int64 width, const int64 height, const int64 depth, const bool isPeriodic) { std::vector neighbors; neighbors.reserve(26); - // decode currentPoint -> (col, row, plane) const int64 col = currentPoint % width; const int64 tmp = currentPoint / width; const int64 row = tmp % height; const int64 plane = tmp / height; - // stride for one z-slice const int64 slice = width * height; - // baseOffset == currentPoint - const int64 baseOffset = currentPoint; - for(int64 dz = -1; dz <= 1; ++dz) { - if(const int64 p = plane + dz; p < 0 || p >= depth) + int64 nz = plane + dz; + if(nz < 0 || nz >= depth) { - continue; + if(!isPeriodic) + { + continue; + } + nz = (nz + depth) % depth; } - const int64 dzOff = dz * slice; for(int64 dy = -1; dy <= 1; ++dy) { - if(const int64 r = row + dy; r < 0 || r >= height) + int64 ny = row + dy; + if(ny < 0 || ny >= height) { - continue; + if(!isPeriodic) + { + continue; + } + ny = (ny + height) % height; } - const int64 dyOff = dy * width; for(int64 dx = -1; dx <= 1; ++dx) { - // skip the center voxel itself if(dx == 0 && dy == 0 && dz == 0) { continue; } - if(int64 c = col + dx; c < 0 || c >= width) + + int64 nx = col + dx; + if(nx < 0 || nx >= width) { - continue; + if(!isPeriodic) + { + continue; + } + nx = (nx + width) % width; } - int64 neighbor = baseOffset + dzOff + dyOff + dx; - neighbors.push_back(neighbor); + + neighbors.push_back(nz * slice + ny * width + nx); } } } @@ -134,7 +181,36 @@ SegmentFeatures::SegmentFeatures(DataStructure& dataStructure, const std::atomic // ----------------------------------------------------------------------------- SegmentFeatures::~SegmentFeatures() = default; -// ----------------------------------------------------------------------------- +// ============================================================================= +// DFS Flood-Fill Segmentation (In-Core Path) +// ============================================================================= +// +// This method implements a depth-first search (DFS) flood-fill algorithm for +// segmenting voxels into features when data resides entirely in memory. +// +// Algorithm overview: +// 1. Iterate through voxels to find "seed" voxels — unassigned, valid voxels +// that start a new feature. +// 2. For each seed, assign a new feature ID (gnum) and push the seed onto a +// stack (voxelsList). +// 3. Pop voxels from the stack, examine their neighbors via the configured +// neighbor scheme (Face or FaceEdgeVertex), and call the subclass's +// determineGrouping() to decide whether a neighbor belongs to the same +// feature. If so, the neighbor is assigned the feature ID and pushed +// onto the stack for further expansion. +// 4. When the stack empties, the current feature is complete. Find the next +// seed and repeat until no seeds remain. +// +// Features are numbered in seed-discovery order (the first unassigned voxel +// encountered becomes feature 1, the next becomes feature 2, etc.). +// +// Performance note: +// This algorithm uses random-access memory patterns — the stack can pop to +// any voxel in the volume, causing non-sequential reads. This is efficient +// for in-core DataStore (O(1) random access) but extremely slow for OOC +// ZarrStore, where random access triggers chunk loads/evictions ("chunk +// thrashing"). Use executeCCL() for out-of-core datasets. +// ============================================================================= Result<> SegmentFeatures::execute(IGridGeometry* gridGeom) { ThrottledMessenger throttledMessenger = m_MessageHelper.createThrottledMessenger(); @@ -145,13 +221,20 @@ Result<> SegmentFeatures::execute(IGridGeometry* gridGeom) int64 dims[3] = {static_cast(udims[0]), static_cast(udims[1]), static_cast(udims[2])}; - // Initialize a sequence of execution modifiers + // gnum tracks the current feature ID being assigned, starting at 1. + // nextSeed is an optimization: it tracks the lowest voxel index that might + // still be unassigned, so getSeed() can skip over already-segmented voxels + // instead of rescanning from index 0 every time. int32 gnum = 1; int64 nextSeed = 0; - int64 seed = 0; // Always use the very first value of the array that we are using to segment + int64 seed = getSeed(gnum, nextSeed); + nextSeed = seed + 1; usize size = 0; - // Initialize containers + // voxelsList serves as the DFS stack (LIFO). It is pre-allocated to avoid + // frequent reallocations. 'size' is the logical stack pointer — elements + // are pushed by writing to voxelsList[size] and incrementing, and popped + // by decrementing size and reading voxelsList[size]. constexpr usize initialVoxelsListSize = 100000; std::vector voxelsList(initialVoxelsListSize, -1); @@ -163,34 +246,46 @@ Result<> SegmentFeatures::execute(IGridGeometry* gridGeom) return {}; } + // Start a new feature: push the seed onto the stack size = 0; voxelsList[size] = seed; size++; + // DFS expansion loop: pop a voxel, check its neighbors, push matches while(size > 0) { + // Pop the top of the stack (LIFO order) const int64 currentPoint = voxelsList[size - 1]; size -= 1; std::vector neighPoints; switch(m_NeighborScheme) { case NeighborScheme::Face: - neighPoints = getFaceNeighbors(currentPoint, dims[0], dims[1], dims[2]); + neighPoints = getFaceNeighbors(currentPoint, dims[0], dims[1], dims[2], m_IsPeriodic); break; case NeighborScheme::FaceEdgeVertex: - neighPoints = getAllNeighbors(currentPoint, dims[0], dims[1], dims[2]); + neighPoints = getAllNeighbors(currentPoint, dims[0], dims[1], dims[2], m_IsPeriodic); break; } for(const auto& neighbor : neighPoints) { + // determineGrouping() is implemented by the subclass. It checks whether + // the neighbor is unassigned & similar to the reference voxel, and if + // so, assigns it the current feature ID (gnum) and returns true. if(determineGrouping(currentPoint, neighbor, gnum)) { + // Push the newly-claimed neighbor onto the stack for further expansion voxelsList[size] = neighbor; size++; + // nextSeed optimization: if this neighbor was the next candidate seed, + // advance nextSeed so getSeed() won't return an already-assigned voxel. if(neighbor == nextSeed) { nextSeed = neighbor + 1; } + // If the stack has grown beyond the allocated capacity, extend it. + // The stack is stored in a flat vector, so we grow by a fixed block + // and initialize the new entries to -1. if(size >= voxelsList.size()) { size = voxelsList.size(); @@ -208,7 +303,8 @@ Result<> SegmentFeatures::execute(IGridGeometry* gridGeom) // Send a progress message float percentComplete = static_cast(totalVoxelsSegmented) / static_cast(totalVoxels) * 100.0f; throttledMessenger.sendThrottledMessage([&]() { return fmt::format("{:.2f}% - Current Feature Count: {}", percentComplete, gnum); }); - // Increment or set values for the next iteration + // Reset the stack for the next feature. assign() shrinks/grows the vector + // back to the finished feature size + 1 and fills with -1. voxelsList.assign(size + 1, -1); gnum++; // Get the next seed value @@ -216,21 +312,638 @@ Result<> SegmentFeatures::execute(IGridGeometry* gridGeom) nextSeed = seed + 1; } - m_FoundFeatures = gnum - 1; // Decrement the gnum because it will end up 1 larger than it should have been. + m_FoundFeatures = gnum - 1; // Decrement because gnum ends up 1 larger than the last assigned feature. + m_MessageHelper.sendMessage(fmt::format("Total Features Found: {}", m_FoundFeatures)); + return {}; +} + +// ============================================================================= +// Chunk-Sequential Connected Component Labeling (CCL) Algorithm +// ============================================================================= +// +// This method replaces the DFS flood-fill (execute()) with a scanline-based +// connected-component labeling algorithm optimized for out-of-core (OOC) +// data stores (e.g. ZarrStore). Unlike DFS, which accesses voxels in +// unpredictable stack-driven order, CCL processes voxels in strict Z-Y-X +// scanline order, resulting in sequential chunk access patterns that avoid +// chunk thrashing. +// +// The algorithm has three phases: +// +// Phase 1 (Forward CCL): +// Scan voxels in Z-Y-X order. For each valid voxel, examine only its +// "backward" neighbors — those already visited earlier in scanline order. +// If a backward neighbor has a label and is similar (per areNeighborsSimilar), +// adopt that label. If multiple distinct labels are found among backward +// neighbors, unite them in a Union-Find structure. If no backward neighbor +// matches, assign a fresh provisional label. Labels are written to both an +// in-memory rolling buffer (for fast neighbor lookups) and to the OOC +// featureIds store (for persistence). +// +// Phase 1b (Periodic boundary merge): +// If periodic boundaries are enabled, Phase 1 cannot detect connections +// that wrap around the volume (the wrapped neighbor has a higher linear +// index and hasn't been visited yet). This phase reads back provisional +// labels and unites similar voxels on opposite boundary faces. +// +// Phase 2 (Resolution + Relabeling): +// Flatten the Union-Find tree, then scan the featureIds store chunk by +// chunk. For each provisional label, look up its Union-Find root and +// map it to a contiguous final feature ID. Write the final ID back in +// the same pass. This combined discover-and-write approach halves the +// number of OOC accesses compared to separate resolution and write +// passes, and chunk-sequential iteration ensures optimal I/O. +// ============================================================================= +Result<> SegmentFeatures::executeCCL(IGridGeometry* gridGeom, AbstractDataStore& featureIdsStore) +{ + ThrottledMessenger throttledMessenger = m_MessageHelper.createThrottledMessenger(); + + const SizeVec3 udims = gridGeom->getDimensions(); + // getDimensions() returns [X, Y, Z] + const int64 dimX = static_cast(udims[0]); + const int64 dimY = static_cast(udims[1]); + const int64 dimZ = static_cast(udims[2]); + const usize totalVoxels = static_cast(dimX) * static_cast(dimY) * static_cast(dimZ); + + const int64 sliceStride = dimX * dimY; + + const bool useFaceOnly = (m_NeighborScheme == NeighborScheme::Face); + + UnionFind unionFind; + int32 nextLabel = 1; // Provisional labels start at 1 + + // Rolling 2-slice buffer for backward neighbor label lookups. + // + // Why 2 slices is sufficient: + // In Z-Y-X scanline order, a voxel at (ix, iy, iz) has backward neighbors + // only in the current Z-slice (iz) or the immediately previous Z-slice + // (iz-1). No backward neighbor can ever be in Z-slice (iz-2) or earlier, + // because all 13 backward neighbor offsets have dz in {-1, 0}. Therefore, + // keeping just 2 slices in memory — the current and the previous — is + // enough for all backward neighbor label reads. + // + // This design uses O(dimX * dimY) memory instead of O(dimX * dimY * dimZ), + // enabling processing of datasets much larger than available RAM. + // + // Buffer layout: Z-slice (iz % 2) occupies indices + // [sliceOffset .. sliceOffset + sliceStride), where + // sliceOffset = (iz % 2) * sliceSize. + const usize sliceSize = static_cast(sliceStride); + std::vector labelBuffer(2 * sliceSize, 0); + + // ========================================================================= + // Phase 1: Forward CCL - assign provisional labels using backward neighbors + // ========================================================================= + m_MessageHelper.sendMessage("Forward CCL pass..."); + + for(int64 iz = 0; iz < dimZ; iz++) + { + if(m_ShouldCancel) + { + return {}; + } + + // Let the subclass pre-load input arrays (e.g. GoodVoxels, CellPhases, + // Quats) for this Z-slice into local std::vector buffers. This eliminates + // per-element OOC overhead during areNeighborsSimilar() calls — instead + // of each comparison triggering a chunk load from ZarrStore, the subclass + // reads from fast contiguous vectors that were bulk-loaded once per slice. + prepareForSlice(iz, dimX, dimY, dimZ); + + // Clear the current slice's portion of the rolling buffer + const usize currentSliceOffset = static_cast(iz % 2) * sliceSize; + std::fill(labelBuffer.begin() + currentSliceOffset, labelBuffer.begin() + currentSliceOffset + sliceSize, 0); + + for(int64 iy = 0; iy < dimY; iy++) + { + for(int64 ix = 0; ix < dimX; ix++) + { + const int64 index = iz * sliceStride + iy * dimX + ix; + const usize bufIdx = currentSliceOffset + static_cast(iy * dimX + ix); + + // Skip voxels that are not valid + if(!isValidVoxel(index)) + { + continue; + } + + // Check backward neighbors for existing labels. + // "Backward" neighbors are those with a smaller linear index — i.e., + // already processed earlier in Z-Y-X scanline order. In 3D, these are + // neighbors with dz < 0, or dz == 0 && dy < 0, or dz == 0 && dy == 0 + // && dx < 0. Forward neighbors (higher linear index) are not yet + // labeled and cannot be consulted. + // + // Neighbor labels are read from the rolling buffer (direct memory + // access, O(1)) rather than from the OOC featureIds store, avoiding + // chunk loads for every neighbor lookup. + int32 assignedLabel = 0; + const usize prevSliceOffset = static_cast((iz + 1) % 2) * sliceSize; + + if(useFaceOnly) + { + // Face connectivity: exactly 3 backward neighbors exist: + // -X (dx=-1): one column to the left in the same row/slice + // -Y (dy=-1): one row earlier in the same slice + // -Z (dz=-1): same (x,y) position in the previous slice + // The 3 forward neighbors (+X, +Y, +Z) have not been labeled yet + // and are skipped. + + // Check -X neighbor (same Z-slice, same buffer region) + if(ix > 0) + { + const int64 neighIdx = index - 1; + int32 neighLabel = labelBuffer[bufIdx - 1]; + if(neighLabel > 0 && areNeighborsSimilar(index, neighIdx)) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + // Check -Y neighbor (same Z-slice, same buffer region) + if(iy > 0) + { + const int64 neighIdx = index - dimX; + int32 neighLabel = labelBuffer[currentSliceOffset + static_cast((iy - 1) * dimX + ix)]; + if(neighLabel > 0 && areNeighborsSimilar(index, neighIdx)) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + // Check -Z neighbor (previous Z-slice, other buffer region) + if(iz > 0) + { + const int64 neighIdx = index - sliceStride; + int32 neighLabel = labelBuffer[prevSliceOffset + static_cast(iy * dimX + ix)]; + if(neighLabel > 0 && areNeighborsSimilar(index, neighIdx)) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + } + else + { + // FaceEdgeVertex connectivity: 13 backward neighbors out of 26 total. + // + // A 3x3x3 neighborhood has 26 neighbors (excluding self). Exactly + // half (13) have a smaller linear index in Z-Y-X order and are thus + // "backward." These are enumerated by iterating: + // dz in {-1, 0}: + // dz=-1: all 9 neighbors in the previous Z-slice (any dx, dy) + // dz= 0: only neighbors with dy < 0 (3 neighbors), or + // dy == 0 && dx == -1 (1 neighbor) => 4 total + // Total: 9 + 4 = 13 backward neighbors + // + // The loop bounds below encode this enumeration efficiently: + // - dz ranges [-1, 0] + // - dy ranges [-1, +1] when dz<0, or [-1, 0] when dz==0 + // - dx ranges [-1, +1] when dz<0 or dy<0, or [-1, -1] when dz==0 && dy==0 + for(int64 dz = -1; dz <= 0; ++dz) + { + const int64 nz = iz + dz; + if(nz < 0 || nz >= dimZ) + { + continue; + } + + const usize neighSliceOffset = (dz < 0) ? prevSliceOffset : currentSliceOffset; + + const int64 dyStart = -1; + const int64 dyEnd = (dz < 0) ? 1 : 0; + + for(int64 dy = dyStart; dy <= dyEnd; ++dy) + { + const int64 ny = iy + dy; + if(ny < 0 || ny >= dimY) + { + continue; + } + + int64 dxStart; + int64 dxEnd; + if(dz < 0) + { + dxStart = -1; + dxEnd = 1; + } + else if(dy < 0) + { + dxStart = -1; + dxEnd = 1; + } + else + { + dxStart = -1; + dxEnd = -1; + } + + for(int64 dx = dxStart; dx <= dxEnd; ++dx) + { + const int64 nx = ix + dx; + if(nx < 0 || nx >= dimX) + { + continue; + } + if(dx == 0 && dy == 0 && dz == 0) + { + continue; + } + + const int64 neighIdx = nz * sliceStride + ny * dimX + nx; + int32 neighLabel = labelBuffer[neighSliceOffset + static_cast(ny * dimX + nx)]; + if(neighLabel > 0 && areNeighborsSimilar(index, neighIdx)) + { + if(assignedLabel == 0) + { + assignedLabel = neighLabel; + } + else if(assignedLabel != neighLabel) + { + unionFind.unite(assignedLabel, neighLabel); + } + } + } + } + } + } + + // If no matching backward neighbor, assign new provisional label + if(assignedLabel == 0) + { + assignedLabel = nextLabel++; + unionFind.find(assignedLabel); // Initialize in union-find + } + + // Write label to both rolling buffer (for neighbor reads) and featureIds store + labelBuffer[bufIdx] = assignedLabel; + featureIdsStore[index] = assignedLabel; + } + } + + // Send progress per Z-slice + float percentComplete = static_cast(iz + 1) / static_cast(dimZ) * 100.0f; + throttledMessenger.sendThrottledMessage([percentComplete]() { return fmt::format("Forward CCL: {:.1f}% complete", percentComplete); }); + } + + featureIdsStore.flush(); + + if(m_ShouldCancel) + { + return {}; + } + + // Disable subclass input buffering by passing iz=-1 as a sentinel value. + // Phase 1b (periodic boundary merge) compares voxels on opposite faces of + // the volume, which may be in any Z-slice (e.g. iz=0 vs iz=dimZ-1). The + // subclass's 1-or-2-slice buffering strategy from Phase 1 cannot handle + // arbitrary cross-volume access, so we signal it to fall back to direct + // (unbuffered) reads from the underlying data store. + prepareForSlice(-1, dimX, dimY, dimZ); + + // ========================================================================= + // Phase 1b: Periodic boundary merge + // ========================================================================= + // The forward CCL pass cannot detect connections that wrap around periodic + // boundaries because the wrapped neighbor has a higher linear index and + // has not been processed yet when the boundary voxel is visited. This + // phase reads back provisional labels from featureIdsStore and unites + // labels of similar voxels on opposite boundary faces. + if(m_IsPeriodic) + { + m_MessageHelper.sendMessage("Merging periodic boundaries..."); + + if(useFaceOnly) + { + // Face connectivity: each axis is handled independently because face + // neighbors only connect along a single axis. For each axis, we + // iterate over the 2D face and compare each voxel at the low boundary + // (e.g. ix=0) with its counterpart at the high boundary (e.g. + // ix=dimX-1). These are the same voxel pairs that getFaceNeighbors() + // would return with isPeriodic=true, but which Phase 1 could not + // process because the wrapped neighbor had not yet been labeled. + + // X-axis: unite voxels at ix=0 with ix=dimX-1 + if(dimX > 1) + { + for(int64 iz = 0; iz < dimZ; iz++) + { + for(int64 iy = 0; iy < dimY; iy++) + { + const int64 idxA = iz * sliceStride + iy * dimX; + const int64 idxB = iz * sliceStride + iy * dimX + (dimX - 1); + const int32 labelA = featureIdsStore[idxA]; + const int32 labelB = featureIdsStore[idxB]; + if(labelA > 0 && labelB > 0 && areNeighborsSimilar(idxA, idxB)) + { + unionFind.unite(labelA, labelB); + } + } + } + } + + // Y-axis: unite voxels at iy=0 with iy=dimY-1 + if(dimY > 1) + { + for(int64 iz = 0; iz < dimZ; iz++) + { + for(int64 ix = 0; ix < dimX; ix++) + { + const int64 idxA = iz * sliceStride + ix; + const int64 idxB = iz * sliceStride + (dimY - 1) * dimX + ix; + const int32 labelA = featureIdsStore[idxA]; + const int32 labelB = featureIdsStore[idxB]; + if(labelA > 0 && labelB > 0 && areNeighborsSimilar(idxA, idxB)) + { + unionFind.unite(labelA, labelB); + } + } + } + } + + // Z-axis: unite voxels at iz=0 with iz=dimZ-1 + if(dimZ > 1) + { + for(int64 iy = 0; iy < dimY; iy++) + { + for(int64 ix = 0; ix < dimX; ix++) + { + const int64 idxA = iy * dimX + ix; + const int64 idxB = (dimZ - 1) * sliceStride + iy * dimX + ix; + const int32 labelA = featureIdsStore[idxA]; + const int32 labelB = featureIdsStore[idxB]; + if(labelA > 0 && labelB > 0 && areNeighborsSimilar(idxA, idxB)) + { + unionFind.unite(labelA, labelB); + } + } + } + } + } + else + { + // FaceEdgeVertex connectivity: check all 26-neighbor pairs that wrap + // across periodic boundaries. Unlike face-only mode, edge and vertex + // neighbors can wrap across two or even three axes simultaneously + // (e.g. a corner voxel's diagonal neighbor wraps in X, Y, and Z). + // This requires checking all 26 neighbor offsets for every boundary + // voxel, filtering to only those that actually wrap. + // + // The onBoundary check skips interior voxels (whose 26 neighbors are + // all within bounds and were already handled by Phase 1). + // + // The neighIdx > index deduplication ensures each pair of periodic + // neighbors is united exactly once. Since union-find is symmetric + // (unite(A,B) == unite(B,A)), processing only the pair where the + // neighbor has the larger linear index avoids redundant work. + for(int64 iz = 0; iz < dimZ; iz++) + { + for(int64 iy = 0; iy < dimY; iy++) + { + for(int64 ix = 0; ix < dimX; ix++) + { + // Only boundary voxels can have neighbors that wrap around + const bool onBoundary = (ix == 0 || ix == dimX - 1 || iy == 0 || iy == dimY - 1 || iz == 0 || iz == dimZ - 1); + if(!onBoundary) + { + continue; + } + + const int64 index = iz * sliceStride + iy * dimX + ix; + const int32 labelCurrent = featureIdsStore[index]; + if(labelCurrent <= 0) + { + continue; + } + + for(int64 dz = -1; dz <= 1; ++dz) + { + int64 nz = iz + dz; + bool wrappedZ = false; + if(nz < 0) + { + nz += dimZ; + wrappedZ = true; + } + else if(nz >= dimZ) + { + nz -= dimZ; + wrappedZ = true; + } + + for(int64 dy = -1; dy <= 1; ++dy) + { + int64 ny = iy + dy; + bool wrappedY = false; + if(ny < 0) + { + ny += dimY; + wrappedY = true; + } + else if(ny >= dimY) + { + ny -= dimY; + wrappedY = true; + } + + for(int64 dx = -1; dx <= 1; ++dx) + { + if(dx == 0 && dy == 0 && dz == 0) + { + continue; + } + + int64 nx = ix + dx; + bool wrappedX = false; + if(nx < 0) + { + nx += dimX; + wrappedX = true; + } + else if(nx >= dimX) + { + nx -= dimX; + wrappedX = true; + } + + // Only process pairs that actually wrap around at least one + // axis. Non-wrapped pairs were already handled in Phase 1. + if(!wrappedX && !wrappedY && !wrappedZ) + { + continue; + } + + const int64 neighIdx = nz * sliceStride + ny * dimX + nx; + // Deduplication: only process the pair where neighIdx > index. + // This ensures each (voxelA, voxelB) pair is united exactly + // once, since unite() is symmetric. + if(neighIdx <= index) + { + continue; + } + + const int32 labelNeigh = featureIdsStore[neighIdx]; + if(labelNeigh > 0 && areNeighborsSimilar(index, neighIdx)) + { + unionFind.unite(labelCurrent, labelNeigh); + } + } + } + } + } + } + } + } + } + + if(m_ShouldCancel) + { + return {}; + } + + // ========================================================================= + // Phase 2: Resolution + Relabeling (combined single pass) + // ========================================================================= + // + // After Phase 1/1b, every valid voxel has a provisional label and the + // Union-Find knows which provisional labels belong to the same connected + // component. This phase: + // 1. Flattens the Union-Find so every label points directly to its root + // (path compression eliminates intermediate nodes). + // 2. Scans voxels chunk-by-chunk in deterministic order. For each + // provisional label, performs a two-level lookup: + // a) label -> root: via unionFind.find(label) (O(1) after flatten) + // b) root -> finalId: via the labelToFinal[] map + // If the root has not yet been assigned a final ID, allocate the next + // sequential ID (finalFeatureCount++). Then cache the mapping for the + // original label as well (labelToFinal[label] = finalId) so subsequent + // voxels with the same provisional label skip the union-find lookup. + // 3. Writes the final ID back to featureIdsStore[index] in the same pass. + // + // Combining discovery and relabeling into a single pass halves the number + // of OOC chunk loads compared to doing them separately. The chunk-sequential + // iteration order ensures each chunk is loaded exactly once. + // + // Because the scan is in linear (Z-Y-X) order, final feature IDs are + // assigned in the order their first voxel appears in the volume, matching + // the seed-discovery order of the DFS algorithm. + // ========================================================================= + m_MessageHelper.sendMessage("Resolving labels and writing final feature IDs..."); + + unionFind.flatten(); + + // labelToFinal maps provisional label -> final contiguous feature ID. + // Indexed by provisional label (0..nextLabel-1). A value of 0 means + // "not yet assigned." This avoids a hash map and gives O(1) lookups. + std::vector labelToFinal(static_cast(nextLabel), 0); + int32 finalFeatureCount = 0; + + const uint64 numChunks = featureIdsStore.getNumberOfChunks(); + + for(uint64 chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) + { + if(m_ShouldCancel) + { + return {}; + } + + featureIdsStore.loadChunk(chunkIdx); + const auto chunkLowerBounds = featureIdsStore.getChunkLowerBounds(chunkIdx); + const auto chunkUpperBounds = featureIdsStore.getChunkUpperBounds(chunkIdx); + + for(usize z = chunkLowerBounds[0]; z <= chunkUpperBounds[0]; z++) + { + for(usize y = chunkLowerBounds[1]; y <= chunkUpperBounds[1]; y++) + { + for(usize x = chunkLowerBounds[2]; x <= chunkUpperBounds[2]; x++) + { + const usize index = z * static_cast(sliceStride) + y * static_cast(dimX) + x; + int32 label = featureIdsStore[index]; + if(label > 0) + { + // Two-level lookup: provisional label -> union-find root -> final ID + if(labelToFinal[label] == 0) + { + // Level 1: find this label's root in the (flattened) union-find + int32 root = static_cast(unionFind.find(label)); + // Level 2: if the root hasn't been assigned a final ID yet, + // allocate the next sequential feature ID + if(labelToFinal[root] == 0) + { + finalFeatureCount++; + labelToFinal[root] = finalFeatureCount; + } + // Cache the mapping for this provisional label so future voxels + // with the same label skip the union-find lookup entirely + labelToFinal[label] = labelToFinal[root]; + } + // Write the final contiguous feature ID back to the data store + featureIdsStore[index] = labelToFinal[label]; + } + } + } + } + + // Send progress + float percentComplete = static_cast(chunkIdx + 1) / static_cast(numChunks) * 100.0f; + throttledMessenger.sendThrottledMessage([percentComplete]() { return fmt::format("Relabeling: {:.1f}% chunks complete", percentComplete); }); + } + + featureIdsStore.flush(); + + m_FoundFeatures = finalFeatureCount; m_MessageHelper.sendMessage(fmt::format("Total Features Found: {}", m_FoundFeatures)); return {}; } +// ----------------------------------------------------------------------------- int64 SegmentFeatures::getSeed(int32 gnum, int64 nextSeed) const { return -1; } +// ----------------------------------------------------------------------------- bool SegmentFeatures::determineGrouping(int64 referencePoint, int64 neighborPoint, int32 gnum) const { return false; } +// ----------------------------------------------------------------------------- +void SegmentFeatures::prepareForSlice(int64 /*iz*/, int64 /*dimX*/, int64 /*dimY*/, int64 /*dimZ*/) +{ +} + +// ----------------------------------------------------------------------------- +bool SegmentFeatures::isValidVoxel(int64 point) const +{ + return true; +} + +// ----------------------------------------------------------------------------- +bool SegmentFeatures::areNeighborsSimilar(int64 point1, int64 point2) const +{ + return false; +} + // ----------------------------------------------------------------------------- SegmentFeatures::SeedGenerator SegmentFeatures::initializeStaticVoxelSeedGenerator() const { diff --git a/src/simplnx/Utilities/SegmentFeatures.hpp b/src/simplnx/Utilities/SegmentFeatures.hpp index 1c015485e2..aa1a65e8b3 100644 --- a/src/simplnx/Utilities/SegmentFeatures.hpp +++ b/src/simplnx/Utilities/SegmentFeatures.hpp @@ -17,6 +17,8 @@ namespace nx::core { class IGridGeometry; +template +class AbstractDataStore; namespace segment_features { @@ -51,16 +53,25 @@ class SIMPLNX_EXPORT SegmentFeatures }; /** - * @brief execute + * @brief Original DFS-based segmentation (in-core optimized). * @param gridGeom * @return */ Result<> execute(IGridGeometry* gridGeom); + /** + * @brief Chunk-sequential CCL-based segmentation optimized for out-of-core. + * + * Subclasses must override isValidVoxel() and areNeighborsSimilar() to use this code path. + * + * @param gridGeom The grid geometry providing dimensions and neighbor offsets. + * @param featureIdsStore The data store to write assigned feature IDs into. + * @return Result indicating success or an error with a descriptive message. + */ + Result<> executeCCL(IGridGeometry* gridGeom, AbstractDataStore& featureIdsStore); + /** * @brief Returns the seed for the specified values. - * @param data - * @param args * @param gnum * @param nextSeed * @return int64 @@ -69,8 +80,6 @@ class SIMPLNX_EXPORT SegmentFeatures /** * @brief Determines the grouping for the specified values. - * @param data - * @param args * @param referencePoint * @param neighborPoint * @param gnum @@ -82,7 +91,6 @@ class SIMPLNX_EXPORT SegmentFeatures * @brief * @param featureIds * @param totalFeatures - * @param distribution */ void randomizeFeatureIds(Int32Array* featureIds, uint64 totalFeatures); @@ -106,8 +114,53 @@ class SIMPLNX_EXPORT SegmentFeatures { return false; } + + /** + * @brief Pure data comparison without featureId assignment. + * Used by the CCL algorithm which handles label assignment separately. + * @param index First voxel index + * @param neighIndex Second voxel index + * @return true if the two voxels should be in the same feature + */ + virtual bool compare(int64 index, int64 neighIndex) + { + return false; + } }; + /** + * @brief Can this voxel be a feature member? (mask + phase check, NO featureId check) + * Default returns true (all voxels are valid). + * @param point Linear voxel index + * @return true if this voxel can participate in segmentation + */ + virtual bool isValidVoxel(int64 point) const; + + /** + * @brief Should these two adjacent voxels be in the same feature? (data comparison only) + * Default returns false (no voxels are similar). + * @param point1 First voxel index + * @param point2 Second voxel index + * @return true if the two voxels should be grouped together + */ + virtual bool areNeighborsSimilar(int64 point1, int64 point2) const; + + /** + * @brief Called by executeCCL at the start of each Z-slice to allow subclasses + * to pre-load input data into local buffers, eliminating per-element OOC overhead + * during neighbor comparisons. + * + * Called with iz = -1 before Phase 1b (periodic boundary merge) to signal that + * buffering should be disabled, since Phase 1b may access arbitrary Z-slices. + * + * Default implementation does nothing. + * @param iz Current Z-slice index, or -1 to disable buffering. + * @param dimX X dimension of the grid. + * @param dimY Y dimension of the grid. + * @param dimZ Z dimension of the grid. + */ + virtual void prepareForSlice(int64 iz, int64 dimX, int64 dimY, int64 dimZ); + protected: DataStructure& m_DataStructure; bool m_IsPeriodic = false; diff --git a/src/simplnx/Utilities/UnionFind.hpp b/src/simplnx/Utilities/UnionFind.hpp new file mode 100644 index 0000000000..6fe6247748 --- /dev/null +++ b/src/simplnx/Utilities/UnionFind.hpp @@ -0,0 +1,188 @@ +#pragma once + +#include "simplnx/simplnx_export.hpp" + +#include "simplnx/Common/Types.hpp" + +#include +#include + +namespace nx::core +{ + +/** + * @class UnionFind + * @brief Vector-based Union-Find (Disjoint Set) data structure for tracking + * connected component equivalences during chunk-sequential processing. + * + * Uses union-by-rank and path-halving compression for near-O(1) amortized + * find() and unite() operations. Internal storage uses contiguous vectors + * indexed by label for cache-friendly access (no hash map overhead). + * + * Key features: + * - Labels are contiguous integers starting from 1 (0 is unused/invalid) + * - Grows dynamically as new labels are encountered + * - Path halving in find() for near-O(1) amortized lookups + * - Union-by-rank for balanced merges + * - Accumulates sizes at each label during construction + * - Single-pass flatten() for full path compression and size accumulation + */ +class SIMPLNX_EXPORT UnionFind +{ +public: + UnionFind() + { + // Index 0 is unused (labels start at 1). Initialize with a small capacity. + constexpr usize k_InitialCapacity = 64; + m_Parent.resize(k_InitialCapacity); + m_Rank.resize(k_InitialCapacity, 0); + m_Size.resize(k_InitialCapacity, 0); + // Initialize all entries as self-parents + for(usize i = 0; i < k_InitialCapacity; i++) + { + m_Parent[i] = static_cast(i); + } + } + + ~UnionFind() = default; + + UnionFind(const UnionFind&) = delete; + UnionFind(UnionFind&&) noexcept = default; + UnionFind& operator=(const UnionFind&) = delete; + UnionFind& operator=(UnionFind&&) noexcept = default; + + /** + * @brief Find the root label with path-halving compression. + * Each node on the path is redirected to its grandparent, giving + * near-O(1) amortized performance. + * @param x Label to find + * @return Root label + */ + int64 find(int64 x) + { + ensureCapacity(x); + + // Path halving: point each node to its grandparent while walking + while(m_Parent[x] != x) + { + m_Parent[x] = m_Parent[m_Parent[x]]; + x = m_Parent[x]; + } + return x; + } + + /** + * @brief Unite two labels into the same equivalence class using union-by-rank. + * @param a First label + * @param b Second label + */ + void unite(int64 a, int64 b) + { + int64 rootA = find(a); + int64 rootB = find(b); + + if(rootA == rootB) + { + return; + } + + if(m_Rank[rootA] < m_Rank[rootB]) + { + m_Parent[rootA] = rootB; + } + else if(m_Rank[rootA] > m_Rank[rootB]) + { + m_Parent[rootB] = rootA; + } + else + { + m_Parent[rootB] = rootA; + m_Rank[rootA]++; + } + } + + /** + * @brief Add to the size count for a label. + * Sizes are accumulated at each label, not the root. They are + * accumulated to roots during flatten(). + * @param label Label to update + * @param count Number of voxels to add + */ + void addSize(int64 label, uint64 count) + { + ensureCapacity(label); + m_Size[label] += count; + } + + /** + * @brief Get the total size of a label's equivalence class. + * Should only be called after flatten() for accurate totals. + * @param label Label to query + * @return Total number of voxels in the equivalence class + */ + uint64 getSize(int64 label) + { + int64 root = find(label); + return m_Size[root]; + } + + /** + * @brief Flatten the union-find structure with full path compression + * and accumulate all sizes to root labels. + * + * After flatten(): + * - Every label points directly to its root + * - All sizes are accumulated at root labels + * - Subsequent find() calls are O(1) (single lookup) + */ + void flatten() + { + const usize count = m_Parent.size(); + + // Full path compression: point every label directly to its root + for(usize i = 1; i < count; i++) + { + m_Parent[i] = find(static_cast(i)); + } + + // Accumulate sizes to roots + std::vector rootSizes(count, 0); + for(usize i = 1; i < count; i++) + { + rootSizes[m_Parent[i]] += m_Size[i]; + } + m_Size = std::move(rootSizes); + } + +private: + /** + * @brief Ensure the internal vectors can hold index x. + * Grows by doubling to amortize allocation cost. + */ + void ensureCapacity(int64 x) + { + auto idx = static_cast(x); + if(idx < m_Parent.size()) + { + return; + } + + usize newSize = std::max(idx + 1, m_Parent.size() * 2); + usize oldSize = m_Parent.size(); + m_Parent.resize(newSize); + m_Rank.resize(newSize, 0); + m_Size.resize(newSize, 0); + + // Initialize new entries as self-parents + for(usize i = oldSize; i < newSize; i++) + { + m_Parent[i] = static_cast(i); + } + } + + std::vector m_Parent; + std::vector m_Rank; + std::vector m_Size; +}; + +} // namespace nx::core diff --git a/test/UnitTestCommon/include/simplnx/UnitTest/SegmentFeaturesTestUtils.hpp b/test/UnitTestCommon/include/simplnx/UnitTest/SegmentFeaturesTestUtils.hpp new file mode 100644 index 0000000000..85405e3b4e --- /dev/null +++ b/test/UnitTestCommon/include/simplnx/UnitTest/SegmentFeaturesTestUtils.hpp @@ -0,0 +1,621 @@ +#pragma once + +#include "simplnx/DataStructure/AttributeMatrix.hpp" +#include "simplnx/DataStructure/DataArray.hpp" +#include "simplnx/DataStructure/DataStructure.hpp" +#include "simplnx/DataStructure/Geometry/ImageGeom.hpp" +#include "simplnx/Filter/Arguments.hpp" +#include "simplnx/Filter/IFilter.hpp" +#include "simplnx/UnitTest/UnitTestCommon.hpp" +#include "simplnx/Utilities/DataStoreUtilities.hpp" + +#include + +#include +#include +#include +#include +#include + +namespace nx::core::UnitTest +{ + +/** + * @brief Creates an ImageGeom with a CellData AttributeMatrix. + * @param ds DataStructure to create objects in. + * @param dims Voxel dimensions {X, Y, Z}. + * @param geomName Name for the ImageGeom. + * @param cellDataName Name for the CellData AttributeMatrix. + * @return Pointer to the created AttributeMatrix. + */ +inline AttributeMatrix* BuildSegmentFeaturesTestGeometry(DataStructure& ds, const std::array& dims, const std::string& geomName, const std::string& cellDataName) +{ + auto* geom = ImageGeom::Create(ds, geomName); + geom->setDimensions({dims[0], dims[1], dims[2]}); + geom->setSpacing({1.0f, 1.0f, 1.0f}); + geom->setOrigin({0.0f, 0.0f, 0.0f}); + + const ShapeType cellShape = {dims[2], dims[1], dims[0]}; + auto* am = AttributeMatrix::Create(ds, cellDataName, cellShape, geom->getId()); + geom->setCellData(*am); + return am; +} + +/** + * @brief Creates block-patterned int32 scalar data for ScalarSegmentFeatures testing. + * @param ds DataStructure to create the array in. + * @param cellShape Tuple shape {Z, Y, X}. + * @param amId Parent AttributeMatrix ID. + * @param blockSize Voxel count per block edge. + * @param arrayName Name for the scalar array. + */ +inline void BuildScalarTestData(DataStructure& ds, const ShapeType& cellShape, DataObject::IdType amId, usize blockSize, const std::string& arrayName = "ScalarData", bool wrapBoundary = false) +{ + const usize dimZ = cellShape[0]; + const usize dimY = cellShape[1]; + const usize dimX = cellShape[2]; + + auto scalarDataStore = DataStoreUtilities::CreateDataStore(cellShape, {1}, IDataAction::Mode::Execute); + auto* scalarArray = DataArray::Create(ds, arrayName, scalarDataStore, amId); + auto& store = scalarArray->getDataStoreRef(); + + const usize blocksPerX = (dimX + blockSize - 1) / blockSize; + const usize blocksPerY = (dimY + blockSize - 1) / blockSize; + const usize blocksPerZ = (dimZ + blockSize - 1) / blockSize; + + for(usize z = 0; z < dimZ; z++) + { + for(usize y = 0; y < dimY; y++) + { + for(usize x = 0; x < dimX; x++) + { + const usize idx = z * dimX * dimY + y * dimX + x; + const usize bx = x / blockSize; + const usize by = y / blockSize; + const usize bz = z / blockSize; + + if(wrapBoundary) + { + // Last block in each axis maps to the same value as the first block, + // so periodic wrapping merges them into one feature. + const usize wbx = (bx == blocksPerX - 1) ? 0 : bx; + const usize wby = (by == blocksPerY - 1) ? 0 : by; + const usize wbz = (bz == blocksPerZ - 1) ? 0 : bz; + const usize wbpx = blocksPerX - 1; + const usize wbpy = blocksPerY - 1; + store[idx] = static_cast(wbz * wbpy * wbpx + wby * wbpx + wbx); + } + else + { + store[idx] = static_cast(bz * blocksPerY * blocksPerX + by * blocksPerX + bx); + } + } + } + } +} + +/** + * @brief Creates quaternion, phase, and crystal structure arrays for EBSD/CAxis testing. + * + * Quaternions are block-patterned with distinct orientations per block. + * All voxels are assigned phase 1. CrystalStructures has phase 0 = 999 (Unknown) + * and phase 1 = the provided crystal structure value. + * + * @param ds DataStructure to create arrays in. + * @param cellShape Tuple shape {Z, Y, X}. + * @param geomId Parent geometry ID (for ensemble AM). + * @param amId Parent CellData AttributeMatrix ID. + * @param crystalStructure Crystal structure for phase 1 (1 = Cubic_High, 0 = Hexagonal_High). + * @param blockSize Voxel count per block edge. + */ +inline void BuildOrientationTestData(DataStructure& ds, const ShapeType& cellShape, DataObject::IdType geomId, DataObject::IdType amId, uint32 crystalStructure, usize blockSize, + bool wrapBoundary = false) +{ + const usize dimZ = cellShape[0]; + const usize dimY = cellShape[1]; + const usize dimX = cellShape[2]; + + auto quatsDataStore = DataStoreUtilities::CreateDataStore(cellShape, {4}, IDataAction::Mode::Execute); + auto* quatsArray = DataArray::Create(ds, "Quats", quatsDataStore, amId); + auto& quatsStore = quatsArray->getDataStoreRef(); + + auto phasesDataStore = DataStoreUtilities::CreateDataStore(cellShape, {1}, IDataAction::Mode::Execute); + auto* phasesArray = DataArray::Create(ds, "Phases", phasesDataStore, amId); + auto& phasesStore = phasesArray->getDataStoreRef(); + + constexpr float32 k_DegToRad = 3.14159265358979323846f / 180.0f; + + const usize blocksPerX = (dimX + blockSize - 1) / blockSize; + const usize blocksPerY = (dimY + blockSize - 1) / blockSize; + const usize blocksPerZ = (dimZ + blockSize - 1) / blockSize; + const usize numBlocks = blocksPerX * blocksPerY * blocksPerZ; + + // Quaternion Hamilton product: result = a * b, where q = (w, x, y, z) + auto quatMul = [](const std::array& a, const std::array& b) -> std::array { + return {a[0] * b[0] - a[1] * b[1] - a[2] * b[2] - a[3] * b[3], a[0] * b[1] + a[1] * b[0] + a[2] * b[3] - a[3] * b[2], a[0] * b[2] - a[1] * b[3] + a[2] * b[0] + a[3] * b[1], + a[0] * b[3] + a[1] * b[2] - a[2] * b[1] + a[3] * b[0]}; + }; + + std::vector> blockQuats(numBlocks); + + // Z-layer orientation scheme (shared by EBSD and CAxis): + // All blocks in the same Z-layer share a single X-axis rotation angle. + // This produces 3 horizontal layers of identical orientations: + // z=0: 0° rotation → q = [1, 0, 0, 0] c-axis = [0, 0, 1] + // z=1: 30° rotation → q = [0.966, 0.259, 0, 0] c-axis = [0, 0.5, 0.866] + // z=2: 60° rotation → q = [0.866, 0.5, 0, 0] c-axis = [0, 0.866, 0.5] + // + // Adjacent layers differ by 30°, well above the 5° tolerance → no merge. + // Within each layer, all blocks share the same angle → they merge. + // + // Merge pair override (non-periodic only): + // Block (1,1,1) at center of z=1 is set to 0° instead of 30°. + // It merges with its z=0 neighbor (1,1,0) while staying separate + // from the other z=1 blocks (30° difference → no merge). + // + // Expected features (3x3x3 grid): + // Base (3 blocks/axis): 3 features (z=0 + center pillar, z=1 minus pillar, z=2) + // Base (8 blocks/axis): 3 features (repeating 0°/30°/60° stripes) + // Periodic: layers sharing the same angle merge across the boundary + constexpr float32 k_LayerAngles[] = {0.0f, 30.0f, 60.0f}; + + for(usize bz = 0; bz < blocksPerZ; bz++) + { + const usize layerIdx = bz % 3; + const float32 halfAngle = k_LayerAngles[layerIdx] * k_DegToRad * 0.5f; + // EBSDlib quaternion layout: (x, y, z, w) — Vector-Scalar order + const std::array layerQuat = {std::sin(halfAngle), 0.0f, 0.0f, std::cos(halfAngle)}; + + for(usize by = 0; by < blocksPerY; by++) + { + for(usize bx = 0; bx < blocksPerX; bx++) + { + const usize blockIdx = bz * blocksPerY * blocksPerX + by * blocksPerX + bx; + blockQuats[blockIdx] = layerQuat; + } + } + } + + // Merge pair: block (1,1,1) gets z=0 angle (0°) instead of z=1 angle (30°). + // It merges downward into the z=0 layer through face neighbor (1,1,0). + if(!wrapBoundary && blocksPerX >= 3 && blocksPerY >= 3 && blocksPerZ >= 3) + { + const usize idx_111 = 1 * blocksPerY * blocksPerX + 1 * blocksPerX + 1; + blockQuats[idx_111] = blockQuats[0]; // Set to 0° (z=0 layer angle) + } + + for(usize z = 0; z < dimZ; z++) + { + for(usize y = 0; y < dimY; y++) + { + for(usize x = 0; x < dimX; x++) + { + const usize idx = z * dimX * dimY + y * dimX + x; + phasesStore[idx] = 1; + + usize bx = x / blockSize; + usize by = y / blockSize; + usize bz = z / blockSize; + if(wrapBoundary) + { + bx = (bx == blocksPerX - 1) ? 0 : bx; + by = (by == blocksPerY - 1) ? 0 : by; + bz = (bz == blocksPerZ - 1) ? 0 : bz; + } + const usize blockIdx = bz * blocksPerY * blocksPerX + by * blocksPerX + bx; + const auto& q = blockQuats[blockIdx]; + quatsStore[idx * 4 + 0] = q[0]; + quatsStore[idx * 4 + 1] = q[1]; + quatsStore[idx * 4 + 2] = q[2]; + quatsStore[idx * 4 + 3] = q[3]; + } + } + } + + // Create CellEnsembleData with CrystalStructures + const ShapeType ensembleTupleShape = {2}; + auto* ensembleAM = AttributeMatrix::Create(ds, "CellEnsembleData", ensembleTupleShape, geomId); + auto crystalDataStore = DataStoreUtilities::CreateDataStore(ensembleTupleShape, {1}, IDataAction::Mode::Execute); + auto* crystalStructsArray = DataArray::Create(ds, "CrystalStructures", crystalDataStore, ensembleAM->getId()); + auto& crystalStructsStore = crystalStructsArray->getDataStoreRef(); + crystalStructsStore[0] = 999; // Phase 0: Unknown + crystalStructsStore[1] = crystalStructure; +} + +/** + * @brief Creates a spherical mask array where voxels inside the sphere are 1 (good) + * and voxels outside are 0 (masked out). + * + * The sphere is centered in the volume with radius = 80% of half the smallest dimension. + * For a 200x200x200 volume, that gives a radius of 80 voxels. + * + * @param ds DataStructure to create the array in. + * @param cellShape Tuple shape {Z, Y, X}. + * @param amId Parent AttributeMatrix ID. + * @param maskName Name for the mask array. + */ +inline void BuildSphericalMask(DataStructure& ds, const ShapeType& cellShape, DataObject::IdType amId, const std::string& maskName = "Mask") +{ + const usize dimZ = cellShape[0]; + const usize dimY = cellShape[1]; + const usize dimX = cellShape[2]; + + auto maskDataStore = DataStoreUtilities::CreateDataStore(cellShape, {1}, IDataAction::Mode::Execute); + auto* maskArray = DataArray::Create(ds, maskName, maskDataStore, amId); + auto& maskStore = maskArray->getDataStoreRef(); + + const float cx = static_cast(dimX) / 2.0f; + const float cy = static_cast(dimY) / 2.0f; + const float cz = static_cast(dimZ) / 2.0f; + const float radius = std::min({cx, cy, cz}) * 0.8f; + + for(usize z = 0; z < dimZ; z++) + { + for(usize y = 0; y < dimY; y++) + { + for(usize x = 0; x < dimX; x++) + { + const usize idx = z * dimX * dimY + y * dimX + x; + const float dx = static_cast(x) - cx; + const float dy = static_cast(y) - cy; + const float dz = static_cast(z) - cz; + maskStore[idx] = (dx * dx + dy * dy + dz * dz < radius * radius) ? 1 : 0; + } + } + } +} + +/** + * @brief Verifies segmentation results when a mask is applied. + * + * Checks that: + * 1. Masked voxels (mask=0) have FeatureId=0 + * 2. Unmasked voxels (mask=1) have FeatureId > 0 + * 3. At least one feature was created + * 4. Both masked and unmasked regions exist + * + * @param ds DataStructure containing the results. + * @param dims Voxel dimensions {X, Y, Z}. + * @param featureIdsPath Path to the generated FeatureIds array. + * @param activePath Path to the generated Active array. + * @param maskPath Path to the mask array. + */ +inline void VerifyMaskedSegmentation(const DataStructure& ds, const std::array& dims, const DataPath& featureIdsPath, const DataPath& activePath, const DataPath& maskPath) +{ + REQUIRE_NOTHROW(ds.getDataRefAs(featureIdsPath)); + const auto& featureIds = ds.getDataRefAs(featureIdsPath); + const auto& featureStore = featureIds.getDataStoreRef(); + + REQUIRE_NOTHROW(ds.getDataRefAs(maskPath)); + const auto& mask = ds.getDataRefAs(maskPath); + const auto& maskStore = mask.getDataStoreRef(); + + REQUIRE_NOTHROW(ds.getDataRefAs(activePath)); + const auto& actives = ds.getDataRefAs(activePath); + REQUIRE(actives.getNumberOfTuples() > 1); // At least one feature (index 0 + features) + + const usize totalVoxels = dims[0] * dims[1] * dims[2]; + usize maskedCount = 0; + usize unmaskedCount = 0; + + for(usize i = 0; i < totalVoxels; i++) + { + if(maskStore.getValue(i) == 0) + { + REQUIRE(featureStore.getValue(i) == 0); + maskedCount++; + } + else + { + REQUIRE(featureStore.getValue(i) > 0); + unmaskedCount++; + } + } + + REQUIRE(maskedCount > 0); + REQUIRE(unmaskedCount > 0); +} + +/** + * @brief Verifies that block-patterned segmentation produced the expected results. + * + * Checks that: + * 1. The feature count matches the expected number of blocks + * 2. All voxels within a block share the same FeatureId + * 3. Different blocks have different FeatureIds + * + * @param ds DataStructure containing the results. + * @param dims Voxel dimensions {X, Y, Z}. + * @param blockSize Voxel count per block edge. + * @param featureIdsPath Path to the generated FeatureIds array. + * @param activePath Path to the generated Active array. + */ +inline void VerifyBlockSegmentation(const DataStructure& ds, const std::array& dims, usize blockSize, const DataPath& featureIdsPath, const DataPath& activePath) +{ + const usize dimX = dims[0]; + const usize dimY = dims[1]; + const usize dimZ = dims[2]; + const usize blocksPerX = (dimX + blockSize - 1) / blockSize; + const usize blocksPerY = (dimY + blockSize - 1) / blockSize; + const usize blocksPerZ = (dimZ + blockSize - 1) / blockSize; + const usize expectedFeatures = blocksPerX * blocksPerY * blocksPerZ; + + // Check feature count (Active array includes Feature 0) + REQUIRE_NOTHROW(ds.getDataRefAs(activePath)); + const auto& actives = ds.getDataRefAs(activePath); + REQUIRE(actives.getNumberOfTuples() == expectedFeatures + 1); + + // Check FeatureIds consistency + REQUIRE_NOTHROW(ds.getDataRefAs(featureIdsPath)); + const auto& featureIds = ds.getDataRefAs(featureIdsPath); + const auto& featureStore = featureIds.getDataStoreRef(); + + // Map from block index to the FeatureId assigned to that block + std::unordered_map blockToFeature; + // Track all assigned FeatureIds to verify uniqueness + std::set usedFeatureIds; + + for(usize z = 0; z < dimZ; z++) + { + for(usize y = 0; y < dimY; y++) + { + for(usize x = 0; x < dimX; x++) + { + const usize voxelIdx = z * dimX * dimY + y * dimX + x; + const usize blockIdx = (z / blockSize) * blocksPerY * blocksPerX + (y / blockSize) * blocksPerX + (x / blockSize); + const int32 featureId = featureStore.getValue(voxelIdx); + + REQUIRE(featureId > 0); // No voxel should be unassigned + + auto it = blockToFeature.find(blockIdx); + if(it == blockToFeature.end()) + { + blockToFeature[blockIdx] = featureId; + usedFeatureIds.insert(featureId); + } + else + { + REQUIRE(it->second == featureId); // All voxels in a block share the same FeatureId + } + } + } + } + + // Each block should have a unique FeatureId + REQUIRE(usedFeatureIds.size() == expectedFeatures); +} + +/** + * @brief Verifies segmentation results when periodic BCs are enabled and boundary + * blocks have matching data (wrapBoundary=true). + * + * With periodic wrapping, the last block in each axis merges with the first block. + * Expected feature count: (blocksPerX-1) * (blocksPerY-1) * (blocksPerZ-1). + * + * @param ds DataStructure containing the results. + * @param dims Voxel dimensions {X, Y, Z}. + * @param blockSize Voxel count per block edge. + * @param featureIdsPath Path to the generated FeatureIds array. + * @param activePath Path to the generated Active array. + */ +inline void VerifyPeriodicBlockSegmentation(const DataStructure& ds, const std::array& dims, usize blockSize, const DataPath& featureIdsPath, const DataPath& activePath) +{ + const usize dimX = dims[0]; + const usize dimY = dims[1]; + const usize dimZ = dims[2]; + const usize blocksPerX = (dimX + blockSize - 1) / blockSize; + const usize blocksPerY = (dimY + blockSize - 1) / blockSize; + const usize blocksPerZ = (dimZ + blockSize - 1) / blockSize; + const usize periodicBlocksX = blocksPerX - 1; + const usize periodicBlocksY = blocksPerY - 1; + const usize periodicBlocksZ = blocksPerZ - 1; + const usize expectedFeatures = periodicBlocksX * periodicBlocksY * periodicBlocksZ; + + // Check feature count (Active array includes Feature 0) + REQUIRE_NOTHROW(ds.getDataRefAs(activePath)); + const auto& actives = ds.getDataRefAs(activePath); + REQUIRE(actives.getNumberOfTuples() == expectedFeatures + 1); + + // Check FeatureIds consistency + REQUIRE_NOTHROW(ds.getDataRefAs(featureIdsPath)); + const auto& featureIds = ds.getDataRefAs(featureIdsPath); + const auto& featureStore = featureIds.getDataStoreRef(); + + // Map from periodic block index to the FeatureId assigned to that block + std::unordered_map blockToFeature; + std::set usedFeatureIds; + + for(usize z = 0; z < dimZ; z++) + { + for(usize y = 0; y < dimY; y++) + { + for(usize x = 0; x < dimX; x++) + { + const usize voxelIdx = z * dimX * dimY + y * dimX + x; + const usize bx = x / blockSize; + const usize by = y / blockSize; + const usize bz = z / blockSize; + + // Effective periodic block index: last block wraps to first + const usize pbx = bx % periodicBlocksX; + const usize pby = by % periodicBlocksY; + const usize pbz = bz % periodicBlocksZ; + const usize periodicBlockIdx = pbz * periodicBlocksY * periodicBlocksX + pby * periodicBlocksX + pbx; + + const int32 featureId = featureStore.getValue(voxelIdx); + REQUIRE(featureId > 0); // No voxel should be unassigned + + auto it = blockToFeature.find(periodicBlockIdx); + if(it == blockToFeature.end()) + { + blockToFeature[periodicBlockIdx] = featureId; + usedFeatureIds.insert(featureId); + } + else + { + REQUIRE(it->second == featureId); // All voxels in matching periodic blocks share the same FeatureId + } + } + } + } + + // Each periodic block group should have a unique FeatureId + REQUIRE(usedFeatureIds.size() == expectedFeatures); +} + +/** + * @brief Runs the "no valid voxels returns error -87000" test for any SegmentFeatures filter. + * + * Creates a 3x3x3 grid with all voxels masked out, runs the filter, and asserts + * that execution returns error -87000. + * + * @tparam FilterT The filter class (e.g., ScalarSegmentFeaturesFilter). + * @param setupArgs Lambda that receives (Arguments&, DataPath geomPath, DataPath cellDataPath, DataPath maskPath) + * and inserts filter-specific arguments. + */ +template +void RunNoValidVoxelsErrorTest(SetupArgsFn setupArgs) +{ + constexpr usize kDim = 3; + const std::array dims = {kDim, kDim, kDim}; + const ShapeType cellShape = {kDim, kDim, kDim}; + + DataStructure ds; + auto* am = BuildSegmentFeaturesTestGeometry(ds, dims, "Geom", "CellData"); + + auto* mask = CreateTestDataArray(ds, "Mask", cellShape, {1}, am->getId()); + mask->fill(0); + + const DataPath geomPath({"Geom"}); + const DataPath cellDataPath({"Geom", "CellData"}); + const DataPath maskPath({"Geom", "CellData", "Mask"}); + + FilterT filter; + Arguments args; + setupArgs(args, ds, geomPath, cellDataPath, maskPath); + + auto preflightResult = filter.preflight(ds, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + + auto executeResult = filter.execute(ds, args); + SIMPLNX_RESULT_REQUIRE_INVALID(executeResult.result); + + const auto& errors = executeResult.result.errors(); + REQUIRE(errors.size() == 1); + REQUIRE(errors[0].code == -87000); +} + +/** + * @brief Tests that FaceEdgeVertex (26-neighbor) connectivity correctly merges + * regions connected through shared vertices and edges, not just faces. + * + * Creates a 3x3x3 geometry with 4 isolated single-voxel regions: + * - Regions A,B (same data): voxels (0,0,0) and (1,1,1) — vertex-connected only + * - Regions C,D (same data): voxels (2,0,0) and (2,1,1) — edge-connected only + * + * With Face (6-neighbor): 5 features (1 background + 4 isolated regions) + * With FaceEdgeVertex (26-neighbor): 3 features (1 background + A&B merged + C&D merged) + * + * @tparam FilterT The filter class (e.g., ScalarSegmentFeaturesFilter). + * @param setupFaceArgs Lambda (Arguments&, DataStructure&, DataPath geomPath, DataPath cellDataPath) + * that inserts filter-specific arguments with neighborScheme=0 (Face). + * @param setupFevArgs Lambda with same signature but neighborScheme=1 (FaceEdgeVertex). + */ +template +void RunFaceEdgeVertexConnectivityTest(SetupFaceFn setupFaceArgs, SetupFevFn setupFevArgs) +{ + constexpr usize kDim = 3; + const std::array dims = {kDim, kDim, kDim}; + const ShapeType cellShape = {kDim, kDim, kDim}; + + const DataPath geomPath({"Geom"}); + const DataPath cellDataPath({"Geom", "CellData"}); + const DataPath featureIdsPath({"Geom", "CellData", "FeatureIds"}); + const DataPath activePath({"Geom", "CellFeatureData", "Active"}); + + // Face scheme: A, B, C, D are all isolated → 5 features + index 0 + { + DataStructure ds; + BuildSegmentFeaturesTestGeometry(ds, dims, "Geom", "CellData"); + FilterT filter; + Arguments args; + setupFaceArgs(args, ds, geomPath, cellDataPath); + auto preflightResult = filter.preflight(ds, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(ds, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + const auto& actives = ds.getDataRefAs(activePath); + REQUIRE(actives.getNumberOfTuples() == 6); + } + + // FaceEdgeVertex scheme: A+B merge (vertex), C+D merge (edge) → 3 features + index 0 + DataStructure ds; + BuildSegmentFeaturesTestGeometry(ds, dims, "Geom", "CellData"); + { + FilterT filter; + Arguments args; + setupFevArgs(args, ds, geomPath, cellDataPath); + auto preflightResult = filter.preflight(ds, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + auto executeResult = filter.execute(ds, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + const auto& actives = ds.getDataRefAs(activePath); + REQUIRE(actives.getNumberOfTuples() == 4); + } + + // Verify the vertex-connected pair shares a FeatureId + const auto& fids = ds.getDataRefAs(featureIdsPath); + const auto& fidsStore = fids.getDataStoreRef(); + REQUIRE(fidsStore.getValue(0 * 9 + 0 * 3 + 0) == fidsStore.getValue(1 * 9 + 1 * 3 + 1)); // A == B (vertex merge) + REQUIRE(fidsStore.getValue(0 * 9 + 0 * 3 + 2) == fidsStore.getValue(1 * 9 + 1 * 3 + 2)); // C == D (edge merge) + REQUIRE(fidsStore.getValue(0 * 9 + 0 * 3 + 0) != fidsStore.getValue(0 * 9 + 0 * 3 + 2)); // A != C (different values) +} + +/** + * @brief Runs a SegmentFeatures filter against exemplar data and verifies results. + * + * Executes the filter, optionally checks the feature count, compares computed + * FeatureIds against embedded exemplar arrays, and validates tuple dimension + * inheritance. Used by Scalar, EBSD, and CAxis neighbor scheme tests. + * + * @tparam FilterT The filter class (e.g., ScalarSegmentFeaturesFilter). + * @tparam SetupArgsFn Lambda (Arguments&) that inserts all filter-specific arguments. + * @param dataStructure DataStructure loaded from an exemplar .dream3d file. + * @param computedFeatureIdsPath Path where the filter writes its FeatureIds array. + * @param activesPath Path where the filter writes its Active array. + * @param exemplarFeatureIdsPath Path to the pre-computed exemplar FeatureIds. + * @param expectedFeatureCount Expected Active tuple count (0 to skip this check). + * @param setupArgs Lambda to populate filter Arguments. + * @param tupleCheckIgnoredPaths Paths to exclude from CheckArraysInheritTupleDims. + */ +template +void RunNeighborSchemeExemplarTest(DataStructure& dataStructure, const DataPath& computedFeatureIdsPath, const DataPath& activesPath, const DataPath& exemplarFeatureIdsPath, + usize expectedFeatureCount, SetupArgsFn setupArgs, const std::vector& tupleCheckIgnoredPaths = {}) +{ + FilterT filter; + Arguments args; + setupArgs(args); + + auto preflightResult = filter.preflight(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(preflightResult.outputActions); + + auto executeResult = filter.execute(dataStructure, args); + SIMPLNX_RESULT_REQUIRE_VALID(executeResult.result); + + if(expectedFeatureCount > 0) + { + REQUIRE_NOTHROW(dataStructure.getDataRefAs(activesPath)); + const auto& actives = dataStructure.getDataRefAs(activesPath); + REQUIRE(actives.getNumberOfTuples() == expectedFeatureCount); + } + + REQUIRE_NOTHROW(dataStructure.getDataRefAs(computedFeatureIdsPath)); + REQUIRE_NOTHROW(dataStructure.getDataRefAs(exemplarFeatureIdsPath)); + const auto& generatedArray = dataStructure.getDataRefAs(computedFeatureIdsPath); + const auto& exemplarArray = dataStructure.getDataRefAs(exemplarFeatureIdsPath); + CompareDataArrays(generatedArray, exemplarArray); + + CheckArraysInheritTupleDims(dataStructure, tupleCheckIgnoredPaths); +} + +} // namespace nx::core::UnitTest