diff --git a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.h b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.h index 1a980373a5e..26b4ffadd93 100644 --- a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.h +++ b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.h @@ -55,6 +55,10 @@ class BaseElementLinearFEMForceField : public sofa::component::solidmechanics::f using StrainDisplacement = typename trait::StrainDisplacement; using Real = typename trait::Real; +public: + using AssembledStiffnessMatrix = sofa::type::Mat< + trait::NumberOfDofsInElement, trait::NumberOfDofsInElement, Real>; + protected: BaseElementLinearFEMForceField(); @@ -70,6 +74,14 @@ class BaseElementLinearFEMForceField : public sofa::component::solidmechanics::f * List of precomputed element stiffness matrices */ sofa::Data > d_elementStiffness; + + /** + * Contiguous buffer of assembled stiffness matrices (one per element). + * Extracted from d_elementStiffness for cache-friendly access in the hot path. + * This avoids loading the full FactorizedElementStiffness struct (~15 KB per element) + * when only the assembled matrix (~4.6 KB) is needed. + */ + sofa::type::vector m_assembledStiffnessMatrices; }; #if !defined(ELASTICITY_COMPONENT_BASE_ELEMENT_LINEAR_FEM_FORCEFIELD_CPP) diff --git a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.inl b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.inl index de380c5e4cc..e7827cfc55e 100644 --- a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.inl +++ b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.inl @@ -100,6 +100,13 @@ void BaseElementLinearFEMForceField::precomputeElementSt const std::array, trait::NumberOfNodesInElement> nodesCoordinates = extractNodesVectorFromGlobalVector(element, restPositionAccessor.ref()); elementStiffness[elementId] = integrate(nodesCoordinates, elasticityTensor); }); + + // Extract assembled matrices into a contiguous buffer for cache-friendly access + m_assembledStiffnessMatrices.resize(elements.size()); + for (std::size_t i = 0; i < elements.size(); ++i) + { + m_assembledStiffnessMatrices[i] = elementStiffness[i].getAssembledMatrix(); + } } } diff --git a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.inl b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.inl index a3a74a36a81..0298f2e4247 100644 --- a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.inl +++ b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.inl @@ -86,7 +86,7 @@ void ElementCorotationalFEMForceField::computeElementsFo { const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); auto restPositionAccessor = this->mstate->readRestPositions(); - auto elementStiffness = sofa::helper::getReadAccessor(this->d_elementStiffness); + const auto& assembledMatrices = this->m_assembledStiffnessMatrices; for (std::size_t elementId = range.start; elementId < range.end; ++elementId) { @@ -112,7 +112,7 @@ void ElementCorotationalFEMForceField::computeElementsFo transformedDisplacement = elementRotation.multTranspose(elementNodesCoordinates[j] - t) - (restElementNodesCoordinates[j] - t0); } - const auto& stiffnessMatrix = elementStiffness[elementId]; + const auto& stiffnessMatrix = assembledMatrices[elementId]; auto& elementForce = elementForces[elementId]; elementForce = stiffnessMatrix * displacement; @@ -134,7 +134,7 @@ void ElementCorotationalFEMForceField::computeElementsFo const sofa::VecDeriv_t& nodeDx) { const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); - auto elementStiffness = sofa::helper::getReadAccessor(this->d_elementStiffness); + const auto& assembledMatrices = this->m_assembledStiffnessMatrices; for (std::size_t elementId = range.start; elementId < range.end; ++elementId) { @@ -150,7 +150,7 @@ void ElementCorotationalFEMForceField::computeElementsFo rotated_dx = elementRotation.multTranspose(nodeDx[element[n]]); } - const auto& stiffnessMatrix = elementStiffness[elementId]; + const auto& stiffnessMatrix = assembledMatrices[elementId]; auto& df = elementForcesDeriv[elementId]; df = stiffnessMatrix * element_dx; diff --git a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.inl b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.inl index fc58db7abe4..191de07c12c 100644 --- a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.inl +++ b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.inl @@ -49,12 +49,12 @@ void ElementLinearSmallStrainFEMForceField::computeEleme { const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); auto restPositionAccessor = this->mstate->readRestPositions(); - auto elementStiffness = sofa::helper::getReadAccessor(this->d_elementStiffness); + const auto& assembledMatrices = this->m_assembledStiffnessMatrices; for (std::size_t elementId = range.start; elementId < range.end; ++elementId) { const auto& element = elements[elementId]; - const auto& stiffnessMatrix = elementStiffness[elementId]; + const auto& stiffnessMatrix = assembledMatrices[elementId]; typename trait::ElementDisplacement displacement{ sofa::type::NOINIT }; @@ -79,12 +79,12 @@ void ElementLinearSmallStrainFEMForceField::computeEleme const sofa::VecDeriv_t& nodeDx) { const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); - auto elementStiffness = sofa::helper::getReadAccessor(this->d_elementStiffness); + const auto& assembledMatrices = this->m_assembledStiffnessMatrices; for (std::size_t elementId = range.start; elementId < range.end; ++elementId) { const auto& element = elements[elementId]; - const auto& stiffnessMatrix = elementStiffness[elementId]; + const auto& stiffnessMatrix = assembledMatrices[elementId]; const std::array, trait::NumberOfNodesInElement> elementNodesDx = extractNodesVectorFromGlobalVector(element, nodeDx); diff --git a/applications/plugins/SofaCUDA/Component/CMakeLists.txt b/applications/plugins/SofaCUDA/Component/CMakeLists.txt index 77ce7a8b2be..ce4d885c90b 100644 --- a/applications/plugins/SofaCUDA/Component/CMakeLists.txt +++ b/applications/plugins/SofaCUDA/Component/CMakeLists.txt @@ -39,6 +39,11 @@ set(HEADER_FILES ### solidmechanics + ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementFEMKernelUtils.cuh + ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.h + ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.inl + ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.h + ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.inl ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaHexahedronFEMForceField.h ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaHexahedronFEMForceField.inl ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/hyperelastic/CudaStandardTetrahedralFEMForceField.h @@ -111,6 +116,8 @@ set(SOURCE_FILES ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/mass/CudaUniformMass.cpp ### Solidmechanics + ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cpp + ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cpp ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaHexahedronFEMForceField.cpp ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/hyperelastic/CudaStandardTetrahedralFEMForceField.cpp ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/tensormass/CudaTetrahedralTensorMassForceField.cpp @@ -181,6 +188,8 @@ set(CUDA_SOURCES ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/mass/CudaUniformMass.cu ### solidmechanics + ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cu + ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cu ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaHexahedronFEMForceField.cu ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/hyperelastic/CudaStandardTetrahedralFEMForceField.cu ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/tensormass/CudaTetrahedralTensorMassForceField.cu diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/init.cpp b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/init.cpp index f9c124998fb..c6b0ee6b438 100644 --- a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/init.cpp +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/init.cpp @@ -90,6 +90,8 @@ extern void registerPlaneForceField(sofa::core::ObjectFactory* factory); extern void registerSphereForceField(sofa::core::ObjectFactory* factory); // component::solidmechanics::fem::elastic +extern void registerElementCorotationalFEMForceField(sofa::core::ObjectFactory* factory); +extern void registerElementLinearSmallStrainFEMForceField(sofa::core::ObjectFactory* factory); extern void registerHexahedronFEMForceField(sofa::core::ObjectFactory* factory); extern void registerTetrahedronFEMForceField(sofa::core::ObjectFactory* factory); extern void registerTriangularFEMForceFieldOptim(sofa::core::ObjectFactory* factory); @@ -224,6 +226,8 @@ void registerObjects(sofa::core::ObjectFactory* factory) registerLinearForceField(factory); registerPlaneForceField(factory); registerSphereForceField(factory); + registerElementCorotationalFEMForceField(factory); + registerElementLinearSmallStrainFEMForceField(factory); registerHexahedronFEMForceField(factory); registerTetrahedronFEMForceField(factory); registerTriangularFEMForceFieldOptim(factory); diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cpp b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cpp new file mode 100644 index 00000000000..5cd43daa6d2 --- /dev/null +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cpp @@ -0,0 +1,101 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include + +#include +#include +#include + +namespace sofa::component::solidmechanics::fem::elastic +{ + +using namespace sofa::gpu::cuda; + +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; + +#ifdef SOFA_GPU_CUDA_DOUBLE +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField; +#endif + +} // namespace sofa::component::solidmechanics::fem::elastic + +namespace sofa::gpu::cuda +{ + +void registerElementCorotationalFEMForceField(sofa::core::ObjectFactory* factory) +{ + using namespace sofa::component::solidmechanics::fem::elastic; + + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for EdgeCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for TriangleCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for QuadCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for TetrahedronCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for HexahedronCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); + +#ifdef SOFA_GPU_CUDA_DOUBLE + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for EdgeCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for TriangleCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for QuadCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for TetrahedronCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for HexahedronCorotationalFEMForceField") + .add< CudaElementCorotationalFEMForceField >() + ); +#endif +} + +} // namespace sofa::gpu::cuda diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cu b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cu new file mode 100644 index 00000000000..6616637dc55 --- /dev/null +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cu @@ -0,0 +1,334 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include +#include +#include "CudaElementFEMKernelUtils.cuh" + +namespace sofa::gpu::cuda +{ + +/** + * Combined kernel: compute rotations AND per-element forces in one pass. + */ +template +__global__ void ElementCorotationalFEMForceField_computeRotationsAndForce_kernel( + int nbElem, + const int* __restrict__ elements, + const T* __restrict__ initRotTransposed, + const T* __restrict__ stiffness, + const T* __restrict__ x, + const T* __restrict__ x0, + T* __restrict__ rotationsOut, + T* __restrict__ eforce) +{ + static_assert(Dim == 3, "Corotational rotation computation requires Dim == 3"); + constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2; + + const int elemId = blockIdx.x * blockDim.x + threadIdx.x; + if (elemId >= nbElem) return; + + // Gather element positions + T ex[NNodes * Dim], ex0[NNodes * Dim]; + gatherElementData(elements, nbElem, elemId, x, ex); + gatherElementData(elements, nbElem, elemId, x0, ex0); + + // Compute rotation frame from current positions + T frame[Dim * Dim]; + if constexpr (NNodes == 8) + computeHexahedronFrame(ex, frame); + else + computeTriangleFrame(ex, frame); + + // R = frame^T * initRotTransposed + const T* irt = initRotTransposed + elemId * Dim * Dim; + T R[Dim * Dim]; + mat3TransposeMul(frame, irt, R); + + // Store rotation for later use + T* Rout = rotationsOut + elemId * Dim * Dim; + #pragma unroll + for (int i = 0; i < Dim * Dim; ++i) + Rout[i] = R[i]; + + // Compute element centers + T center[Dim], center0[Dim]; + computeElementCenter(ex, center); + computeElementCenter(ex0, center0); + + // Compute corotational displacement + T disp[NNodes * Dim]; + computeCorotationalDisplacement(R, ex, ex0, center, center0, disp); + + // Multiply by stiffness matrix + T edf[NNodes * Dim]; + const T* K = stiffness + elemId * NSymBlocks * Dim * Dim; + symBlockMatMul(K, disp, edf); + + // Rotate forces back to global frame and negate + T* out = eforce + elemId * NNodes * Dim; + rotateAndWriteForce(R, edf, out, T(-1)); +} + +/** + * Kernel for addForce with pre-computed rotations. + */ +template +__global__ void ElementCorotationalFEMForceField_computeForce_kernel( + int nbElem, + const int* __restrict__ elements, + const T* __restrict__ rotations, + const T* __restrict__ stiffness, + const T* __restrict__ x, + const T* __restrict__ x0, + T* __restrict__ eforce) +{ + constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2; + + const int elemId = blockIdx.x * blockDim.x + threadIdx.x; + if (elemId >= nbElem) return; + + // Load rotation matrix + const T* Rptr = rotations + elemId * Dim * Dim; + T R[Dim * Dim]; + #pragma unroll + for (int i = 0; i < Dim * Dim; ++i) + R[i] = Rptr[i]; + + // Gather element positions + T ex[NNodes * Dim], ex0[NNodes * Dim]; + gatherElementData(elements, nbElem, elemId, x, ex); + gatherElementData(elements, nbElem, elemId, x0, ex0); + + // Compute element centers + T center[Dim], center0[Dim]; + computeElementCenter(ex, center); + computeElementCenter(ex0, center0); + + // Compute corotational displacement + T disp[NNodes * Dim]; + computeCorotationalDisplacement(R, ex, ex0, center, center0, disp); + + // Multiply by stiffness matrix + T edf[NNodes * Dim]; + const T* K = stiffness + elemId * NSymBlocks * Dim * Dim; + symBlockMatMul(K, disp, edf); + + // Rotate forces back to global frame and negate + T* out = eforce + elemId * NNodes * Dim; + rotateAndWriteForce(R, edf, out, T(-1)); +} + +/** + * Kernel for addDForce. + */ +template +__global__ void ElementCorotationalFEMForceField_computeDForce_kernel( + int nbElem, + const int* __restrict__ elements, + const T* __restrict__ rotations, + const T* __restrict__ stiffness, + const T* __restrict__ dx, + T* __restrict__ eforce, + T kFactor) +{ + constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2; + + const int elemId = blockIdx.x * blockDim.x + threadIdx.x; + if (elemId >= nbElem) return; + + // Load rotation matrix + const T* Rptr = rotations + elemId * Dim * Dim; + T R[Dim * Dim]; + #pragma unroll + for (int i = 0; i < Dim * Dim; ++i) + R[i] = Rptr[i]; + + // Gather and rotate displacement: rdx = R^T * dx + T rdx[NNodes * Dim]; + rotateDisplacementTranspose(R, elements, nbElem, elemId, dx, rdx); + + // Multiply by stiffness matrix + const T* K = stiffness + elemId * NSymBlocks * Dim * Dim; + T edf[NNodes * Dim]; + symBlockMatMul(K, rdx, edf); + + // Rotate forces back to global frame and scale + T* out = eforce + elemId * NNodes * Dim; + rotateAndWriteForce(R, edf, out, -kFactor); +} + +// ===================== Launch functions ===================== + +template +void ElementCorotationalFEMForceFieldCuda_addForceWithRotations( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* initRotTransposed, + const void* stiffness, + const void* x, + const void* x0, + void* f, + void* eforce, + void* rotationsOut, + const void* velems) +{ + const int computeThreads = 64; + int numBlocks = (nbElem + computeThreads - 1) / computeThreads; + ElementCorotationalFEMForceField_computeRotationsAndForce_kernel + <<>>( + nbElem, + (const int*)elements, + (const T*)initRotTransposed, + (const T*)stiffness, + (const T*)x, + (const T*)x0, + (T*)rotationsOut, + (T*)eforce); + mycudaDebugError("ElementCorotationalFEMForceField_computeRotationsAndForce_kernel"); + + const int gatherThreads = 256; + numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads; + ElementFEM_gatherForce_kernel + <<>>( + nbVertex, + maxElemPerVertex, + (const int*)velems, + (const T*)eforce, + (T*)f); + mycudaDebugError("ElementFEM_gatherForce_kernel"); +} + +template +void ElementCorotationalFEMForceFieldCuda_addForce( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* rotations, + const void* stiffness, + const void* x, + const void* x0, + void* f, + void* eforce, + const void* velems) +{ + const int computeThreads = 64; + int numBlocks = (nbElem + computeThreads - 1) / computeThreads; + ElementCorotationalFEMForceField_computeForce_kernel + <<>>( + nbElem, + (const int*)elements, + (const T*)rotations, + (const T*)stiffness, + (const T*)x, + (const T*)x0, + (T*)eforce); + mycudaDebugError("ElementCorotationalFEMForceField_computeForce_kernel"); + + const int gatherThreads = 256; + numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads; + ElementFEM_gatherForce_kernel + <<>>( + nbVertex, + maxElemPerVertex, + (const int*)velems, + (const T*)eforce, + (T*)f); + mycudaDebugError("ElementFEM_gatherForce_kernel"); +} + +template +void ElementCorotationalFEMForceFieldCuda_addDForce( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* rotations, + const void* stiffness, + const void* dx, + void* df, + void* eforce, + const void* velems, + T kFactor) +{ + const int computeThreads = 64; + int numBlocks = (nbElem + computeThreads - 1) / computeThreads; + ElementCorotationalFEMForceField_computeDForce_kernel + <<>>( + nbElem, + (const int*)elements, + (const T*)rotations, + (const T*)stiffness, + (const T*)dx, + (T*)eforce, + kFactor); + mycudaDebugError("ElementCorotationalFEMForceField_computeDForce_kernel"); + + const int gatherThreads = 256; + numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads; + ElementFEM_gatherForce_kernel + <<>>( + nbVertex, + maxElemPerVertex, + (const int*)velems, + (const T*)eforce, + (T*)df); + mycudaDebugError("ElementFEM_gatherForce_kernel"); +} + +// ===================== Explicit template instantiations ===================== + +#define INSTANTIATE_COROTATIONAL(T, NNodes) \ + template void ElementCorotationalFEMForceFieldCuda_addForce( \ + unsigned int, unsigned int, unsigned int, const void*, const void*, \ + const void*, const void*, const void*, void*, void*, const void*); \ + template void ElementCorotationalFEMForceFieldCuda_addDForce( \ + unsigned int, unsigned int, unsigned int, const void*, const void*, \ + const void*, const void*, void*, void*, const void*, T); + +#define INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(T, NNodes) \ + template void ElementCorotationalFEMForceFieldCuda_addForceWithRotations( \ + unsigned int, unsigned int, unsigned int, const void*, const void*, \ + const void*, const void*, const void*, void*, void*, void*, const void*); + +INSTANTIATE_COROTATIONAL(float, 2) +INSTANTIATE_COROTATIONAL(float, 3) +INSTANTIATE_COROTATIONAL(float, 4) +INSTANTIATE_COROTATIONAL(float, 8) +INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(float, 3) +INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(float, 4) +INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(float, 8) + +INSTANTIATE_COROTATIONAL(double, 2) +INSTANTIATE_COROTATIONAL(double, 3) +INSTANTIATE_COROTATIONAL(double, 4) +INSTANTIATE_COROTATIONAL(double, 8) +INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(double, 3) +INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(double, 4) +INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(double, 8) + +#undef INSTANTIATE_COROTATIONAL +#undef INSTANTIATE_COROTATIONAL_WITH_ROTATIONS + +} // namespace sofa::gpu::cuda diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.h b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.h new file mode 100644 index 00000000000..820b4c915a1 --- /dev/null +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.h @@ -0,0 +1,153 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#pragma once + +#include +#include + +namespace sofa::gpu::cuda +{ + +template +void ElementCorotationalFEMForceFieldCuda_addForceWithRotations( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* initRotTransposed, + const void* stiffness, + const void* x, + const void* x0, + void* f, + void* eforce, + void* rotationsOut, + const void* velems); + +template +void ElementCorotationalFEMForceFieldCuda_addForce( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* rotations, + const void* stiffness, + const void* x, + const void* x0, + void* f, + void* eforce, + const void* velems); + +template +void ElementCorotationalFEMForceFieldCuda_addDForce( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* rotations, + const void* stiffness, + const void* dx, + void* df, + void* eforce, + const void* velems, + T kFactor); + +} // namespace sofa::gpu::cuda + +namespace sofa::component::solidmechanics::fem::elastic +{ + +/** + * CUDA-accelerated version of ElementCorotationalFEMForceField. + * + * Works with any element type (Edge, Triangle, Quad, Tetrahedron, Hexahedron). + * The addDForce method (the CG hot path, called ~250 times per timestep) runs entirely on GPU. + * The addForce method delegates to the CPU parent and uploads rotations to GPU afterwards. + * + * Uses a two-kernel approach for addDForce: + * Kernel 1: compute per-element forces (1 thread/element, fully unrolled) + * Kernel 2: gather per-vertex (1 thread/vertex, no atomics) + */ +template +class CudaElementCorotationalFEMForceField + : public ElementCorotationalFEMForceField +{ +public: + SOFA_CLASS( + SOFA_TEMPLATE2(CudaElementCorotationalFEMForceField, DataTypes, ElementType), + SOFA_TEMPLATE2(ElementCorotationalFEMForceField, DataTypes, ElementType)); + + using Real = sofa::Real_t; + using Coord = sofa::Coord_t; + using Deriv = sofa::Deriv_t; + using VecCoord = sofa::VecCoord_t; + using VecDeriv = sofa::VecDeriv_t; + + static const std::string GetCustomClassName() + { + return ElementCorotationalFEMForceField::GetCustomClassName(); + } + + static const std::string GetCustomTemplateName() + { + return DataTypes::Name(); + } + + void init() override; + + void addForce( + const sofa::core::MechanicalParams* mparams, + sofa::DataVecDeriv_t& f, + const sofa::DataVecCoord_t& x, + const sofa::DataVecDeriv_t& v) override; + + void addDForce( + const sofa::core::MechanicalParams* mparams, + sofa::DataVecDeriv_t& df, + const sofa::DataVecDeriv_t& dx) override; + + void buildStiffnessMatrix(sofa::core::behavior::StiffnessMatrix* matrix) override; + +protected: + + CudaElementCorotationalFEMForceField() = default; + + void uploadStiffnessAndConnectivity(); + void uploadRotations(); + void uploadInitialRotationsTransposed(); + void downloadRotations(); + + gpu::cuda::CudaVector m_gpuStiffness; ///< Symmetric block-format stiffness per element + gpu::cuda::CudaVector m_gpuRotations; ///< Flat 3x3 rotation matrices per element + gpu::cuda::CudaVector m_gpuInitialRotationsTransposed; ///< Flat 3x3 initial rotation transposed per element + gpu::cuda::CudaVector m_gpuElements; ///< SoA connectivity: elements[nodeIdx * nbElem + elemId] + gpu::cuda::CudaVector m_gpuElementForce; ///< Intermediate per-element per-node force buffer + gpu::cuda::CudaVector m_gpuVelems; ///< SoA vertex-to-element mapping, 0-terminated + + unsigned int m_maxElemPerVertex = 0; + unsigned int m_nbVertices = 0; + + bool m_gpuDataUploaded = false; + bool m_gpuRotationsUploaded = false; + bool m_gpuRotationMethodSupported = false; +}; + +} // namespace sofa::component::solidmechanics::fem::elastic diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.inl b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.inl new file mode 100644 index 00000000000..2359244539e --- /dev/null +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.inl @@ -0,0 +1,345 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#pragma once +#include +#include +#include +#include + +namespace sofa::component::solidmechanics::fem::elastic +{ + +template +void CudaElementCorotationalFEMForceField::init() +{ + ElementCorotationalFEMForceField::init(); + + if (!this->isComponentStateInvalid()) + { + uploadStiffnessAndConnectivity(); + uploadInitialRotationsTransposed(); + } +} + +template +void CudaElementCorotationalFEMForceField::uploadStiffnessAndConnectivity() +{ + using trait = sofa::component::solidmechanics::fem::elastic::trait; + + if (!this->l_topology) return; + + const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); + const auto& assembledMatrices = this->m_assembledStiffnessMatrices; + + const auto nbElem = elements.size(); + constexpr auto nNodes = trait::NumberOfNodesInElement; + constexpr auto dim = trait::spatial_dimensions; + + // Find number of vertices + unsigned int maxNodeId = 0; + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& element = elements[e]; + for (unsigned int n = 0; n < nNodes; ++n) + { + if (static_cast(element[n]) > maxNodeId) + maxNodeId = static_cast(element[n]); + } + } + m_nbVertices = maxNodeId + 1; + + // Upload stiffness matrices in symmetric upper-triangle block format: + // Only blocks (ni, nj) with nj >= ni are stored. + // symIdx = ni * nNodes - ni*(ni-1)/2 + (nj - ni) + // K[symIdx * dim * dim + di * dim + dj] per element + constexpr auto nSymBlocks = nNodes * (nNodes + 1) / 2; + m_gpuStiffness.resize(nbElem * nSymBlocks * dim * dim); + { + auto* dst = m_gpuStiffness.hostWrite(); + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& K = assembledMatrices[e]; + for (unsigned int ni = 0; ni < nNodes; ++ni) + { + const unsigned int diagIdx = ni * nNodes - ni * (ni - 1) / 2; + for (unsigned int nj = ni; nj < nNodes; ++nj) + { + const unsigned int symIdx = diagIdx + (nj - ni); + for (unsigned int di = 0; di < dim; ++di) + for (unsigned int dj = 0; dj < dim; ++dj) + dst[e * nSymBlocks * dim * dim + + symIdx * dim * dim + + di * dim + dj] + = static_cast(K[ni * dim + di][nj * dim + dj]); + } + } + } + } + + // Upload element connectivity in SoA layout: + // elements[nodeIdx * nbElem + elemId] = global node index + // Adjacent threads access adjacent memory for coalesced reads. + m_gpuElements.resize(nNodes * nbElem); + { + auto* dst = m_gpuElements.hostWrite(); + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& element = elements[e]; + for (unsigned int n = 0; n < nNodes; ++n) + dst[n * nbElem + e] = static_cast(element[n]); + } + } + + // Build vertex-to-element mapping (velems) + // For each vertex, stores the list of (elemId * nNodes + localNode + 1). + // 0 is used as sentinel. SoA layout: velems[slot * nbVertex + vertexId]. + std::vector> vertexElems(m_nbVertices); + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& element = elements[e]; + for (unsigned int n = 0; n < nNodes; ++n) + { + const int nodeId = static_cast(element[n]); + vertexElems[nodeId].push_back( + static_cast(e * nNodes + n + 1)); + } + } + + m_maxElemPerVertex = 0; + for (const auto& ve : vertexElems) + { + if (ve.size() > m_maxElemPerVertex) + m_maxElemPerVertex = static_cast(ve.size()); + } + + m_gpuVelems.resize(m_maxElemPerVertex * m_nbVertices); + { + auto* dst = m_gpuVelems.hostWrite(); + std::memset(dst, 0, m_maxElemPerVertex * m_nbVertices * sizeof(int)); + for (std::size_t v = 0; v < m_nbVertices; ++v) + { + for (std::size_t s = 0; s < vertexElems[v].size(); ++s) + dst[s * m_nbVertices + v] = vertexElems[v][s]; + } + } + + // Allocate intermediate per-element force buffer + m_gpuElementForce.resize(nbElem * nNodes * dim); + + m_gpuDataUploaded = true; + m_gpuRotationsUploaded = false; +} + +template +void CudaElementCorotationalFEMForceField::uploadRotations() +{ + using trait = sofa::component::solidmechanics::fem::elastic::trait; + constexpr auto dim = trait::spatial_dimensions; + + const auto& rotations = this->m_rotations; + const auto nbElem = rotations.size(); + + m_gpuRotations.resize(nbElem * dim * dim); + { + auto* dst = m_gpuRotations.hostWrite(); + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& R = rotations[e]; + for (unsigned int i = 0; i < dim; ++i) + for (unsigned int j = 0; j < dim; ++j) + dst[e * dim * dim + i * dim + j] = static_cast(R[i][j]); + } + } + + m_gpuRotationsUploaded = true; +} + +template +void CudaElementCorotationalFEMForceField::uploadInitialRotationsTransposed() +{ + using trait = sofa::component::solidmechanics::fem::elastic::trait; + constexpr auto dim = trait::spatial_dimensions; + constexpr auto nNodes = trait::NumberOfNodesInElement; + + const auto& initRotT = this->m_initialRotationsTransposed; + const auto nbElem = initRotT.size(); + if (nbElem == 0) return; + + m_gpuInitialRotationsTransposed.resize(nbElem * dim * dim); + m_gpuRotations.resize(nbElem * dim * dim); + { + auto* dst = m_gpuInitialRotationsTransposed.hostWrite(); + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& R = initRotT[e]; + for (unsigned int i = 0; i < dim; ++i) + for (unsigned int j = 0; j < dim; ++j) + dst[e * dim * dim + i * dim + j] = static_cast(R[i][j]); + } + } + + // Check if the rotation method is GPU-compatible + const auto rotationMethodKey = this->m_rotationMethods.d_rotationMethod.getValue().key(); + m_gpuRotationMethodSupported = (nNodes >= 3) + && (rotationMethodKey == "triangle" || rotationMethodKey == "hexahedron"); +} + +template +void CudaElementCorotationalFEMForceField::downloadRotations() +{ + using trait = sofa::component::solidmechanics::fem::elastic::trait; + constexpr auto dim = trait::spatial_dimensions; + + if (!m_gpuRotationsUploaded) return; + + const auto nbElem = m_gpuRotations.size() / (dim * dim); + this->m_rotations.resize(nbElem); + + const auto* src = m_gpuRotations.hostRead(); + for (std::size_t e = 0; e < nbElem; ++e) + { + auto& R = this->m_rotations[e]; + for (unsigned int i = 0; i < dim; ++i) + for (unsigned int j = 0; j < dim; ++j) + R[i][j] = static_cast(src[e * dim * dim + i * dim + j]); + } +} + +template +void CudaElementCorotationalFEMForceField::buildStiffnessMatrix( + sofa::core::behavior::StiffnessMatrix* matrix) +{ + if (m_gpuRotationMethodSupported && m_gpuRotationsUploaded) + downloadRotations(); + + ElementCorotationalFEMForceField::buildStiffnessMatrix(matrix); +} + +template +void CudaElementCorotationalFEMForceField::addForce( + const sofa::core::MechanicalParams* mparams, + sofa::DataVecDeriv_t& d_f, + const sofa::DataVecCoord_t& d_x, + const sofa::DataVecDeriv_t& d_v) +{ + if (this->isComponentStateInvalid()) + return; + + if (!m_gpuDataUploaded) + { + ElementCorotationalFEMForceField::addForce(mparams, d_f, d_x, d_v); + uploadRotations(); + return; + } + + using trait = sofa::component::solidmechanics::fem::elastic::trait; + constexpr auto nNodes = trait::NumberOfNodesInElement; + constexpr auto dim = trait::spatial_dimensions; + + const VecCoord& x = d_x.getValue(); + auto restPositionAccessor = this->mstate->readRestPositions(); + const VecCoord& x0 = restPositionAccessor.ref(); + + const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); + const auto nbElem = static_cast(elements.size()); + const auto nbVertex = static_cast(x.size()); + + VecDeriv& f = *d_f.beginEdit(); + if (f.size() < x.size()) + f.resize(x.size()); + + if constexpr (nNodes >= 3) + { + if (m_gpuRotationMethodSupported) + { + gpu::cuda::ElementCorotationalFEMForceFieldCuda_addForceWithRotations( + nbElem, nbVertex, m_maxElemPerVertex, + m_gpuElements.deviceRead(), m_gpuInitialRotationsTransposed.deviceRead(), + m_gpuStiffness.deviceRead(), x.deviceRead(), x0.deviceRead(), + f.deviceWrite(), m_gpuElementForce.deviceWrite(), + m_gpuRotations.deviceWrite(), m_gpuVelems.deviceRead()); + + m_gpuRotationsUploaded = true; + d_f.endEdit(); + return; + } + } + + // CPU rotations + GPU forces + this->computeRotations(this->m_rotations, x, x0); + uploadRotations(); + + gpu::cuda::ElementCorotationalFEMForceFieldCuda_addForce( + nbElem, nbVertex, m_maxElemPerVertex, + m_gpuElements.deviceRead(), m_gpuRotations.deviceRead(), + m_gpuStiffness.deviceRead(), x.deviceRead(), x0.deviceRead(), + f.deviceWrite(), m_gpuElementForce.deviceWrite(), + m_gpuVelems.deviceRead()); + + d_f.endEdit(); +} + +template +void CudaElementCorotationalFEMForceField::addDForce( + const sofa::core::MechanicalParams* mparams, + sofa::DataVecDeriv_t& d_df, + const sofa::DataVecDeriv_t& d_dx) +{ + if (this->isComponentStateInvalid()) + return; + + if (!m_gpuDataUploaded || !m_gpuRotationsUploaded) + { + // Fallback to CPU if GPU data not ready + ElementCorotationalFEMForceField::addDForce(mparams, d_df, d_dx); + return; + } + + using trait = sofa::component::solidmechanics::fem::elastic::trait; + constexpr auto nNodes = trait::NumberOfNodesInElement; + constexpr auto dim = trait::spatial_dimensions; + + VecDeriv& df = *d_df.beginEdit(); + const VecDeriv& dx = d_dx.getValue(); + + if (df.size() < dx.size()) + df.resize(dx.size()); + + const auto kFactor = static_cast( + sofa::core::mechanicalparams::kFactorIncludingRayleighDamping( + mparams, this->rayleighStiffness.getValue())); + + const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); + const auto nbElem = static_cast(elements.size()); + const auto nbVertex = static_cast(dx.size()); + + gpu::cuda::ElementCorotationalFEMForceFieldCuda_addDForce( + nbElem, nbVertex, m_maxElemPerVertex, + m_gpuElements.deviceRead(), m_gpuRotations.deviceRead(), + m_gpuStiffness.deviceRead(), dx.deviceRead(), + df.deviceWrite(), m_gpuElementForce.deviceWrite(), + m_gpuVelems.deviceRead(), kFactor); + + d_df.endEdit(); +} + +} // namespace sofa::component::solidmechanics::fem::elastic diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementFEMKernelUtils.cuh b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementFEMKernelUtils.cuh new file mode 100644 index 00000000000..4d65c2f7130 --- /dev/null +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementFEMKernelUtils.cuh @@ -0,0 +1,474 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#pragma once + +#include + +namespace sofa::gpu::cuda +{ + +//============================================================================= +// Math utilities +//============================================================================= + +template +__device__ inline T myRsqrt(T x); + +template<> +__device__ inline float myRsqrt(float x) { return rsqrtf(x); } + +template<> +__device__ inline double myRsqrt(double x) { return rsqrt(x); } + +//============================================================================= +// 3x3 Matrix operations (row-major storage) +//============================================================================= + +/// C = A * B +template +__device__ inline void mat3Mul(const T* A, const T* B, T* C) +{ + #pragma unroll + for (int i = 0; i < 3; ++i) + { + #pragma unroll + for (int j = 0; j < 3; ++j) + { + C[i * 3 + j] = A[i * 3 + 0] * B[0 * 3 + j] + + A[i * 3 + 1] * B[1 * 3 + j] + + A[i * 3 + 2] * B[2 * 3 + j]; + } + } +} + +/// C = A * B^T +template +__device__ inline void mat3MulTranspose(const T* A, const T* BT, T* C) +{ + #pragma unroll + for (int i = 0; i < 3; ++i) + { + #pragma unroll + for (int j = 0; j < 3; ++j) + { + C[i * 3 + j] = A[i * 3 + 0] * BT[j * 3 + 0] + + A[i * 3 + 1] * BT[j * 3 + 1] + + A[i * 3 + 2] * BT[j * 3 + 2]; + } + } +} + +/// C = A^T * B +template +__device__ inline void mat3TransposeMul(const T* A, const T* B, T* C) +{ + #pragma unroll + for (int i = 0; i < 3; ++i) + { + #pragma unroll + for (int j = 0; j < 3; ++j) + { + C[i * 3 + j] = A[0 * 3 + i] * B[0 * 3 + j] + + A[1 * 3 + i] * B[1 * 3 + j] + + A[2 * 3 + i] * B[2 * 3 + j]; + } + } +} + +/// out = R * in (rotate a 3D vector) +template +__device__ inline void rotateVector(const T* R, const T* in, T* out) +{ + #pragma unroll + for (int i = 0; i < 3; ++i) + { + out[i] = R[i * 3 + 0] * in[0] + + R[i * 3 + 1] * in[1] + + R[i * 3 + 2] * in[2]; + } +} + +/// out = R^T * in (rotate a 3D vector by transpose) +template +__device__ inline void rotateVectorTranspose(const T* R, const T* in, T* out) +{ + #pragma unroll + for (int i = 0; i < 3; ++i) + { + out[i] = R[0 * 3 + i] * in[0] + + R[1 * 3 + i] * in[1] + + R[2 * 3 + i] * in[2]; + } +} + +//============================================================================= +// Rotation frame computation +//============================================================================= + +/// Compute rotation frame from first 3 nodes (for Triangle, Quad, Tetrahedron) +template +__device__ inline void computeTriangleFrame(const T* pos, T* frame) +{ + // X axis: normalized (p1 - p0) + T ax = pos[3] - pos[0], ay = pos[4] - pos[1], az = pos[5] - pos[2]; + T invLen = myRsqrt(ax * ax + ay * ay + az * az); + ax *= invLen; ay *= invLen; az *= invLen; + + // Temp vector b = p2 - p0 + T bx = pos[6] - pos[0], by = pos[7] - pos[1], bz = pos[8] - pos[2]; + + // Z axis: normalized cross(a, b) + T cx = ay * bz - az * by; + T cy = az * bx - ax * bz; + T cz = ax * by - ay * bx; + invLen = myRsqrt(cx * cx + cy * cy + cz * cz); + cx *= invLen; cy *= invLen; cz *= invLen; + + // Y axis: cross(z, x) + bx = cy * az - cz * ay; + by = cz * ax - cx * az; + bz = cx * ay - cy * ax; + + // Store row-major: frame[row][col] = frame[row * 3 + col] + frame[0] = ax; frame[1] = ay; frame[2] = az; + frame[3] = bx; frame[4] = by; frame[5] = bz; + frame[6] = cx; frame[7] = cy; frame[8] = cz; +} + +/// Compute rotation frame from 8 hexahedron nodes +template +__device__ inline void computeHexahedronFrame(const T* pos, T* frame) +{ + const T quarter = T(0.25); + + // Average X direction from 4 edge pairs + T ax = ((pos[1*3+0] - pos[0*3+0]) + (pos[2*3+0] - pos[3*3+0]) + + (pos[5*3+0] - pos[4*3+0]) + (pos[6*3+0] - pos[7*3+0])) * quarter; + T ay = ((pos[1*3+1] - pos[0*3+1]) + (pos[2*3+1] - pos[3*3+1]) + + (pos[5*3+1] - pos[4*3+1]) + (pos[6*3+1] - pos[7*3+1])) * quarter; + T az = ((pos[1*3+2] - pos[0*3+2]) + (pos[2*3+2] - pos[3*3+2]) + + (pos[5*3+2] - pos[4*3+2]) + (pos[6*3+2] - pos[7*3+2])) * quarter; + + // Average Y direction + T bx = ((pos[3*3+0] - pos[0*3+0]) + (pos[2*3+0] - pos[1*3+0]) + + (pos[7*3+0] - pos[4*3+0]) + (pos[6*3+0] - pos[5*3+0])) * quarter; + T by = ((pos[3*3+1] - pos[0*3+1]) + (pos[2*3+1] - pos[1*3+1]) + + (pos[7*3+1] - pos[4*3+1]) + (pos[6*3+1] - pos[5*3+1])) * quarter; + T bz = ((pos[3*3+2] - pos[0*3+2]) + (pos[2*3+2] - pos[1*3+2]) + + (pos[7*3+2] - pos[4*3+2]) + (pos[6*3+2] - pos[5*3+2])) * quarter; + + // Normalize X + T invLen = myRsqrt(ax * ax + ay * ay + az * az); + ax *= invLen; ay *= invLen; az *= invLen; + + // Z = normalized cross(X, Y) + T cx = ay * bz - az * by; + T cy = az * bx - ax * bz; + T cz = ax * by - ay * bx; + invLen = myRsqrt(cx * cx + cy * cy + cz * cz); + cx *= invLen; cy *= invLen; cz *= invLen; + + // Y = cross(Z, X) + bx = cy * az - cz * ay; + by = cz * ax - cx * az; + bz = cx * ay - cy * ax; + + frame[0] = ax; frame[1] = ay; frame[2] = az; + frame[3] = bx; frame[4] = by; frame[5] = bz; + frame[6] = cx; frame[7] = cy; frame[8] = cz; +} + +//============================================================================= +// Element data gathering +//============================================================================= + +/// Gather positions for one element from global arrays (SoA layout) +template +__device__ inline void gatherElementData( + const int* elements, int nbElem, int elemId, + const T* globalData, + T* localData) +{ + #pragma unroll + for (int n = 0; n < NNodes; ++n) + { + const int nodeId = elements[n * nbElem + elemId]; + #pragma unroll + for (int d = 0; d < Dim; ++d) + localData[n * Dim + d] = globalData[nodeId * Dim + d]; + } +} + +/// Gather displacement (x - x0) for one element +template +__device__ inline void gatherElementDisplacement( + const int* elements, int nbElem, int elemId, + const T* x, const T* x0, + T* disp) +{ + #pragma unroll + for (int n = 0; n < NNodes; ++n) + { + const int nodeId = elements[n * nbElem + elemId]; + #pragma unroll + for (int d = 0; d < Dim; ++d) + disp[n * Dim + d] = x[nodeId * Dim + d] - x0[nodeId * Dim + d]; + } +} + +//============================================================================= +// Element center computation +//============================================================================= + +/// Compute center of element positions +template +__device__ inline void computeElementCenter(const T* pos, T* center) +{ + const T invN = T(1) / T(NNodes); + + #pragma unroll + for (int d = 0; d < Dim; ++d) + center[d] = T(0); + + #pragma unroll + for (int n = 0; n < NNodes; ++n) + { + #pragma unroll + for (int d = 0; d < Dim; ++d) + center[d] += pos[n * Dim + d]; + } + + #pragma unroll + for (int d = 0; d < Dim; ++d) + center[d] *= invN; +} + +//============================================================================= +// Corotational displacement computation +//============================================================================= + +/// Compute corotational displacement: disp = R^T * (x - center) - (x0 - center0) +template +__device__ inline void computeCorotationalDisplacement( + const T* R, + const T* x, const T* x0, + const T* center, const T* center0, + T* disp) +{ + #pragma unroll + for (int n = 0; n < NNodes; ++n) + { + // diff = x_n - center + T diff[Dim]; + #pragma unroll + for (int d = 0; d < Dim; ++d) + diff[d] = x[n * Dim + d] - center[d]; + + // rotated = R^T * diff + #pragma unroll + for (int di = 0; di < Dim; ++di) + { + T rotated = T(0); + #pragma unroll + for (int dj = 0; dj < Dim; ++dj) + rotated += R[dj * Dim + di] * diff[dj]; + disp[n * Dim + di] = rotated - (x0[n * Dim + di] - center0[di]); + } + } +} + +/// Compute R^T * dx for each node (for addDForce) +template +__device__ inline void rotateDisplacementTranspose( + const T* R, + const int* elements, int nbElem, int elemId, + const T* dx, + T* rdx) +{ + #pragma unroll + for (int n = 0; n < NNodes; ++n) + { + const int nodeId = elements[n * nbElem + elemId]; + + T nodeDx[Dim]; + #pragma unroll + for (int d = 0; d < Dim; ++d) + nodeDx[d] = dx[nodeId * Dim + d]; + + #pragma unroll + for (int di = 0; di < Dim; ++di) + { + T sum = T(0); + #pragma unroll + for (int dj = 0; dj < Dim; ++dj) + sum += R[dj * Dim + di] * nodeDx[dj]; + rdx[n * Dim + di] = sum; + } + } +} + +//============================================================================= +// Symmetric block-matrix multiply +//============================================================================= + +/** + * Symmetric block-matrix multiply: out = K * in + * + * K is stored in upper-triangle block format: + * symIdx = ni * NNodes - ni*(ni-1)/2 + (nj - ni) for nj >= ni + * K[symIdx * Dim * Dim + di * Dim + dj] for each element + */ +template +__device__ inline void symBlockMatMul(const T* K, const T* in, T* out) +{ + // Initialize output to zero + #pragma unroll + for (int i = 0; i < NNodes * Dim; ++i) + out[i] = T(0); + + #pragma unroll + for (int ni = 0; ni < NNodes; ++ni) + { + const int diagIdx = ni * NNodes - ni * (ni - 1) / 2; + + // Diagonal block: Kii * in_i -> out_i + { + const T* Kii = K + diagIdx * Dim * Dim; + #pragma unroll + for (int di = 0; di < Dim; ++di) + { + T sum = T(0); + #pragma unroll + for (int dj = 0; dj < Dim; ++dj) + sum += Kii[di * Dim + dj] * in[ni * Dim + dj]; + out[ni * Dim + di] += sum; + } + } + + // Off-diagonal blocks (symmetric: Kij and Kij^T) + #pragma unroll + for (int nj = ni + 1; nj < NNodes; ++nj) + { + const int symIdx = diagIdx + (nj - ni); + const T* Kij = K + symIdx * Dim * Dim; + + // Kij * in_j -> out_i + #pragma unroll + for (int di = 0; di < Dim; ++di) + { + T sum = T(0); + #pragma unroll + for (int dj = 0; dj < Dim; ++dj) + sum += Kij[di * Dim + dj] * in[nj * Dim + dj]; + out[ni * Dim + di] += sum; + } + + // Kij^T * in_i -> out_j + #pragma unroll + for (int dj = 0; dj < Dim; ++dj) + { + T sum = T(0); + #pragma unroll + for (int di = 0; di < Dim; ++di) + sum += Kij[di * Dim + dj] * in[ni * Dim + di]; + out[nj * Dim + dj] += sum; + } + } + } +} + +//============================================================================= +// Force output with rotation +//============================================================================= + +/// Rotate local forces to global frame and write: out = scale * R * localForce +template +__device__ inline void rotateAndWriteForce( + const T* R, + const T* localForce, + T* out, + T scale) +{ + #pragma unroll + for (int n = 0; n < NNodes; ++n) + { + #pragma unroll + for (int di = 0; di < Dim; ++di) + { + T sum = T(0); + #pragma unroll + for (int dj = 0; dj < Dim; ++dj) + sum += R[di * Dim + dj] * localForce[n * Dim + dj]; + out[n * Dim + di] = scale * sum; + } + } +} + +/// Write negated force (for linear case without rotation): out = scale * localForce +template +__device__ inline void writeForce(const T* localForce, T* out, T scale) +{ + #pragma unroll + for (int i = 0; i < NNodes * Dim; ++i) + out[i] = scale * localForce[i]; +} + +//============================================================================= +// Gather kernel for accumulating per-vertex forces +//============================================================================= + +/** + * Gather per-vertex forces from per-element contributions. + * velems[slot * nbVertex + vertexId] contains (elemId * NNodes + localNode + 1), 0 = end + */ +template +__global__ void ElementFEM_gatherForce_kernel( + int nbVertex, + int maxElemPerVertex, + const int* __restrict__ velems, + const T* __restrict__ eforce, + T* df) +{ + const int vertexId = blockIdx.x * blockDim.x + threadIdx.x; + if (vertexId >= nbVertex) return; + + T acc[Dim]; + #pragma unroll + for (int d = 0; d < Dim; ++d) + acc[d] = T(0); + + for (int s = 0; s < maxElemPerVertex; ++s) + { + const int idx = velems[s * nbVertex + vertexId]; + if (idx == 0) break; + const int base = (idx - 1) * Dim; + #pragma unroll + for (int d = 0; d < Dim; ++d) + acc[d] += eforce[base + d]; + } + + #pragma unroll + for (int d = 0; d < Dim; ++d) + df[vertexId * Dim + d] += acc[d]; +} + +} // namespace sofa::gpu::cuda diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cpp b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cpp new file mode 100644 index 00000000000..d8d3b9ef1c3 --- /dev/null +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cpp @@ -0,0 +1,101 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include + +#include +#include +#include + +namespace sofa::component::solidmechanics::fem::elastic +{ + +using namespace sofa::gpu::cuda; + +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; + +#ifdef SOFA_GPU_CUDA_DOUBLE +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; +template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField; +#endif + +} // namespace sofa::component::solidmechanics::fem::elastic + +namespace sofa::gpu::cuda +{ + +void registerElementLinearSmallStrainFEMForceField(sofa::core::ObjectFactory* factory) +{ + using namespace sofa::component::solidmechanics::fem::elastic; + + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for EdgeLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for TriangleLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for QuadLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for TetrahedronLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA for HexahedronLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); + +#ifdef SOFA_GPU_CUDA_DOUBLE + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for EdgeLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for TriangleLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for QuadLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for TetrahedronLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); + factory->registerObjects(sofa::core::ObjectRegistrationData( + "Supports GPU-side computations using CUDA (double) for HexahedronLinearSmallStrainFEMForceField") + .add< CudaElementLinearSmallStrainFEMForceField >() + ); +#endif +} + +} // namespace sofa::gpu::cuda diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cu b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cu new file mode 100644 index 00000000000..9d0195ab580 --- /dev/null +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cu @@ -0,0 +1,189 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#include +#include +#include "CudaElementFEMKernelUtils.cuh" + +namespace sofa::gpu::cuda +{ + +/** + * Kernel for addForce: f = -K * (x - x0) + */ +template +__global__ void ElementLinearSmallStrainFEMForceField_computeForce_kernel( + int nbElem, + const int* __restrict__ elements, + const T* __restrict__ stiffness, + const T* __restrict__ x, + const T* __restrict__ x0, + T* __restrict__ eforce) +{ + constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2; + + const int elemId = blockIdx.x * blockDim.x + threadIdx.x; + if (elemId >= nbElem) return; + + // Gather displacement (x - x0) + T disp[NNodes * Dim]; + gatherElementDisplacement(elements, nbElem, elemId, x, x0, disp); + + // Multiply by stiffness matrix + const T* K = stiffness + elemId * NSymBlocks * Dim * Dim; + T edf[NNodes * Dim]; + symBlockMatMul(K, disp, edf); + + // Write negated force + T* out = eforce + elemId * NNodes * Dim; + writeForce(edf, out, T(-1)); +} + +/** + * Kernel for addDForce: df = -kFactor * K * dx + */ +template +__global__ void ElementLinearSmallStrainFEMForceField_computeDForce_kernel( + int nbElem, + const int* __restrict__ elements, + const T* __restrict__ stiffness, + const T* __restrict__ dx, + T* __restrict__ eforce, + T kFactor) +{ + constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2; + + const int elemId = blockIdx.x * blockDim.x + threadIdx.x; + if (elemId >= nbElem) return; + + // Gather displacement increment + T edx[NNodes * Dim]; + gatherElementData(elements, nbElem, elemId, dx, edx); + + // Multiply by stiffness matrix + const T* K = stiffness + elemId * NSymBlocks * Dim * Dim; + T edf[NNodes * Dim]; + symBlockMatMul(K, edx, edf); + + // Write scaled negated force + T* out = eforce + elemId * NNodes * Dim; + writeForce(edf, out, -kFactor); +} + +// ===================== Launch functions ===================== + +template +void ElementLinearSmallStrainFEMForceFieldCuda_addForce( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* stiffness, + const void* x, + const void* x0, + void* f, + void* eforce, + const void* velems) +{ + const int computeThreads = 64; + int numBlocks = (nbElem + computeThreads - 1) / computeThreads; + ElementLinearSmallStrainFEMForceField_computeForce_kernel + <<>>( + nbElem, + (const int*)elements, + (const T*)stiffness, + (const T*)x, + (const T*)x0, + (T*)eforce); + mycudaDebugError("ElementLinearSmallStrainFEMForceField_computeForce_kernel"); + + const int gatherThreads = 256; + numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads; + ElementFEM_gatherForce_kernel + <<>>( + nbVertex, + maxElemPerVertex, + (const int*)velems, + (const T*)eforce, + (T*)f); + mycudaDebugError("ElementFEM_gatherForce_kernel"); +} + +template +void ElementLinearSmallStrainFEMForceFieldCuda_addDForce( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* stiffness, + const void* dx, + void* df, + void* eforce, + const void* velems, + T kFactor) +{ + const int computeThreads = 64; + int numBlocks = (nbElem + computeThreads - 1) / computeThreads; + ElementLinearSmallStrainFEMForceField_computeDForce_kernel + <<>>( + nbElem, + (const int*)elements, + (const T*)stiffness, + (const T*)dx, + (T*)eforce, + kFactor); + mycudaDebugError("ElementLinearSmallStrainFEMForceField_computeDForce_kernel"); + + const int gatherThreads = 256; + numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads; + ElementFEM_gatherForce_kernel + <<>>( + nbVertex, + maxElemPerVertex, + (const int*)velems, + (const T*)eforce, + (T*)df); + mycudaDebugError("ElementFEM_gatherForce_kernel"); +} + +// ===================== Explicit template instantiations ===================== + +#define INSTANTIATE_LINEAR(T, NNodes) \ + template void ElementLinearSmallStrainFEMForceFieldCuda_addForce( \ + unsigned int, unsigned int, unsigned int, const void*, const void*, \ + const void*, const void*, void*, void*, const void*); \ + template void ElementLinearSmallStrainFEMForceFieldCuda_addDForce( \ + unsigned int, unsigned int, unsigned int, const void*, const void*, \ + const void*, void*, void*, const void*, T); + +INSTANTIATE_LINEAR(float, 2) +INSTANTIATE_LINEAR(float, 3) +INSTANTIATE_LINEAR(float, 4) +INSTANTIATE_LINEAR(float, 8) + +INSTANTIATE_LINEAR(double, 2) +INSTANTIATE_LINEAR(double, 3) +INSTANTIATE_LINEAR(double, 4) +INSTANTIATE_LINEAR(double, 8) + +#undef INSTANTIATE_LINEAR + +} // namespace sofa::gpu::cuda diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.h b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.h new file mode 100644 index 00000000000..45d119846e8 --- /dev/null +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.h @@ -0,0 +1,128 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#pragma once + +#include +#include + +namespace sofa::gpu::cuda +{ + +template +void ElementLinearSmallStrainFEMForceFieldCuda_addForce( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* stiffness, + const void* x, + const void* x0, + void* f, + void* eforce, + const void* velems); + +template +void ElementLinearSmallStrainFEMForceFieldCuda_addDForce( + unsigned int nbElem, + unsigned int nbVertex, + unsigned int maxElemPerVertex, + const void* elements, + const void* stiffness, + const void* dx, + void* df, + void* eforce, + const void* velems, + T kFactor); + +} // namespace sofa::gpu::cuda + +namespace sofa::component::solidmechanics::fem::elastic +{ + +/** + * CUDA-accelerated version of ElementLinearSmallStrainFEMForceField. + * + * Works with any element type (Edge, Triangle, Quad, Tetrahedron, Hexahedron). + * Both addForce and addDForce run entirely on GPU. + * + * Uses a two-kernel approach for addDForce: + * Kernel 1: compute per-element forces (1 thread/element, fully unrolled) + * Kernel 2: gather per-vertex (1 thread/vertex, no atomics) + * + * Compared to the corotational version, no rotation matrices are needed. + */ +template +class CudaElementLinearSmallStrainFEMForceField + : public ElementLinearSmallStrainFEMForceField +{ +public: + SOFA_CLASS( + SOFA_TEMPLATE2(CudaElementLinearSmallStrainFEMForceField, DataTypes, ElementType), + SOFA_TEMPLATE2(ElementLinearSmallStrainFEMForceField, DataTypes, ElementType)); + + using Real = sofa::Real_t; + using Coord = sofa::Coord_t; + using Deriv = sofa::Deriv_t; + using VecCoord = sofa::VecCoord_t; + using VecDeriv = sofa::VecDeriv_t; + + static const std::string GetCustomClassName() + { + return ElementLinearSmallStrainFEMForceField::GetCustomClassName(); + } + + static const std::string GetCustomTemplateName() + { + return DataTypes::Name(); + } + + void init() override; + + void addForce( + const sofa::core::MechanicalParams* mparams, + sofa::DataVecDeriv_t& f, + const sofa::DataVecCoord_t& x, + const sofa::DataVecDeriv_t& v) override; + + void addDForce( + const sofa::core::MechanicalParams* mparams, + sofa::DataVecDeriv_t& df, + const sofa::DataVecDeriv_t& dx) override; + +protected: + + CudaElementLinearSmallStrainFEMForceField() = default; + + void uploadStiffnessAndConnectivity(); + + gpu::cuda::CudaVector m_gpuStiffness; ///< Symmetric block-format stiffness per element + gpu::cuda::CudaVector m_gpuElements; ///< SoA connectivity: elements[nodeIdx * nbElem + elemId] + gpu::cuda::CudaVector m_gpuElementForce; ///< Intermediate per-element per-node force buffer + gpu::cuda::CudaVector m_gpuVelems; ///< SoA vertex-to-element mapping, 0-terminated + + unsigned int m_maxElemPerVertex = 0; + unsigned int m_nbVertices = 0; + + bool m_gpuDataUploaded = false; +}; + +} // namespace sofa::component::solidmechanics::fem::elastic diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.inl b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.inl new file mode 100644 index 00000000000..f27c06b92e7 --- /dev/null +++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.inl @@ -0,0 +1,234 @@ +/****************************************************************************** +* SOFA, Simulation Open-Framework Architecture * +* (c) 2006 INRIA, USTL, UJF, CNRS, MGH * +* * +* This program is free software; you can redistribute it and/or modify it * +* under the terms of the GNU Lesser General Public License as published by * +* the Free Software Foundation; either version 2.1 of the License, or (at * +* your option) any later version. * +* * +* This program is distributed in the hope that it will be useful, but WITHOUT * +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * +* for more details. * +* * +* You should have received a copy of the GNU Lesser General Public License * +* along with this program. If not, see . * +******************************************************************************* +* Authors: The SOFA Team and external contributors (see Authors.txt) * +* * +* Contact information: contact@sofa-framework.org * +******************************************************************************/ +#pragma once +#include +#include +#include +#include + +namespace sofa::component::solidmechanics::fem::elastic +{ + +template +void CudaElementLinearSmallStrainFEMForceField::init() +{ + ElementLinearSmallStrainFEMForceField::init(); + + if (!this->isComponentStateInvalid()) + { + uploadStiffnessAndConnectivity(); + } +} + +template +void CudaElementLinearSmallStrainFEMForceField::uploadStiffnessAndConnectivity() +{ + using trait = sofa::component::solidmechanics::fem::elastic::trait; + + if (!this->l_topology) return; + + const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); + const auto& assembledMatrices = this->m_assembledStiffnessMatrices; + + const auto nbElem = elements.size(); + constexpr auto nNodes = trait::NumberOfNodesInElement; + constexpr auto dim = trait::spatial_dimensions; + + // Find number of vertices + unsigned int maxNodeId = 0; + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& element = elements[e]; + for (unsigned int n = 0; n < nNodes; ++n) + { + if (static_cast(element[n]) > maxNodeId) + maxNodeId = static_cast(element[n]); + } + } + m_nbVertices = maxNodeId + 1; + + // Upload stiffness matrices in symmetric upper-triangle block format: + // Only blocks (ni, nj) with nj >= ni are stored. + // symIdx = ni * nNodes - ni*(ni-1)/2 + (nj - ni) + // K[symIdx * dim * dim + di * dim + dj] per element + constexpr auto nSymBlocks = nNodes * (nNodes + 1) / 2; + m_gpuStiffness.resize(nbElem * nSymBlocks * dim * dim); + { + auto* dst = m_gpuStiffness.hostWrite(); + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& K = assembledMatrices[e]; + for (unsigned int ni = 0; ni < nNodes; ++ni) + { + const unsigned int diagIdx = ni * nNodes - ni * (ni - 1) / 2; + for (unsigned int nj = ni; nj < nNodes; ++nj) + { + const unsigned int symIdx = diagIdx + (nj - ni); + for (unsigned int di = 0; di < dim; ++di) + for (unsigned int dj = 0; dj < dim; ++dj) + dst[e * nSymBlocks * dim * dim + + symIdx * dim * dim + + di * dim + dj] + = static_cast(K[ni * dim + di][nj * dim + dj]); + } + } + } + } + + // Upload element connectivity in SoA layout: + // elements[nodeIdx * nbElem + elemId] = global node index + m_gpuElements.resize(nNodes * nbElem); + { + auto* dst = m_gpuElements.hostWrite(); + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& element = elements[e]; + for (unsigned int n = 0; n < nNodes; ++n) + dst[n * nbElem + e] = static_cast(element[n]); + } + } + + // Build vertex-to-element mapping (velems) + std::vector> vertexElems(m_nbVertices); + for (std::size_t e = 0; e < nbElem; ++e) + { + const auto& element = elements[e]; + for (unsigned int n = 0; n < nNodes; ++n) + { + const int nodeId = static_cast(element[n]); + vertexElems[nodeId].push_back( + static_cast(e * nNodes + n + 1)); + } + } + + m_maxElemPerVertex = 0; + for (const auto& ve : vertexElems) + { + if (ve.size() > m_maxElemPerVertex) + m_maxElemPerVertex = static_cast(ve.size()); + } + + m_gpuVelems.resize(m_maxElemPerVertex * m_nbVertices); + { + auto* dst = m_gpuVelems.hostWrite(); + std::memset(dst, 0, m_maxElemPerVertex * m_nbVertices * sizeof(int)); + for (std::size_t v = 0; v < m_nbVertices; ++v) + { + for (std::size_t s = 0; s < vertexElems[v].size(); ++s) + dst[s * m_nbVertices + v] = vertexElems[v][s]; + } + } + + // Allocate intermediate per-element force buffer + m_gpuElementForce.resize(nbElem * nNodes * dim); + + m_gpuDataUploaded = true; +} + +template +void CudaElementLinearSmallStrainFEMForceField::addForce( + const sofa::core::MechanicalParams* mparams, + sofa::DataVecDeriv_t& d_f, + const sofa::DataVecCoord_t& d_x, + const sofa::DataVecDeriv_t& d_v) +{ + if (this->isComponentStateInvalid()) + return; + + if (!m_gpuDataUploaded) + { + ElementLinearSmallStrainFEMForceField::addForce(mparams, d_f, d_x, d_v); + return; + } + + using trait = sofa::component::solidmechanics::fem::elastic::trait; + constexpr auto nNodes = trait::NumberOfNodesInElement; + constexpr auto dim = trait::spatial_dimensions; + + VecDeriv& f = *d_f.beginEdit(); + const VecCoord& x = d_x.getValue(); + + if (f.size() < x.size()) + f.resize(x.size()); + + auto restPositionAccessor = this->mstate->readRestPositions(); + const VecCoord& x0 = restPositionAccessor.ref(); + + const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); + const auto nbElem = static_cast(elements.size()); + const auto nbVertex = static_cast(x.size()); + + gpu::cuda::ElementLinearSmallStrainFEMForceFieldCuda_addForce( + nbElem, nbVertex, m_maxElemPerVertex, + m_gpuElements.deviceRead(), m_gpuStiffness.deviceRead(), + x.deviceRead(), x0.deviceRead(), + f.deviceWrite(), m_gpuElementForce.deviceWrite(), + m_gpuVelems.deviceRead()); + + d_f.endEdit(); +} + +template +void CudaElementLinearSmallStrainFEMForceField::addDForce( + const sofa::core::MechanicalParams* mparams, + sofa::DataVecDeriv_t& d_df, + const sofa::DataVecDeriv_t& d_dx) +{ + if (this->isComponentStateInvalid()) + return; + + if (!m_gpuDataUploaded) + { + // Fallback to CPU if GPU data not ready + ElementLinearSmallStrainFEMForceField::addDForce(mparams, d_df, d_dx); + return; + } + + using trait = sofa::component::solidmechanics::fem::elastic::trait; + constexpr auto nNodes = trait::NumberOfNodesInElement; + constexpr auto dim = trait::spatial_dimensions; + + VecDeriv& df = *d_df.beginEdit(); + const VecDeriv& dx = d_dx.getValue(); + + if (df.size() < dx.size()) + df.resize(dx.size()); + + const auto kFactor = static_cast( + sofa::core::mechanicalparams::kFactorIncludingRayleighDamping( + mparams, this->rayleighStiffness.getValue())); + + const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology); + const auto nbElem = static_cast(elements.size()); + const auto nbVertex = static_cast(dx.size()); + + gpu::cuda::ElementLinearSmallStrainFEMForceFieldCuda_addDForce( + nbElem, nbVertex, m_maxElemPerVertex, + m_gpuElements.deviceRead(), m_gpuStiffness.deviceRead(), + dx.deviceRead(), df.deviceWrite(), + m_gpuElementForce.deviceWrite(), m_gpuVelems.deviceRead(), + kFactor); + + d_df.endEdit(); +} + +} // namespace sofa::component::solidmechanics::fem::elastic diff --git a/applications/plugins/SofaCUDA/examples/CudaElementCorotationalFEMForceField.scn b/applications/plugins/SofaCUDA/examples/CudaElementCorotationalFEMForceField.scn new file mode 100644 index 00000000000..7ecf5da0d41 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/CudaElementCorotationalFEMForceField.scn @@ -0,0 +1,102 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/CudaElementLinearSmallStrainFEMForceField.scn b/applications/plugins/SofaCUDA/examples/CudaElementLinearSmallStrainFEMForceField.scn new file mode 100644 index 00000000000..39a5017048d --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/CudaElementLinearSmallStrainFEMForceField.scn @@ -0,0 +1,101 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_hexa.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_hexa.scn new file mode 100644 index 00000000000..a08c65e0024 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_hexa.scn @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_quad.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_quad.scn new file mode 100644 index 00000000000..047a96624f9 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_quad.scn @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_tetra.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_tetra.scn new file mode 100644 index 00000000000..57a39e63286 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_tetra.scn @@ -0,0 +1,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_triangle.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_triangle.scn new file mode 100644 index 00000000000..0a47b7e393d --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_triangle.scn @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_hexa.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_hexa.scn new file mode 100644 index 00000000000..228e4a08943 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_hexa.scn @@ -0,0 +1,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_quad.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_quad.scn new file mode 100644 index 00000000000..14babc5d207 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_quad.scn @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_tetra.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_tetra.scn new file mode 100644 index 00000000000..eac98ff93ac --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_tetra.scn @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_triangle.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_triangle.scn new file mode 100644 index 00000000000..03c6955697c --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_triangle.scn @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py new file mode 100644 index 00000000000..aec2063fbe6 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py @@ -0,0 +1,93 @@ +import Sofa + +import os +import numpy as np +from utilities import generate_regular_grid + +g_grid_min_corner=(0, 6, -2) +g_grid_max_corner=(16, 10, 2) + +g_fem_version = os.environ.get('FEM_VERSION', 'new') #either 'new' or 'legacy' +g_fem_template = os.environ.get('FEM_TEMPLATE', 'Vec3d') + +# default is (76, 16, 16) +g_grid_nx = int(os.environ.get('NX', '76')) +g_grid_ny = int(os.environ.get('NY', '16')) +g_grid_nz = int(os.environ.get('NZ', '16')) + +g_nb_steps = int(os.environ.get('NBSTEPS', '1000')) + +def createScene(root_node): + root_node.name = "root" + root_node.gravity = (0, -9, 0) + root_node.dt = 0.01 + + plugin_node = root_node.addChild('Plugins') + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Engine.Select") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.LinearSolver.Iterative") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.ODESolver.Backward") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.StateContainer") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Topology.Container.Dynamic") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Topology.Container.Grid") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Visual") + plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.Constraint.Projective') # Needed to use components [FixedProjectiveConstraint] + plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.Mass') # Needed to use components [DiagonalMass] + plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.SolidMechanics.FEM.Elastic') # Needed to use components [HexahedronCorotationalFEMForceField] + plugin_node.addObject('RequiredPlugin', pluginName='SofaCUDA.Component') + plugin_node.addObject('VisualStyle', displayFlags="showBehaviorModels showForceFields") + + root_node.addObject('DefaultAnimationLoop') + root_node.addObject('VisualStyle', displayFlags="showBehaviorModels showForceFields") + + grid_nodes, grid_hexa = generate_regular_grid(nx=g_grid_nx, ny=g_grid_ny, nz=g_grid_nz, min_corner=g_grid_min_corner, max_corner=g_grid_max_corner) + + hexahedron_node = root_node.addChild('Hexahedron') + hexahedron_node.addObject('EulerImplicitSolver', rayleighStiffness="0.1", rayleighMass="0.1") + hexahedron_node.addObject('CGLinearSolver', iterations="250", name="linear_solver", tolerance="1.0e-12", threshold="1.0e-12") + hexahedron_node.addObject('MechanicalObject', name="ms", template=g_fem_template, position=grid_nodes) + hexahedron_node.addObject('HexahedronSetTopologyContainer', hexahedra=grid_hexa) + hexahedron_node.addObject('DiagonalMass', totalMass="50.0") + hexahedron_node.addObject('BoxROI', name="boxroi1", box="-0.1 5 -3 0.1 11 3", drawBoxes="1") + hexahedron_node.addObject('FixedProjectiveConstraint', indices="@boxroi1.indices") + if g_fem_version == "legacy": + hexahedron_node.addObject('HexahedronFEMForceField', name="LegacyFEM", template=g_fem_template, youngModulus="4000", poissonRatio="0.3", method="large") + if g_fem_version == "new": + hexahedron_node.addObject('HexahedronCorotationalFEMForceField', name="NewFEM", template=g_fem_template, youngModulus="4000", poissonRatio="0.3") + +def main(): + + enable_gui = False + + try: + import Sofa.Gui + import SofaImGui + except: + enable_gui = False + + root = Sofa.Core.Node("root") + createScene(root) + + Sofa.Simulation.initRoot(root) + + if enable_gui: + Sofa.Gui.GUIManager.Init("myscene","imgui") + Sofa.Gui.GUIManager.createGUI(root, __file__) + Sofa.Gui.GUIManager.MainLoop(root) + Sofa.Gui.GUIManager.closeGUI() + else: + import time + + print(f"Running on {g_nb_steps} steps...") + start_timer = time.time() + + for iteration in range(g_nb_steps): + Sofa.Simulation.animate(root, root.dt.value) + + stop_timer = time.time() + print(f"... Done.") + print(f"{g_nb_steps} steps done in {stop_timer - start_timer:.3}s ({g_nb_steps/(stop_timer - start_timer):.5} fps).") + + +# Function used only if this script is called from a python environment +if __name__ == '__main__': + main() diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py.view b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py.view new file mode 100644 index 00000000000..1e9c7f6670c --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py.view @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py new file mode 100644 index 00000000000..d9480c34dfa --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py @@ -0,0 +1,94 @@ +import Sofa + +import os +import numpy as np +from utilities import generate_regular_grid, hexa_to_tetra + +g_grid_min_corner=(0, 6, -2) +g_grid_max_corner=(16, 10, 2) + +g_fem_version = os.environ.get('FEM_VERSION', 'new') #either 'new' or 'legacy' +g_fem_template = os.environ.get('FEM_TEMPLATE', 'Vec3d') + +# default is (76, 16, 16) +g_grid_nx = int(os.environ.get('NX', '76')) +g_grid_ny = int(os.environ.get('NY', '16')) +g_grid_nz = int(os.environ.get('NZ', '16')) + +g_nb_steps = int(os.environ.get('NBSTEPS', '1000')) + +def createScene(root_node): + root_node.name = "root" + root_node.gravity = (0, -9, 0) + root_node.dt = 0.01 + + plugin_node = root_node.addChild('Plugins') + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Engine.Select") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.LinearSolver.Iterative") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.ODESolver.Backward") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.StateContainer") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Topology.Container.Dynamic") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Topology.Container.Grid") + plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Visual") + plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.Constraint.Projective') # Needed to use components [FixedProjectiveConstraint] + plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.Mass') # Needed to use components [DiagonalMass] + plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.SolidMechanics.FEM.Elastic') # Needed to use components [TetrahedronCorotationalFEMForceField] + plugin_node.addObject('RequiredPlugin', pluginName='SofaCUDA.Component') + plugin_node.addObject('VisualStyle', displayFlags="showBehaviorModels showForceFields") + + root_node.addObject('DefaultAnimationLoop') + root_node.addObject('VisualStyle', displayFlags="showBehaviorModels showForceFields") + + grid_nodes, grid_hexa = generate_regular_grid(nx=g_grid_nx, ny=g_grid_ny, nz=g_grid_nz, min_corner=g_grid_min_corner, max_corner=g_grid_max_corner) + grid_tetra = hexa_to_tetra(grid_hexa) + + tetrahedron_node = root_node.addChild('Tetrahedron') + tetrahedron_node.addObject('EulerImplicitSolver', rayleighStiffness="0.1", rayleighMass="0.1") + tetrahedron_node.addObject('CGLinearSolver', iterations="250", name="linear_solver", tolerance="1.0e-12", threshold="1.0e-12") + tetrahedron_node.addObject('MechanicalObject', name="ms", template=g_fem_template, position=grid_nodes) + tetrahedron_node.addObject('TetrahedronSetTopologyContainer', tetrahedra=grid_tetra) + tetrahedron_node.addObject('DiagonalMass', totalMass="50.0") + tetrahedron_node.addObject('BoxROI', name="boxroi1", box="-0.1 5 -3 0.1 11 3", drawBoxes="1") + tetrahedron_node.addObject('FixedProjectiveConstraint', indices="@boxroi1.indices") + if g_fem_version == "legacy": + tetrahedron_node.addObject('TetrahedronFEMForceField', name="LegacyFEM", template=g_fem_template, youngModulus="4000", poissonRatio="0.3", method="large") + if g_fem_version == "new": + tetrahedron_node.addObject('TetrahedronCorotationalFEMForceField', name="NewFEM", template=g_fem_template, youngModulus="4000", poissonRatio="0.3") + +def main(): + + enable_gui = False + + try: + import Sofa.Gui + import SofaImGui + except: + enable_gui = False + + root = Sofa.Core.Node("root") + createScene(root) + + Sofa.Simulation.initRoot(root) + + if enable_gui: + Sofa.Gui.GUIManager.Init("myscene","imgui") + Sofa.Gui.GUIManager.createGUI(root, __file__) + Sofa.Gui.GUIManager.MainLoop(root) + Sofa.Gui.GUIManager.closeGUI() + else: + import time + + print(f"Running on {g_nb_steps} steps...") + start_timer = time.time() + + for iteration in range(g_nb_steps): + Sofa.Simulation.animate(root, root.dt.value) + + stop_timer = time.time() + print(f"... Done.") + print(f"{g_nb_steps} steps done in {stop_timer - start_timer:.3}s ({g_nb_steps/(stop_timer - start_timer):.5} fps).") + + +# Function used only if this script is called from a python environment +if __name__ == '__main__': + main() diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py.view b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py.view new file mode 100644 index 00000000000..433112afafd --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py.view @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/utilities.py b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/utilities.py new file mode 100644 index 00000000000..3bdf301c641 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/utilities.py @@ -0,0 +1,114 @@ +import numpy as np + +def generate_regular_grid(nx=10, ny=10, nz=10, min_corner=(0, 0, 0), max_corner=(1, 1, 1)): + """ + Generate a regular grid of hexahedra. + + Args: + nx, ny, nz: Number of vertices in each direction (grid resolution) + min_corner: (xmin, ymin, zmin) tuple + max_corner: (xmax, ymax, zmax) tuple + + Returns: + points: array of shape (nx*ny*nz, 3) - vertex positions + hexahedra: array of shape ((nx-1)*(ny-1)*(nz-1), 8) - hexahedra indices + """ + xmin, ymin, zmin = min_corner + xmax, ymax, zmax = max_corner + + # Compute spacing + dx = (xmax - xmin) / (nx - 1) if nx > 1 else 0 + dy = (ymax - ymin) / (ny - 1) if ny > 1 else 0 + dz = (zmax - zmin) / (nz - 1) if nz > 1 else 0 + + # Generate points + points = [] + for k in range(nz): + for j in range(ny): + for i in range(nx): + points.append([xmin + i*dx, ymin + j*dy, zmin + k*dz]) + points = np.array(points) + + # Helper to get point index from grid coordinates + def point_index(i, j, k): + return nx * (ny * k + j) + i + + # Generate hexahedra (8 vertices per hexa, in SOFA convention) + hexahedra = [] + for k in range(nz - 1): + for j in range(ny - 1): + for i in range(nx - 1): + hexa = [ + point_index(i, j, k), + point_index(i+1, j, k), + point_index(i+1, j+1, k), + point_index(i, j+1, k), + point_index(i, j, k+1), + point_index(i+1, j, k+1), + point_index(i+1, j+1, k+1), + point_index(i, j+1, k+1), + ] + hexahedra.append(hexa) + hexahedra = np.array(hexahedra) + + return points, hexahedra + +def hexa_to_tetra(hexahedra): + """ + Convert hexahedra to tetrahedra. + + Each hexahedron is split into 5 tetrahedra. + + Args: + hexahedra: array of shape (N, 8) - hexahedra vertex indices + + Returns: + tetrahedra: array of shape (N*5, 4) - tetrahedra vertex indices + """ + tetrahedra = [] + + # 5-tetra decomposition using diagonal 1-3-4-6 + splits = [ + [0, 1, 3, 4], + [1, 2, 3, 6], + [1, 4, 5, 6], + [3, 4, 6, 7], + [1, 3, 4, 6], # central tetrahedron + ] + + for hexa in hexahedra: + for split in splits: + tetrahedra.append([hexa[i] for i in split]) + + return np.array(tetrahedra) + +def hexa_to_tetra_symmetric(hexahedra): + """ + Convert hexahedra to tetrahedra using symmetric 6-tetra decomposition. + + Each hexahedron is split into 6 tetrahedra around the space diagonal (0-6). + Better symmetry properties for FEM simulations. + + Args: + hexahedra: array of shape (N, 8) - hexahedra vertex indices + + Returns: + tetrahedra: array of shape (N*6, 4) - tetrahedra vertex indices + """ + tetrahedra = [] + + # 6-tetra symmetric decomposition around diagonal 0-6 + splits = [ + [0, 1, 2, 6], + [0, 2, 3, 6], + [0, 3, 7, 6], + [0, 7, 4, 6], + [0, 4, 5, 6], + [0, 5, 1, 6], + ] + + for hexa in hexahedra: + for split in splits: + tetrahedra.append([hexa[i] for i in split]) + + return np.array(tetrahedra) \ No newline at end of file diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn new file mode 100644 index 00000000000..6875f9a8849 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn @@ -0,0 +1,95 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn.view b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn.view new file mode 100644 index 00000000000..3bf10e74929 --- /dev/null +++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn.view @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/examples/Validation/cantilever_beam/CantileverBeam_ElementFEMForceField.xml b/examples/Validation/cantilever_beam/CantileverBeam_ElementFEMForceField.xml index 4e1fab99d9e..2133c327c4a 100644 --- a/examples/Validation/cantilever_beam/CantileverBeam_ElementFEMForceField.xml +++ b/examples/Validation/cantilever_beam/CantileverBeam_ElementFEMForceField.xml @@ -27,7 +27,7 @@ - +