diff --git a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.h b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.h
index 1a980373a5e..26b4ffadd93 100644
--- a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.h
+++ b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.h
@@ -55,6 +55,10 @@ class BaseElementLinearFEMForceField : public sofa::component::solidmechanics::f
     using StrainDisplacement = typename trait::StrainDisplacement;
     using Real = typename trait::Real;
 
+public:
+    using AssembledStiffnessMatrix = sofa::type::Mat<
+        trait::NumberOfDofsInElement, trait::NumberOfDofsInElement, Real>;
+
 protected:
 
     BaseElementLinearFEMForceField();
@@ -70,6 +74,14 @@ class BaseElementLinearFEMForceField : public sofa::component::solidmechanics::f
      * List of precomputed element stiffness matrices
      */
     sofa::Data<sofa::type::vector<ElementStiffness> > d_elementStiffness;
+
+    /**
+     * Contiguous buffer of assembled stiffness matrices (one per element).
+     * Extracted from d_elementStiffness for cache-friendly access in the hot path.
+     * This avoids loading the full FactorizedElementStiffness struct (~15 KB per element)
+     * when only the assembled matrix (~4.6 KB) is needed.
+     */
+    sofa::type::vector<AssembledStiffnessMatrix> m_assembledStiffnessMatrices;
 };
 
 #if !defined(ELASTICITY_COMPONENT_BASE_ELEMENT_LINEAR_FEM_FORCEFIELD_CPP)
diff --git a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.inl b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.inl
index de380c5e4cc..e7827cfc55e 100644
--- a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.inl
+++ b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/BaseElementLinearFEMForceField.inl
@@ -100,6 +100,13 @@ void BaseElementLinearFEMForceField<DataTypes, ElementType>::precomputeElementSt
             const std::array<sofa::Coord_t<DataTypes>, trait::NumberOfNodesInElement> nodesCoordinates = extractNodesVectorFromGlobalVector(element, restPositionAccessor.ref());
             elementStiffness[elementId] = integrate<DataTypes, ElementType, trait::matrixVectorProductType>(nodesCoordinates, elasticityTensor);
         });
+
+    // Extract assembled matrices into a contiguous buffer for cache-friendly access
+    m_assembledStiffnessMatrices.resize(elements.size());
+    for (std::size_t i = 0; i < elements.size(); ++i)
+    {
+        m_assembledStiffnessMatrices[i] = elementStiffness[i].getAssembledMatrix();
+    }
 }
 
 }
diff --git a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.inl b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.inl
index a3a74a36a81..0298f2e4247 100644
--- a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.inl
+++ b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.inl
@@ -86,7 +86,7 @@ void ElementCorotationalFEMForceField<DataTypes, ElementType>::computeElementsFo
 {
     const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
     auto restPositionAccessor = this->mstate->readRestPositions();
-    auto elementStiffness = sofa::helper::getReadAccessor(this->d_elementStiffness);
+    const auto& assembledMatrices = this->m_assembledStiffnessMatrices;
 
     for (std::size_t elementId = range.start; elementId < range.end; ++elementId)
     {
@@ -112,7 +112,7 @@ void ElementCorotationalFEMForceField<DataTypes, ElementType>::computeElementsFo
             transformedDisplacement = elementRotation.multTranspose(elementNodesCoordinates[j] - t) - (restElementNodesCoordinates[j] - t0);
         }
 
-        const auto& stiffnessMatrix = elementStiffness[elementId];
+        const auto& stiffnessMatrix = assembledMatrices[elementId];
 
         auto& elementForce = elementForces[elementId];
         elementForce = stiffnessMatrix * displacement;
@@ -134,7 +134,7 @@ void ElementCorotationalFEMForceField<DataTypes, ElementType>::computeElementsFo
     const sofa::VecDeriv_t<DataTypes>& nodeDx)
 {
     const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
-    auto elementStiffness = sofa::helper::getReadAccessor(this->d_elementStiffness);
+    const auto& assembledMatrices = this->m_assembledStiffnessMatrices;
 
     for (std::size_t elementId = range.start; elementId < range.end; ++elementId)
     {
@@ -150,7 +150,7 @@ void ElementCorotationalFEMForceField<DataTypes, ElementType>::computeElementsFo
             rotated_dx = elementRotation.multTranspose(nodeDx[element[n]]);
         }
 
-        const auto& stiffnessMatrix = elementStiffness[elementId];
+        const auto& stiffnessMatrix = assembledMatrices[elementId];
 
         auto& df = elementForcesDeriv[elementId];
         df = stiffnessMatrix * element_dx;
diff --git a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.inl b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.inl
index fc58db7abe4..191de07c12c 100644
--- a/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.inl
+++ b/Sofa/Component/SolidMechanics/FEM/Elastic/src/sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.inl
@@ -49,12 +49,12 @@ void ElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::computeEleme
 {
     const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
     auto restPositionAccessor = this->mstate->readRestPositions();
-    auto elementStiffness = sofa::helper::getReadAccessor(this->d_elementStiffness);
+    const auto& assembledMatrices = this->m_assembledStiffnessMatrices;
 
     for (std::size_t elementId = range.start; elementId < range.end; ++elementId)
     {
         const auto& element = elements[elementId];
-        const auto& stiffnessMatrix = elementStiffness[elementId];
+        const auto& stiffnessMatrix = assembledMatrices[elementId];
 
         typename trait::ElementDisplacement displacement{ sofa::type::NOINIT };
 
@@ -79,12 +79,12 @@ void ElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::computeEleme
     const sofa::VecDeriv_t<DataTypes>& nodeDx)
 {
     const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
-    auto elementStiffness = sofa::helper::getReadAccessor(this->d_elementStiffness);
+    const auto& assembledMatrices = this->m_assembledStiffnessMatrices;
 
     for (std::size_t elementId = range.start; elementId < range.end; ++elementId)
     {
         const auto& element = elements[elementId];
-        const auto& stiffnessMatrix = elementStiffness[elementId];
+        const auto& stiffnessMatrix = assembledMatrices[elementId];
 
         const std::array<sofa::Coord_t<DataTypes>, trait::NumberOfNodesInElement> elementNodesDx =
             extractNodesVectorFromGlobalVector(element, nodeDx);
diff --git a/applications/plugins/SofaCUDA/Component/CMakeLists.txt b/applications/plugins/SofaCUDA/Component/CMakeLists.txt
index 77ce7a8b2be..ce4d885c90b 100644
--- a/applications/plugins/SofaCUDA/Component/CMakeLists.txt
+++ b/applications/plugins/SofaCUDA/Component/CMakeLists.txt
@@ -39,6 +39,11 @@ set(HEADER_FILES
     
 
     ### solidmechanics
+    ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementFEMKernelUtils.cuh
+    ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.h
+    ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.inl
+    ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.h
+    ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.inl
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaHexahedronFEMForceField.h
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaHexahedronFEMForceField.inl
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/hyperelastic/CudaStandardTetrahedralFEMForceField.h
@@ -111,6 +116,8 @@ set(SOURCE_FILES
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/mass/CudaUniformMass.cpp
 
     ### Solidmechanics
+    ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cpp
+    ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cpp
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaHexahedronFEMForceField.cpp
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/hyperelastic/CudaStandardTetrahedralFEMForceField.cpp
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/tensormass/CudaTetrahedralTensorMassForceField.cpp
@@ -181,6 +188,8 @@ set(CUDA_SOURCES
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/mass/CudaUniformMass.cu
 
     ### solidmechanics
+    ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cu
+    ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cu
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/elastic/CudaHexahedronFEMForceField.cu
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/fem/hyperelastic/CudaStandardTetrahedralFEMForceField.cu
     ${SOFACUDA_COMPONENT_SOURCE_DIR}/SofaCUDA/component/solidmechanics/tensormass/CudaTetrahedralTensorMassForceField.cu
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/init.cpp b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/init.cpp
index f9c124998fb..c6b0ee6b438 100644
--- a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/init.cpp
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/init.cpp
@@ -90,6 +90,8 @@ extern void registerPlaneForceField(sofa::core::ObjectFactory* factory);
 extern void registerSphereForceField(sofa::core::ObjectFactory* factory);
 
 // component::solidmechanics::fem::elastic
+extern void registerElementCorotationalFEMForceField(sofa::core::ObjectFactory* factory);
+extern void registerElementLinearSmallStrainFEMForceField(sofa::core::ObjectFactory* factory);
 extern void registerHexahedronFEMForceField(sofa::core::ObjectFactory* factory);
 extern void registerTetrahedronFEMForceField(sofa::core::ObjectFactory* factory);
 extern void registerTriangularFEMForceFieldOptim(sofa::core::ObjectFactory* factory);
@@ -224,6 +226,8 @@ void registerObjects(sofa::core::ObjectFactory* factory)
     registerLinearForceField(factory);
     registerPlaneForceField(factory);
     registerSphereForceField(factory);
+    registerElementCorotationalFEMForceField(factory);
+    registerElementLinearSmallStrainFEMForceField(factory);
     registerHexahedronFEMForceField(factory);
     registerTetrahedronFEMForceField(factory);
     registerTriangularFEMForceFieldOptim(factory);
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cpp b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cpp
new file mode 100644
index 00000000000..5cd43daa6d2
--- /dev/null
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cpp
@@ -0,0 +1,101 @@
+/******************************************************************************
+*                 SOFA, Simulation Open-Framework Architecture                *
+*                    (c) 2006 INRIA, USTL, UJF, CNRS, MGH                     *
+*                                                                             *
+* This program is free software; you can redistribute it and/or modify it     *
+* under the terms of the GNU Lesser General Public License as published by    *
+* the Free Software Foundation; either version 2.1 of the License, or (at     *
+* your option) any later version.                                             *
+*                                                                             *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details.                                                           *
+*                                                                             *
+* You should have received a copy of the GNU Lesser General Public License    *
+* along with this program. If not, see <http://www.gnu.org/licenses/>.        *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt)          *
+*                                                                             *
+* Contact information: contact@sofa-framework.org                             *
+******************************************************************************/
+#include <SofaCUDA/component/config.h>
+
+#include <sofa/gpu/cuda/CudaTypes.h>
+#include <SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.inl>
+#include <sofa/core/ObjectFactory.h>
+
+namespace sofa::component::solidmechanics::fem::elastic
+{
+
+using namespace sofa::gpu::cuda;
+
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Edge>;
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Triangle>;
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Quad>;
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Tetrahedron>;
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Hexahedron>;
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Edge>;
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Triangle>;
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Quad>;
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Tetrahedron>;
+template class SOFACUDA_COMPONENT_API CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Hexahedron>;
+#endif
+
+} // namespace sofa::component::solidmechanics::fem::elastic
+
+namespace sofa::gpu::cuda
+{
+
+void registerElementCorotationalFEMForceField(sofa::core::ObjectFactory* factory)
+{
+    using namespace sofa::component::solidmechanics::fem::elastic;
+
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for EdgeCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Edge> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for TriangleCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Triangle> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for QuadCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Quad> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for TetrahedronCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Tetrahedron> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for HexahedronCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3fTypes, sofa::geometry::Hexahedron> >()
+    );
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for EdgeCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Edge> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for TriangleCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Triangle> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for QuadCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Quad> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for TetrahedronCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Tetrahedron> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for HexahedronCorotationalFEMForceField")
+        .add< CudaElementCorotationalFEMForceField<CudaVec3dTypes, sofa::geometry::Hexahedron> >()
+    );
+#endif
+}
+
+} // namespace sofa::gpu::cuda
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cu b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cu
new file mode 100644
index 00000000000..6616637dc55
--- /dev/null
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.cu
@@ -0,0 +1,334 @@
+/******************************************************************************
+*                 SOFA, Simulation Open-Framework Architecture                *
+*                    (c) 2006 INRIA, USTL, UJF, CNRS, MGH                     *
+*                                                                             *
+* This program is free software; you can redistribute it and/or modify it     *
+* under the terms of the GNU Lesser General Public License as published by    *
+* the Free Software Foundation; either version 2.1 of the License, or (at     *
+* your option) any later version.                                             *
+*                                                                             *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details.                                                           *
+*                                                                             *
+* You should have received a copy of the GNU Lesser General Public License    *
+* along with this program. If not, see <http://www.gnu.org/licenses/>.        *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt)          *
+*                                                                             *
+* Contact information: contact@sofa-framework.org                             *
+******************************************************************************/
+#include <sofa/gpu/cuda/CudaCommon.h>
+#include <sofa/gpu/cuda/CudaMath.h>
+#include "CudaElementFEMKernelUtils.cuh"
+
+namespace sofa::gpu::cuda
+{
+
+/**
+ * Combined kernel: compute rotations AND per-element forces in one pass.
+ */
+template<typename T, int NNodes, int Dim>
+__global__ void ElementCorotationalFEMForceField_computeRotationsAndForce_kernel(
+    int nbElem,
+    const int* __restrict__ elements,
+    const T* __restrict__ initRotTransposed,
+    const T* __restrict__ stiffness,
+    const T* __restrict__ x,
+    const T* __restrict__ x0,
+    T* __restrict__ rotationsOut,
+    T* __restrict__ eforce)
+{
+    static_assert(Dim == 3, "Corotational rotation computation requires Dim == 3");
+    constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2;
+
+    const int elemId = blockIdx.x * blockDim.x + threadIdx.x;
+    if (elemId >= nbElem) return;
+
+    // Gather element positions
+    T ex[NNodes * Dim], ex0[NNodes * Dim];
+    gatherElementData<T, NNodes, Dim>(elements, nbElem, elemId, x, ex);
+    gatherElementData<T, NNodes, Dim>(elements, nbElem, elemId, x0, ex0);
+
+    // Compute rotation frame from current positions
+    T frame[Dim * Dim];
+    if constexpr (NNodes == 8)
+        computeHexahedronFrame(ex, frame);
+    else
+        computeTriangleFrame(ex, frame);
+
+    // R = frame^T * initRotTransposed
+    const T* irt = initRotTransposed + elemId * Dim * Dim;
+    T R[Dim * Dim];
+    mat3TransposeMul(frame, irt, R);
+
+    // Store rotation for later use
+    T* Rout = rotationsOut + elemId * Dim * Dim;
+    #pragma unroll
+    for (int i = 0; i < Dim * Dim; ++i)
+        Rout[i] = R[i];
+
+    // Compute element centers
+    T center[Dim], center0[Dim];
+    computeElementCenter<T, NNodes, Dim>(ex, center);
+    computeElementCenter<T, NNodes, Dim>(ex0, center0);
+
+    // Compute corotational displacement
+    T disp[NNodes * Dim];
+    computeCorotationalDisplacement<T, NNodes, Dim>(R, ex, ex0, center, center0, disp);
+
+    // Multiply by stiffness matrix
+    T edf[NNodes * Dim];
+    const T* K = stiffness + elemId * NSymBlocks * Dim * Dim;
+    symBlockMatMul<T, NNodes, Dim>(K, disp, edf);
+
+    // Rotate forces back to global frame and negate
+    T* out = eforce + elemId * NNodes * Dim;
+    rotateAndWriteForce<T, NNodes, Dim>(R, edf, out, T(-1));
+}
+
+/**
+ * Kernel for addForce with pre-computed rotations.
+ */
+template<typename T, int NNodes, int Dim>
+__global__ void ElementCorotationalFEMForceField_computeForce_kernel(
+    int nbElem,
+    const int* __restrict__ elements,
+    const T* __restrict__ rotations,
+    const T* __restrict__ stiffness,
+    const T* __restrict__ x,
+    const T* __restrict__ x0,
+    T* __restrict__ eforce)
+{
+    constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2;
+
+    const int elemId = blockIdx.x * blockDim.x + threadIdx.x;
+    if (elemId >= nbElem) return;
+
+    // Load rotation matrix
+    const T* Rptr = rotations + elemId * Dim * Dim;
+    T R[Dim * Dim];
+    #pragma unroll
+    for (int i = 0; i < Dim * Dim; ++i)
+        R[i] = Rptr[i];
+
+    // Gather element positions
+    T ex[NNodes * Dim], ex0[NNodes * Dim];
+    gatherElementData<T, NNodes, Dim>(elements, nbElem, elemId, x, ex);
+    gatherElementData<T, NNodes, Dim>(elements, nbElem, elemId, x0, ex0);
+
+    // Compute element centers
+    T center[Dim], center0[Dim];
+    computeElementCenter<T, NNodes, Dim>(ex, center);
+    computeElementCenter<T, NNodes, Dim>(ex0, center0);
+
+    // Compute corotational displacement
+    T disp[NNodes * Dim];
+    computeCorotationalDisplacement<T, NNodes, Dim>(R, ex, ex0, center, center0, disp);
+
+    // Multiply by stiffness matrix
+    T edf[NNodes * Dim];
+    const T* K = stiffness + elemId * NSymBlocks * Dim * Dim;
+    symBlockMatMul<T, NNodes, Dim>(K, disp, edf);
+
+    // Rotate forces back to global frame and negate
+    T* out = eforce + elemId * NNodes * Dim;
+    rotateAndWriteForce<T, NNodes, Dim>(R, edf, out, T(-1));
+}
+
+/**
+ * Kernel for addDForce.
+ */
+template<typename T, int NNodes, int Dim>
+__global__ void ElementCorotationalFEMForceField_computeDForce_kernel(
+    int nbElem,
+    const int* __restrict__ elements,
+    const T* __restrict__ rotations,
+    const T* __restrict__ stiffness,
+    const T* __restrict__ dx,
+    T* __restrict__ eforce,
+    T kFactor)
+{
+    constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2;
+
+    const int elemId = blockIdx.x * blockDim.x + threadIdx.x;
+    if (elemId >= nbElem) return;
+
+    // Load rotation matrix
+    const T* Rptr = rotations + elemId * Dim * Dim;
+    T R[Dim * Dim];
+    #pragma unroll
+    for (int i = 0; i < Dim * Dim; ++i)
+        R[i] = Rptr[i];
+
+    // Gather and rotate displacement: rdx = R^T * dx
+    T rdx[NNodes * Dim];
+    rotateDisplacementTranspose<T, NNodes, Dim>(R, elements, nbElem, elemId, dx, rdx);
+
+    // Multiply by stiffness matrix
+    const T* K = stiffness + elemId * NSymBlocks * Dim * Dim;
+    T edf[NNodes * Dim];
+    symBlockMatMul<T, NNodes, Dim>(K, rdx, edf);
+
+    // Rotate forces back to global frame and scale
+    T* out = eforce + elemId * NNodes * Dim;
+    rotateAndWriteForce<T, NNodes, Dim>(R, edf, out, -kFactor);
+}
+
+// ===================== Launch functions =====================
+
+template<typename T, int NNodes, int Dim>
+void ElementCorotationalFEMForceFieldCuda_addForceWithRotations(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* initRotTransposed,
+    const void* stiffness,
+    const void* x,
+    const void* x0,
+    void* f,
+    void* eforce,
+    void* rotationsOut,
+    const void* velems)
+{
+    const int computeThreads = 64;
+    int numBlocks = (nbElem + computeThreads - 1) / computeThreads;
+    ElementCorotationalFEMForceField_computeRotationsAndForce_kernel<T, NNodes, Dim>
+        <<<numBlocks, computeThreads>>>(
+            nbElem,
+            (const int*)elements,
+            (const T*)initRotTransposed,
+            (const T*)stiffness,
+            (const T*)x,
+            (const T*)x0,
+            (T*)rotationsOut,
+            (T*)eforce);
+    mycudaDebugError("ElementCorotationalFEMForceField_computeRotationsAndForce_kernel");
+
+    const int gatherThreads = 256;
+    numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads;
+    ElementFEM_gatherForce_kernel<T, Dim>
+        <<<numBlocks, gatherThreads>>>(
+            nbVertex,
+            maxElemPerVertex,
+            (const int*)velems,
+            (const T*)eforce,
+            (T*)f);
+    mycudaDebugError("ElementFEM_gatherForce_kernel");
+}
+
+template<typename T, int NNodes, int Dim>
+void ElementCorotationalFEMForceFieldCuda_addForce(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* rotations,
+    const void* stiffness,
+    const void* x,
+    const void* x0,
+    void* f,
+    void* eforce,
+    const void* velems)
+{
+    const int computeThreads = 64;
+    int numBlocks = (nbElem + computeThreads - 1) / computeThreads;
+    ElementCorotationalFEMForceField_computeForce_kernel<T, NNodes, Dim>
+        <<<numBlocks, computeThreads>>>(
+            nbElem,
+            (const int*)elements,
+            (const T*)rotations,
+            (const T*)stiffness,
+            (const T*)x,
+            (const T*)x0,
+            (T*)eforce);
+    mycudaDebugError("ElementCorotationalFEMForceField_computeForce_kernel");
+
+    const int gatherThreads = 256;
+    numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads;
+    ElementFEM_gatherForce_kernel<T, Dim>
+        <<<numBlocks, gatherThreads>>>(
+            nbVertex,
+            maxElemPerVertex,
+            (const int*)velems,
+            (const T*)eforce,
+            (T*)f);
+    mycudaDebugError("ElementFEM_gatherForce_kernel");
+}
+
+template<typename T, int NNodes, int Dim>
+void ElementCorotationalFEMForceFieldCuda_addDForce(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* rotations,
+    const void* stiffness,
+    const void* dx,
+    void* df,
+    void* eforce,
+    const void* velems,
+    T kFactor)
+{
+    const int computeThreads = 64;
+    int numBlocks = (nbElem + computeThreads - 1) / computeThreads;
+    ElementCorotationalFEMForceField_computeDForce_kernel<T, NNodes, Dim>
+        <<<numBlocks, computeThreads>>>(
+            nbElem,
+            (const int*)elements,
+            (const T*)rotations,
+            (const T*)stiffness,
+            (const T*)dx,
+            (T*)eforce,
+            kFactor);
+    mycudaDebugError("ElementCorotationalFEMForceField_computeDForce_kernel");
+
+    const int gatherThreads = 256;
+    numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads;
+    ElementFEM_gatherForce_kernel<T, Dim>
+        <<<numBlocks, gatherThreads>>>(
+            nbVertex,
+            maxElemPerVertex,
+            (const int*)velems,
+            (const T*)eforce,
+            (T*)df);
+    mycudaDebugError("ElementFEM_gatherForce_kernel");
+}
+
+// ===================== Explicit template instantiations =====================
+
+#define INSTANTIATE_COROTATIONAL(T, NNodes) \
+    template void ElementCorotationalFEMForceFieldCuda_addForce<T, NNodes, 3>( \
+        unsigned int, unsigned int, unsigned int, const void*, const void*, \
+        const void*, const void*, const void*, void*, void*, const void*); \
+    template void ElementCorotationalFEMForceFieldCuda_addDForce<T, NNodes, 3>( \
+        unsigned int, unsigned int, unsigned int, const void*, const void*, \
+        const void*, const void*, void*, void*, const void*, T);
+
+#define INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(T, NNodes) \
+    template void ElementCorotationalFEMForceFieldCuda_addForceWithRotations<T, NNodes, 3>( \
+        unsigned int, unsigned int, unsigned int, const void*, const void*, \
+        const void*, const void*, const void*, void*, void*, void*, const void*);
+
+INSTANTIATE_COROTATIONAL(float, 2)
+INSTANTIATE_COROTATIONAL(float, 3)
+INSTANTIATE_COROTATIONAL(float, 4)
+INSTANTIATE_COROTATIONAL(float, 8)
+INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(float, 3)
+INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(float, 4)
+INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(float, 8)
+
+INSTANTIATE_COROTATIONAL(double, 2)
+INSTANTIATE_COROTATIONAL(double, 3)
+INSTANTIATE_COROTATIONAL(double, 4)
+INSTANTIATE_COROTATIONAL(double, 8)
+INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(double, 3)
+INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(double, 4)
+INSTANTIATE_COROTATIONAL_WITH_ROTATIONS(double, 8)
+
+#undef INSTANTIATE_COROTATIONAL
+#undef INSTANTIATE_COROTATIONAL_WITH_ROTATIONS
+
+} // namespace sofa::gpu::cuda
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.h b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.h
new file mode 100644
index 00000000000..820b4c915a1
--- /dev/null
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.h
@@ -0,0 +1,153 @@
+/******************************************************************************
+*                 SOFA, Simulation Open-Framework Architecture                *
+*                    (c) 2006 INRIA, USTL, UJF, CNRS, MGH                     *
+*                                                                             *
+* This program is free software; you can redistribute it and/or modify it     *
+* under the terms of the GNU Lesser General Public License as published by    *
+* the Free Software Foundation; either version 2.1 of the License, or (at     *
+* your option) any later version.                                             *
+*                                                                             *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details.                                                           *
+*                                                                             *
+* You should have received a copy of the GNU Lesser General Public License    *
+* along with this program. If not, see <http://www.gnu.org/licenses/>.        *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt)          *
+*                                                                             *
+* Contact information: contact@sofa-framework.org                             *
+******************************************************************************/
+#pragma once
+
+#include <sofa/gpu/cuda/CudaTypes.h>
+#include <sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.h>
+
+namespace sofa::gpu::cuda
+{
+
+template<typename T, int NNodes, int Dim>
+void ElementCorotationalFEMForceFieldCuda_addForceWithRotations(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* initRotTransposed,
+    const void* stiffness,
+    const void* x,
+    const void* x0,
+    void* f,
+    void* eforce,
+    void* rotationsOut,
+    const void* velems);
+
+template<typename T, int NNodes, int Dim>
+void ElementCorotationalFEMForceFieldCuda_addForce(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* rotations,
+    const void* stiffness,
+    const void* x,
+    const void* x0,
+    void* f,
+    void* eforce,
+    const void* velems);
+
+template<typename T, int NNodes, int Dim>
+void ElementCorotationalFEMForceFieldCuda_addDForce(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* rotations,
+    const void* stiffness,
+    const void* dx,
+    void* df,
+    void* eforce,
+    const void* velems,
+    T kFactor);
+
+} // namespace sofa::gpu::cuda
+
+namespace sofa::component::solidmechanics::fem::elastic
+{
+
+/**
+ * CUDA-accelerated version of ElementCorotationalFEMForceField.
+ *
+ * Works with any element type (Edge, Triangle, Quad, Tetrahedron, Hexahedron).
+ * The addDForce method (the CG hot path, called ~250 times per timestep) runs entirely on GPU.
+ * The addForce method delegates to the CPU parent and uploads rotations to GPU afterwards.
+ *
+ * Uses a two-kernel approach for addDForce:
+ *   Kernel 1: compute per-element forces (1 thread/element, fully unrolled)
+ *   Kernel 2: gather per-vertex (1 thread/vertex, no atomics)
+ */
+template<class DataTypes, class ElementType>
+class CudaElementCorotationalFEMForceField
+    : public ElementCorotationalFEMForceField<DataTypes, ElementType>
+{
+public:
+    SOFA_CLASS(
+        SOFA_TEMPLATE2(CudaElementCorotationalFEMForceField, DataTypes, ElementType),
+        SOFA_TEMPLATE2(ElementCorotationalFEMForceField, DataTypes, ElementType));
+
+    using Real = sofa::Real_t<DataTypes>;
+    using Coord = sofa::Coord_t<DataTypes>;
+    using Deriv = sofa::Deriv_t<DataTypes>;
+    using VecCoord = sofa::VecCoord_t<DataTypes>;
+    using VecDeriv = sofa::VecDeriv_t<DataTypes>;
+
+    static const std::string GetCustomClassName()
+    {
+        return ElementCorotationalFEMForceField<DataTypes, ElementType>::GetCustomClassName();
+    }
+
+    static const std::string GetCustomTemplateName()
+    {
+        return DataTypes::Name();
+    }
+
+    void init() override;
+
+    void addForce(
+        const sofa::core::MechanicalParams* mparams,
+        sofa::DataVecDeriv_t<DataTypes>& f,
+        const sofa::DataVecCoord_t<DataTypes>& x,
+        const sofa::DataVecDeriv_t<DataTypes>& v) override;
+
+    void addDForce(
+        const sofa::core::MechanicalParams* mparams,
+        sofa::DataVecDeriv_t<DataTypes>& df,
+        const sofa::DataVecDeriv_t<DataTypes>& dx) override;
+
+    void buildStiffnessMatrix(sofa::core::behavior::StiffnessMatrix* matrix) override;
+
+protected:
+
+    CudaElementCorotationalFEMForceField() = default;
+
+    void uploadStiffnessAndConnectivity();
+    void uploadRotations();
+    void uploadInitialRotationsTransposed();
+    void downloadRotations();
+
+    gpu::cuda::CudaVector<Real> m_gpuStiffness;                  ///< Symmetric block-format stiffness per element
+    gpu::cuda::CudaVector<Real> m_gpuRotations;                  ///< Flat 3x3 rotation matrices per element
+    gpu::cuda::CudaVector<Real> m_gpuInitialRotationsTransposed; ///< Flat 3x3 initial rotation transposed per element
+    gpu::cuda::CudaVector<int>  m_gpuElements;                   ///< SoA connectivity: elements[nodeIdx * nbElem + elemId]
+    gpu::cuda::CudaVector<Real> m_gpuElementForce;               ///< Intermediate per-element per-node force buffer
+    gpu::cuda::CudaVector<int>  m_gpuVelems;                     ///< SoA vertex-to-element mapping, 0-terminated
+
+    unsigned int m_maxElemPerVertex = 0;
+    unsigned int m_nbVertices = 0;
+
+    bool m_gpuDataUploaded = false;
+    bool m_gpuRotationsUploaded = false;
+    bool m_gpuRotationMethodSupported = false;
+};
+
+} // namespace sofa::component::solidmechanics::fem::elastic
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.inl b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.inl
new file mode 100644
index 00000000000..2359244539e
--- /dev/null
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.inl
@@ -0,0 +1,345 @@
+/******************************************************************************
+*                 SOFA, Simulation Open-Framework Architecture                *
+*                    (c) 2006 INRIA, USTL, UJF, CNRS, MGH                     *
+*                                                                             *
+* This program is free software; you can redistribute it and/or modify it     *
+* under the terms of the GNU Lesser General Public License as published by    *
+* the Free Software Foundation; either version 2.1 of the License, or (at     *
+* your option) any later version.                                             *
+*                                                                             *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details.                                                           *
+*                                                                             *
+* You should have received a copy of the GNU Lesser General Public License    *
+* along with this program. If not, see <http://www.gnu.org/licenses/>.        *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt)          *
+*                                                                             *
+* Contact information: contact@sofa-framework.org                             *
+******************************************************************************/
+#pragma once
+#include <SofaCUDA/component/solidmechanics/fem/elastic/CudaElementCorotationalFEMForceField.h>
+#include <sofa/component/solidmechanics/fem/elastic/ElementCorotationalFEMForceField.inl>
+#include <sofa/core/behavior/ForceField.inl>
+#include <cstring>
+
+namespace sofa::component::solidmechanics::fem::elastic
+{
+
+template<class DataTypes, class ElementType>
+void CudaElementCorotationalFEMForceField<DataTypes, ElementType>::init()
+{
+    ElementCorotationalFEMForceField<DataTypes, ElementType>::init();
+
+    if (!this->isComponentStateInvalid())
+    {
+        uploadStiffnessAndConnectivity();
+        uploadInitialRotationsTransposed();
+    }
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementCorotationalFEMForceField<DataTypes, ElementType>::uploadStiffnessAndConnectivity()
+{
+    using trait = sofa::component::solidmechanics::fem::elastic::trait<DataTypes, ElementType>;
+
+    if (!this->l_topology) return;
+
+    const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
+    const auto& assembledMatrices = this->m_assembledStiffnessMatrices;
+
+    const auto nbElem = elements.size();
+    constexpr auto nNodes = trait::NumberOfNodesInElement;
+    constexpr auto dim = trait::spatial_dimensions;
+
+    // Find number of vertices
+    unsigned int maxNodeId = 0;
+    for (std::size_t e = 0; e < nbElem; ++e)
+    {
+        const auto& element = elements[e];
+        for (unsigned int n = 0; n < nNodes; ++n)
+        {
+            if (static_cast<unsigned int>(element[n]) > maxNodeId)
+                maxNodeId = static_cast<unsigned int>(element[n]);
+        }
+    }
+    m_nbVertices = maxNodeId + 1;
+
+    // Upload stiffness matrices in symmetric upper-triangle block format:
+    // Only blocks (ni, nj) with nj >= ni are stored.
+    // symIdx = ni * nNodes - ni*(ni-1)/2 + (nj - ni)
+    // K[symIdx * dim * dim + di * dim + dj] per element
+    constexpr auto nSymBlocks = nNodes * (nNodes + 1) / 2;
+    m_gpuStiffness.resize(nbElem * nSymBlocks * dim * dim);
+    {
+        auto* dst = m_gpuStiffness.hostWrite();
+        for (std::size_t e = 0; e < nbElem; ++e)
+        {
+            const auto& K = assembledMatrices[e];
+            for (unsigned int ni = 0; ni < nNodes; ++ni)
+            {
+                const unsigned int diagIdx = ni * nNodes - ni * (ni - 1) / 2;
+                for (unsigned int nj = ni; nj < nNodes; ++nj)
+                {
+                    const unsigned int symIdx = diagIdx + (nj - ni);
+                    for (unsigned int di = 0; di < dim; ++di)
+                        for (unsigned int dj = 0; dj < dim; ++dj)
+                            dst[e * nSymBlocks * dim * dim
+                                + symIdx * dim * dim
+                                + di * dim + dj]
+                                = static_cast<Real>(K[ni * dim + di][nj * dim + dj]);
+                }
+            }
+        }
+    }
+
+    // Upload element connectivity in SoA layout:
+    // elements[nodeIdx * nbElem + elemId] = global node index
+    // Adjacent threads access adjacent memory for coalesced reads.
+    m_gpuElements.resize(nNodes * nbElem);
+    {
+        auto* dst = m_gpuElements.hostWrite();
+        for (std::size_t e = 0; e < nbElem; ++e)
+        {
+            const auto& element = elements[e];
+            for (unsigned int n = 0; n < nNodes; ++n)
+                dst[n * nbElem + e] = static_cast<int>(element[n]);
+        }
+    }
+
+    // Build vertex-to-element mapping (velems)
+    // For each vertex, stores the list of (elemId * nNodes + localNode + 1).
+    // 0 is used as sentinel. SoA layout: velems[slot * nbVertex + vertexId].
+    std::vector<std::vector<int>> vertexElems(m_nbVertices);
+    for (std::size_t e = 0; e < nbElem; ++e)
+    {
+        const auto& element = elements[e];
+        for (unsigned int n = 0; n < nNodes; ++n)
+        {
+            const int nodeId = static_cast<int>(element[n]);
+            vertexElems[nodeId].push_back(
+                static_cast<int>(e * nNodes + n + 1));
+        }
+    }
+
+    m_maxElemPerVertex = 0;
+    for (const auto& ve : vertexElems)
+    {
+        if (ve.size() > m_maxElemPerVertex)
+            m_maxElemPerVertex = static_cast<unsigned int>(ve.size());
+    }
+
+    m_gpuVelems.resize(m_maxElemPerVertex * m_nbVertices);
+    {
+        auto* dst = m_gpuVelems.hostWrite();
+        std::memset(dst, 0, m_maxElemPerVertex * m_nbVertices * sizeof(int));
+        for (std::size_t v = 0; v < m_nbVertices; ++v)
+        {
+            for (std::size_t s = 0; s < vertexElems[v].size(); ++s)
+                dst[s * m_nbVertices + v] = vertexElems[v][s];
+        }
+    }
+
+    // Allocate intermediate per-element force buffer
+    m_gpuElementForce.resize(nbElem * nNodes * dim);
+
+    m_gpuDataUploaded = true;
+    m_gpuRotationsUploaded = false;
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementCorotationalFEMForceField<DataTypes, ElementType>::uploadRotations()
+{
+    using trait = sofa::component::solidmechanics::fem::elastic::trait<DataTypes, ElementType>;
+    constexpr auto dim = trait::spatial_dimensions;
+
+    const auto& rotations = this->m_rotations;
+    const auto nbElem = rotations.size();
+
+    m_gpuRotations.resize(nbElem * dim * dim);
+    {
+        auto* dst = m_gpuRotations.hostWrite();
+        for (std::size_t e = 0; e < nbElem; ++e)
+        {
+            const auto& R = rotations[e];
+            for (unsigned int i = 0; i < dim; ++i)
+                for (unsigned int j = 0; j < dim; ++j)
+                    dst[e * dim * dim + i * dim + j] = static_cast<Real>(R[i][j]);
+        }
+    }
+
+    m_gpuRotationsUploaded = true;
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementCorotationalFEMForceField<DataTypes, ElementType>::uploadInitialRotationsTransposed()
+{
+    using trait = sofa::component::solidmechanics::fem::elastic::trait<DataTypes, ElementType>;
+    constexpr auto dim = trait::spatial_dimensions;
+    constexpr auto nNodes = trait::NumberOfNodesInElement;
+
+    const auto& initRotT = this->m_initialRotationsTransposed;
+    const auto nbElem = initRotT.size();
+    if (nbElem == 0) return;
+
+    m_gpuInitialRotationsTransposed.resize(nbElem * dim * dim);
+    m_gpuRotations.resize(nbElem * dim * dim);
+    {
+        auto* dst = m_gpuInitialRotationsTransposed.hostWrite();
+        for (std::size_t e = 0; e < nbElem; ++e)
+        {
+            const auto& R = initRotT[e];
+            for (unsigned int i = 0; i < dim; ++i)
+                for (unsigned int j = 0; j < dim; ++j)
+                    dst[e * dim * dim + i * dim + j] = static_cast<Real>(R[i][j]);
+        }
+    }
+
+    // Check if the rotation method is GPU-compatible
+    const auto rotationMethodKey = this->m_rotationMethods.d_rotationMethod.getValue().key();
+    m_gpuRotationMethodSupported = (nNodes >= 3)
+        && (rotationMethodKey == "triangle" || rotationMethodKey == "hexahedron");
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementCorotationalFEMForceField<DataTypes, ElementType>::downloadRotations()
+{
+    using trait = sofa::component::solidmechanics::fem::elastic::trait<DataTypes, ElementType>;
+    constexpr auto dim = trait::spatial_dimensions;
+
+    if (!m_gpuRotationsUploaded) return;
+
+    const auto nbElem = m_gpuRotations.size() / (dim * dim);
+    this->m_rotations.resize(nbElem);
+
+    const auto* src = m_gpuRotations.hostRead();
+    for (std::size_t e = 0; e < nbElem; ++e)
+    {
+        auto& R = this->m_rotations[e];
+        for (unsigned int i = 0; i < dim; ++i)
+            for (unsigned int j = 0; j < dim; ++j)
+                R[i][j] = static_cast<Real>(src[e * dim * dim + i * dim + j]);
+    }
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementCorotationalFEMForceField<DataTypes, ElementType>::buildStiffnessMatrix(
+    sofa::core::behavior::StiffnessMatrix* matrix)
+{
+    if (m_gpuRotationMethodSupported && m_gpuRotationsUploaded)
+        downloadRotations();
+
+    ElementCorotationalFEMForceField<DataTypes, ElementType>::buildStiffnessMatrix(matrix);
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementCorotationalFEMForceField<DataTypes, ElementType>::addForce(
+    const sofa::core::MechanicalParams* mparams,
+    sofa::DataVecDeriv_t<DataTypes>& d_f,
+    const sofa::DataVecCoord_t<DataTypes>& d_x,
+    const sofa::DataVecDeriv_t<DataTypes>& d_v)
+{
+    if (this->isComponentStateInvalid())
+        return;
+
+    if (!m_gpuDataUploaded)
+    {
+        ElementCorotationalFEMForceField<DataTypes, ElementType>::addForce(mparams, d_f, d_x, d_v);
+        uploadRotations();
+        return;
+    }
+
+    using trait = sofa::component::solidmechanics::fem::elastic::trait<DataTypes, ElementType>;
+    constexpr auto nNodes = trait::NumberOfNodesInElement;
+    constexpr auto dim = trait::spatial_dimensions;
+
+    const VecCoord& x = d_x.getValue();
+    auto restPositionAccessor = this->mstate->readRestPositions();
+    const VecCoord& x0 = restPositionAccessor.ref();
+
+    const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
+    const auto nbElem = static_cast<unsigned int>(elements.size());
+    const auto nbVertex = static_cast<unsigned int>(x.size());
+
+    VecDeriv& f = *d_f.beginEdit();
+    if (f.size() < x.size())
+        f.resize(x.size());
+
+    if constexpr (nNodes >= 3)
+    {
+        if (m_gpuRotationMethodSupported)
+        {
+            gpu::cuda::ElementCorotationalFEMForceFieldCuda_addForceWithRotations<Real, nNodes, dim>(
+                nbElem, nbVertex, m_maxElemPerVertex,
+                m_gpuElements.deviceRead(), m_gpuInitialRotationsTransposed.deviceRead(),
+                m_gpuStiffness.deviceRead(), x.deviceRead(), x0.deviceRead(),
+                f.deviceWrite(), m_gpuElementForce.deviceWrite(),
+                m_gpuRotations.deviceWrite(), m_gpuVelems.deviceRead());
+
+            m_gpuRotationsUploaded = true;
+            d_f.endEdit();
+            return;
+        }
+    }
+
+    // CPU rotations + GPU forces
+    this->computeRotations(this->m_rotations, x, x0);
+    uploadRotations();
+
+    gpu::cuda::ElementCorotationalFEMForceFieldCuda_addForce<Real, nNodes, dim>(
+        nbElem, nbVertex, m_maxElemPerVertex,
+        m_gpuElements.deviceRead(), m_gpuRotations.deviceRead(),
+        m_gpuStiffness.deviceRead(), x.deviceRead(), x0.deviceRead(),
+        f.deviceWrite(), m_gpuElementForce.deviceWrite(),
+        m_gpuVelems.deviceRead());
+
+    d_f.endEdit();
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementCorotationalFEMForceField<DataTypes, ElementType>::addDForce(
+    const sofa::core::MechanicalParams* mparams,
+    sofa::DataVecDeriv_t<DataTypes>& d_df,
+    const sofa::DataVecDeriv_t<DataTypes>& d_dx)
+{
+    if (this->isComponentStateInvalid())
+        return;
+
+    if (!m_gpuDataUploaded || !m_gpuRotationsUploaded)
+    {
+        // Fallback to CPU if GPU data not ready
+        ElementCorotationalFEMForceField<DataTypes, ElementType>::addDForce(mparams, d_df, d_dx);
+        return;
+    }
+
+    using trait = sofa::component::solidmechanics::fem::elastic::trait<DataTypes, ElementType>;
+    constexpr auto nNodes = trait::NumberOfNodesInElement;
+    constexpr auto dim = trait::spatial_dimensions;
+
+    VecDeriv& df = *d_df.beginEdit();
+    const VecDeriv& dx = d_dx.getValue();
+
+    if (df.size() < dx.size())
+        df.resize(dx.size());
+
+    const auto kFactor = static_cast<Real>(
+        sofa::core::mechanicalparams::kFactorIncludingRayleighDamping(
+            mparams, this->rayleighStiffness.getValue()));
+
+    const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
+    const auto nbElem = static_cast<unsigned int>(elements.size());
+    const auto nbVertex = static_cast<unsigned int>(dx.size());
+
+    gpu::cuda::ElementCorotationalFEMForceFieldCuda_addDForce<Real, nNodes, dim>(
+        nbElem, nbVertex, m_maxElemPerVertex,
+        m_gpuElements.deviceRead(), m_gpuRotations.deviceRead(),
+        m_gpuStiffness.deviceRead(), dx.deviceRead(),
+        df.deviceWrite(), m_gpuElementForce.deviceWrite(),
+        m_gpuVelems.deviceRead(), kFactor);
+
+    d_df.endEdit();
+}
+
+} // namespace sofa::component::solidmechanics::fem::elastic
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementFEMKernelUtils.cuh b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementFEMKernelUtils.cuh
new file mode 100644
index 00000000000..4d65c2f7130
--- /dev/null
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementFEMKernelUtils.cuh
@@ -0,0 +1,474 @@
+/******************************************************************************
+*                 SOFA, Simulation Open-Framework Architecture                *
+*                    (c) 2006 INRIA, USTL, UJF, CNRS, MGH                     *
+*                                                                             *
+* This program is free software; you can redistribute it and/or modify it     *
+* under the terms of the GNU Lesser General Public License as published by    *
+* the Free Software Foundation; either version 2.1 of the License, or (at     *
+* your option) any later version.                                             *
+*                                                                             *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details.                                                           *
+*                                                                             *
+* You should have received a copy of the GNU Lesser General Public License    *
+* along with this program. If not, see <http://www.gnu.org/licenses/>.        *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt)          *
+*                                                                             *
+* Contact information: contact@sofa-framework.org                             *
+******************************************************************************/
+#pragma once
+
+#include <cuda.h>
+
+namespace sofa::gpu::cuda
+{
+
+//=============================================================================
+// Math utilities
+//=============================================================================
+
+template<typename T>
+__device__ inline T myRsqrt(T x);
+
+template<>
+__device__ inline float myRsqrt<float>(float x) { return rsqrtf(x); }
+
+template<>
+__device__ inline double myRsqrt<double>(double x) { return rsqrt(x); }
+
+//=============================================================================
+// 3x3 Matrix operations (row-major storage)
+//=============================================================================
+
+/// C = A * B
+template<typename T>
+__device__ inline void mat3Mul(const T* A, const T* B, T* C)
+{
+    #pragma unroll
+    for (int i = 0; i < 3; ++i)
+    {
+        #pragma unroll
+        for (int j = 0; j < 3; ++j)
+        {
+            C[i * 3 + j] = A[i * 3 + 0] * B[0 * 3 + j]
+                         + A[i * 3 + 1] * B[1 * 3 + j]
+                         + A[i * 3 + 2] * B[2 * 3 + j];
+        }
+    }
+}
+
+/// C = A * B^T
+template<typename T>
+__device__ inline void mat3MulTranspose(const T* A, const T* BT, T* C)
+{
+    #pragma unroll
+    for (int i = 0; i < 3; ++i)
+    {
+        #pragma unroll
+        for (int j = 0; j < 3; ++j)
+        {
+            C[i * 3 + j] = A[i * 3 + 0] * BT[j * 3 + 0]
+                         + A[i * 3 + 1] * BT[j * 3 + 1]
+                         + A[i * 3 + 2] * BT[j * 3 + 2];
+        }
+    }
+}
+
+/// C = A^T * B
+template<typename T>
+__device__ inline void mat3TransposeMul(const T* A, const T* B, T* C)
+{
+    #pragma unroll
+    for (int i = 0; i < 3; ++i)
+    {
+        #pragma unroll
+        for (int j = 0; j < 3; ++j)
+        {
+            C[i * 3 + j] = A[0 * 3 + i] * B[0 * 3 + j]
+                         + A[1 * 3 + i] * B[1 * 3 + j]
+                         + A[2 * 3 + i] * B[2 * 3 + j];
+        }
+    }
+}
+
+/// out = R * in (rotate a 3D vector)
+template<typename T>
+__device__ inline void rotateVector(const T* R, const T* in, T* out)
+{
+    #pragma unroll
+    for (int i = 0; i < 3; ++i)
+    {
+        out[i] = R[i * 3 + 0] * in[0]
+               + R[i * 3 + 1] * in[1]
+               + R[i * 3 + 2] * in[2];
+    }
+}
+
+/// out = R^T * in (rotate a 3D vector by transpose)
+template<typename T>
+__device__ inline void rotateVectorTranspose(const T* R, const T* in, T* out)
+{
+    #pragma unroll
+    for (int i = 0; i < 3; ++i)
+    {
+        out[i] = R[0 * 3 + i] * in[0]
+               + R[1 * 3 + i] * in[1]
+               + R[2 * 3 + i] * in[2];
+    }
+}
+
+//=============================================================================
+// Rotation frame computation
+//=============================================================================
+
+/// Compute rotation frame from first 3 nodes (for Triangle, Quad, Tetrahedron)
+template<typename T>
+__device__ inline void computeTriangleFrame(const T* pos, T* frame)
+{
+    // X axis: normalized (p1 - p0)
+    T ax = pos[3] - pos[0], ay = pos[4] - pos[1], az = pos[5] - pos[2];
+    T invLen = myRsqrt(ax * ax + ay * ay + az * az);
+    ax *= invLen; ay *= invLen; az *= invLen;
+
+    // Temp vector b = p2 - p0
+    T bx = pos[6] - pos[0], by = pos[7] - pos[1], bz = pos[8] - pos[2];
+
+    // Z axis: normalized cross(a, b)
+    T cx = ay * bz - az * by;
+    T cy = az * bx - ax * bz;
+    T cz = ax * by - ay * bx;
+    invLen = myRsqrt(cx * cx + cy * cy + cz * cz);
+    cx *= invLen; cy *= invLen; cz *= invLen;
+
+    // Y axis: cross(z, x)
+    bx = cy * az - cz * ay;
+    by = cz * ax - cx * az;
+    bz = cx * ay - cy * ax;
+
+    // Store row-major: frame[row][col] = frame[row * 3 + col]
+    frame[0] = ax; frame[1] = ay; frame[2] = az;
+    frame[3] = bx; frame[4] = by; frame[5] = bz;
+    frame[6] = cx; frame[7] = cy; frame[8] = cz;
+}
+
+/// Compute rotation frame from 8 hexahedron nodes
+template<typename T>
+__device__ inline void computeHexahedronFrame(const T* pos, T* frame)
+{
+    const T quarter = T(0.25);
+
+    // Average X direction from 4 edge pairs
+    T ax = ((pos[1*3+0] - pos[0*3+0]) + (pos[2*3+0] - pos[3*3+0])
+          + (pos[5*3+0] - pos[4*3+0]) + (pos[6*3+0] - pos[7*3+0])) * quarter;
+    T ay = ((pos[1*3+1] - pos[0*3+1]) + (pos[2*3+1] - pos[3*3+1])
+          + (pos[5*3+1] - pos[4*3+1]) + (pos[6*3+1] - pos[7*3+1])) * quarter;
+    T az = ((pos[1*3+2] - pos[0*3+2]) + (pos[2*3+2] - pos[3*3+2])
+          + (pos[5*3+2] - pos[4*3+2]) + (pos[6*3+2] - pos[7*3+2])) * quarter;
+
+    // Average Y direction
+    T bx = ((pos[3*3+0] - pos[0*3+0]) + (pos[2*3+0] - pos[1*3+0])
+          + (pos[7*3+0] - pos[4*3+0]) + (pos[6*3+0] - pos[5*3+0])) * quarter;
+    T by = ((pos[3*3+1] - pos[0*3+1]) + (pos[2*3+1] - pos[1*3+1])
+          + (pos[7*3+1] - pos[4*3+1]) + (pos[6*3+1] - pos[5*3+1])) * quarter;
+    T bz = ((pos[3*3+2] - pos[0*3+2]) + (pos[2*3+2] - pos[1*3+2])
+          + (pos[7*3+2] - pos[4*3+2]) + (pos[6*3+2] - pos[5*3+2])) * quarter;
+
+    // Normalize X
+    T invLen = myRsqrt(ax * ax + ay * ay + az * az);
+    ax *= invLen; ay *= invLen; az *= invLen;
+
+    // Z = normalized cross(X, Y)
+    T cx = ay * bz - az * by;
+    T cy = az * bx - ax * bz;
+    T cz = ax * by - ay * bx;
+    invLen = myRsqrt(cx * cx + cy * cy + cz * cz);
+    cx *= invLen; cy *= invLen; cz *= invLen;
+
+    // Y = cross(Z, X)
+    bx = cy * az - cz * ay;
+    by = cz * ax - cx * az;
+    bz = cx * ay - cy * ax;
+
+    frame[0] = ax; frame[1] = ay; frame[2] = az;
+    frame[3] = bx; frame[4] = by; frame[5] = bz;
+    frame[6] = cx; frame[7] = cy; frame[8] = cz;
+}
+
+//=============================================================================
+// Element data gathering
+//=============================================================================
+
+/// Gather positions for one element from global arrays (SoA layout)
+template<typename T, int NNodes, int Dim>
+__device__ inline void gatherElementData(
+    const int* elements, int nbElem, int elemId,
+    const T* globalData,
+    T* localData)
+{
+    #pragma unroll
+    for (int n = 0; n < NNodes; ++n)
+    {
+        const int nodeId = elements[n * nbElem + elemId];
+        #pragma unroll
+        for (int d = 0; d < Dim; ++d)
+            localData[n * Dim + d] = globalData[nodeId * Dim + d];
+    }
+}
+
+/// Gather displacement (x - x0) for one element
+template<typename T, int NNodes, int Dim>
+__device__ inline void gatherElementDisplacement(
+    const int* elements, int nbElem, int elemId,
+    const T* x, const T* x0,
+    T* disp)
+{
+    #pragma unroll
+    for (int n = 0; n < NNodes; ++n)
+    {
+        const int nodeId = elements[n * nbElem + elemId];
+        #pragma unroll
+        for (int d = 0; d < Dim; ++d)
+            disp[n * Dim + d] = x[nodeId * Dim + d] - x0[nodeId * Dim + d];
+    }
+}
+
+//=============================================================================
+// Element center computation
+//=============================================================================
+
+/// Compute center of element positions
+template<typename T, int NNodes, int Dim>
+__device__ inline void computeElementCenter(const T* pos, T* center)
+{
+    const T invN = T(1) / T(NNodes);
+
+    #pragma unroll
+    for (int d = 0; d < Dim; ++d)
+        center[d] = T(0);
+
+    #pragma unroll
+    for (int n = 0; n < NNodes; ++n)
+    {
+        #pragma unroll
+        for (int d = 0; d < Dim; ++d)
+            center[d] += pos[n * Dim + d];
+    }
+
+    #pragma unroll
+    for (int d = 0; d < Dim; ++d)
+        center[d] *= invN;
+}
+
+//=============================================================================
+// Corotational displacement computation
+//=============================================================================
+
+/// Compute corotational displacement: disp = R^T * (x - center) - (x0 - center0)
+template<typename T, int NNodes, int Dim>
+__device__ inline void computeCorotationalDisplacement(
+    const T* R,
+    const T* x, const T* x0,
+    const T* center, const T* center0,
+    T* disp)
+{
+    #pragma unroll
+    for (int n = 0; n < NNodes; ++n)
+    {
+        // diff = x_n - center
+        T diff[Dim];
+        #pragma unroll
+        for (int d = 0; d < Dim; ++d)
+            diff[d] = x[n * Dim + d] - center[d];
+
+        // rotated = R^T * diff
+        #pragma unroll
+        for (int di = 0; di < Dim; ++di)
+        {
+            T rotated = T(0);
+            #pragma unroll
+            for (int dj = 0; dj < Dim; ++dj)
+                rotated += R[dj * Dim + di] * diff[dj];
+            disp[n * Dim + di] = rotated - (x0[n * Dim + di] - center0[di]);
+        }
+    }
+}
+
+/// Compute R^T * dx for each node (for addDForce)
+template<typename T, int NNodes, int Dim>
+__device__ inline void rotateDisplacementTranspose(
+    const T* R,
+    const int* elements, int nbElem, int elemId,
+    const T* dx,
+    T* rdx)
+{
+    #pragma unroll
+    for (int n = 0; n < NNodes; ++n)
+    {
+        const int nodeId = elements[n * nbElem + elemId];
+
+        T nodeDx[Dim];
+        #pragma unroll
+        for (int d = 0; d < Dim; ++d)
+            nodeDx[d] = dx[nodeId * Dim + d];
+
+        #pragma unroll
+        for (int di = 0; di < Dim; ++di)
+        {
+            T sum = T(0);
+            #pragma unroll
+            for (int dj = 0; dj < Dim; ++dj)
+                sum += R[dj * Dim + di] * nodeDx[dj];
+            rdx[n * Dim + di] = sum;
+        }
+    }
+}
+
+//=============================================================================
+// Symmetric block-matrix multiply
+//=============================================================================
+
+/**
+ * Symmetric block-matrix multiply: out = K * in
+ *
+ * K is stored in upper-triangle block format:
+ *   symIdx = ni * NNodes - ni*(ni-1)/2 + (nj - ni)  for nj >= ni
+ *   K[symIdx * Dim * Dim + di * Dim + dj] for each element
+ */
+template<typename T, int NNodes, int Dim>
+__device__ inline void symBlockMatMul(const T* K, const T* in, T* out)
+{
+    // Initialize output to zero
+    #pragma unroll
+    for (int i = 0; i < NNodes * Dim; ++i)
+        out[i] = T(0);
+
+    #pragma unroll
+    for (int ni = 0; ni < NNodes; ++ni)
+    {
+        const int diagIdx = ni * NNodes - ni * (ni - 1) / 2;
+
+        // Diagonal block: Kii * in_i -> out_i
+        {
+            const T* Kii = K + diagIdx * Dim * Dim;
+            #pragma unroll
+            for (int di = 0; di < Dim; ++di)
+            {
+                T sum = T(0);
+                #pragma unroll
+                for (int dj = 0; dj < Dim; ++dj)
+                    sum += Kii[di * Dim + dj] * in[ni * Dim + dj];
+                out[ni * Dim + di] += sum;
+            }
+        }
+
+        // Off-diagonal blocks (symmetric: Kij and Kij^T)
+        #pragma unroll
+        for (int nj = ni + 1; nj < NNodes; ++nj)
+        {
+            const int symIdx = diagIdx + (nj - ni);
+            const T* Kij = K + symIdx * Dim * Dim;
+
+            // Kij * in_j -> out_i
+            #pragma unroll
+            for (int di = 0; di < Dim; ++di)
+            {
+                T sum = T(0);
+                #pragma unroll
+                for (int dj = 0; dj < Dim; ++dj)
+                    sum += Kij[di * Dim + dj] * in[nj * Dim + dj];
+                out[ni * Dim + di] += sum;
+            }
+
+            // Kij^T * in_i -> out_j
+            #pragma unroll
+            for (int dj = 0; dj < Dim; ++dj)
+            {
+                T sum = T(0);
+                #pragma unroll
+                for (int di = 0; di < Dim; ++di)
+                    sum += Kij[di * Dim + dj] * in[ni * Dim + di];
+                out[nj * Dim + dj] += sum;
+            }
+        }
+    }
+}
+
+//=============================================================================
+// Force output with rotation
+//=============================================================================
+
+/// Rotate local forces to global frame and write: out = scale * R * localForce
+template<typename T, int NNodes, int Dim>
+__device__ inline void rotateAndWriteForce(
+    const T* R,
+    const T* localForce,
+    T* out,
+    T scale)
+{
+    #pragma unroll
+    for (int n = 0; n < NNodes; ++n)
+    {
+        #pragma unroll
+        for (int di = 0; di < Dim; ++di)
+        {
+            T sum = T(0);
+            #pragma unroll
+            for (int dj = 0; dj < Dim; ++dj)
+                sum += R[di * Dim + dj] * localForce[n * Dim + dj];
+            out[n * Dim + di] = scale * sum;
+        }
+    }
+}
+
+/// Write negated force (for linear case without rotation): out = scale * localForce
+template<typename T, int NNodes, int Dim>
+__device__ inline void writeForce(const T* localForce, T* out, T scale)
+{
+    #pragma unroll
+    for (int i = 0; i < NNodes * Dim; ++i)
+        out[i] = scale * localForce[i];
+}
+
+//=============================================================================
+// Gather kernel for accumulating per-vertex forces
+//=============================================================================
+
+/**
+ * Gather per-vertex forces from per-element contributions.
+ * velems[slot * nbVertex + vertexId] contains (elemId * NNodes + localNode + 1), 0 = end
+ */
+template<typename T, int Dim>
+__global__ void ElementFEM_gatherForce_kernel(
+    int nbVertex,
+    int maxElemPerVertex,
+    const int* __restrict__ velems,
+    const T* __restrict__ eforce,
+    T* df)
+{
+    const int vertexId = blockIdx.x * blockDim.x + threadIdx.x;
+    if (vertexId >= nbVertex) return;
+
+    T acc[Dim];
+    #pragma unroll
+    for (int d = 0; d < Dim; ++d)
+        acc[d] = T(0);
+
+    for (int s = 0; s < maxElemPerVertex; ++s)
+    {
+        const int idx = velems[s * nbVertex + vertexId];
+        if (idx == 0) break;
+        const int base = (idx - 1) * Dim;
+        #pragma unroll
+        for (int d = 0; d < Dim; ++d)
+            acc[d] += eforce[base + d];
+    }
+
+    #pragma unroll
+    for (int d = 0; d < Dim; ++d)
+        df[vertexId * Dim + d] += acc[d];
+}
+
+} // namespace sofa::gpu::cuda
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cpp b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cpp
new file mode 100644
index 00000000000..d8d3b9ef1c3
--- /dev/null
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cpp
@@ -0,0 +1,101 @@
+/******************************************************************************
+*                 SOFA, Simulation Open-Framework Architecture                *
+*                    (c) 2006 INRIA, USTL, UJF, CNRS, MGH                     *
+*                                                                             *
+* This program is free software; you can redistribute it and/or modify it     *
+* under the terms of the GNU Lesser General Public License as published by    *
+* the Free Software Foundation; either version 2.1 of the License, or (at     *
+* your option) any later version.                                             *
+*                                                                             *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details.                                                           *
+*                                                                             *
+* You should have received a copy of the GNU Lesser General Public License    *
+* along with this program. If not, see <http://www.gnu.org/licenses/>.        *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt)          *
+*                                                                             *
+* Contact information: contact@sofa-framework.org                             *
+******************************************************************************/
+#include <SofaCUDA/component/config.h>
+
+#include <sofa/gpu/cuda/CudaTypes.h>
+#include <SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.inl>
+#include <sofa/core/ObjectFactory.h>
+
+namespace sofa::component::solidmechanics::fem::elastic
+{
+
+using namespace sofa::gpu::cuda;
+
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Edge>;
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Triangle>;
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Quad>;
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Tetrahedron>;
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Hexahedron>;
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Edge>;
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Triangle>;
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Quad>;
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Tetrahedron>;
+template class SOFACUDA_COMPONENT_API CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Hexahedron>;
+#endif
+
+} // namespace sofa::component::solidmechanics::fem::elastic
+
+namespace sofa::gpu::cuda
+{
+
+void registerElementLinearSmallStrainFEMForceField(sofa::core::ObjectFactory* factory)
+{
+    using namespace sofa::component::solidmechanics::fem::elastic;
+
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for EdgeLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Edge> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for TriangleLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Triangle> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for QuadLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Quad> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for TetrahedronLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Tetrahedron> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA for HexahedronLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3fTypes, sofa::geometry::Hexahedron> >()
+    );
+
+#ifdef SOFA_GPU_CUDA_DOUBLE
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for EdgeLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Edge> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for TriangleLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Triangle> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for QuadLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Quad> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for TetrahedronLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Tetrahedron> >()
+    );
+    factory->registerObjects(sofa::core::ObjectRegistrationData(
+        "Supports GPU-side computations using CUDA (double) for HexahedronLinearSmallStrainFEMForceField")
+        .add< CudaElementLinearSmallStrainFEMForceField<CudaVec3dTypes, sofa::geometry::Hexahedron> >()
+    );
+#endif
+}
+
+} // namespace sofa::gpu::cuda
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cu b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cu
new file mode 100644
index 00000000000..9d0195ab580
--- /dev/null
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.cu
@@ -0,0 +1,189 @@
+/******************************************************************************
+*                 SOFA, Simulation Open-Framework Architecture                *
+*                    (c) 2006 INRIA, USTL, UJF, CNRS, MGH                     *
+*                                                                             *
+* This program is free software; you can redistribute it and/or modify it     *
+* under the terms of the GNU Lesser General Public License as published by    *
+* the Free Software Foundation; either version 2.1 of the License, or (at     *
+* your option) any later version.                                             *
+*                                                                             *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details.                                                           *
+*                                                                             *
+* You should have received a copy of the GNU Lesser General Public License    *
+* along with this program. If not, see <http://www.gnu.org/licenses/>.        *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt)          *
+*                                                                             *
+* Contact information: contact@sofa-framework.org                             *
+******************************************************************************/
+#include <sofa/gpu/cuda/CudaCommon.h>
+#include <sofa/gpu/cuda/CudaMath.h>
+#include "CudaElementFEMKernelUtils.cuh"
+
+namespace sofa::gpu::cuda
+{
+
+/**
+ * Kernel for addForce: f = -K * (x - x0)
+ */
+template<typename T, int NNodes, int Dim>
+__global__ void ElementLinearSmallStrainFEMForceField_computeForce_kernel(
+    int nbElem,
+    const int* __restrict__ elements,
+    const T* __restrict__ stiffness,
+    const T* __restrict__ x,
+    const T* __restrict__ x0,
+    T* __restrict__ eforce)
+{
+    constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2;
+
+    const int elemId = blockIdx.x * blockDim.x + threadIdx.x;
+    if (elemId >= nbElem) return;
+
+    // Gather displacement (x - x0)
+    T disp[NNodes * Dim];
+    gatherElementDisplacement<T, NNodes, Dim>(elements, nbElem, elemId, x, x0, disp);
+
+    // Multiply by stiffness matrix
+    const T* K = stiffness + elemId * NSymBlocks * Dim * Dim;
+    T edf[NNodes * Dim];
+    symBlockMatMul<T, NNodes, Dim>(K, disp, edf);
+
+    // Write negated force
+    T* out = eforce + elemId * NNodes * Dim;
+    writeForce<T, NNodes, Dim>(edf, out, T(-1));
+}
+
+/**
+ * Kernel for addDForce: df = -kFactor * K * dx
+ */
+template<typename T, int NNodes, int Dim>
+__global__ void ElementLinearSmallStrainFEMForceField_computeDForce_kernel(
+    int nbElem,
+    const int* __restrict__ elements,
+    const T* __restrict__ stiffness,
+    const T* __restrict__ dx,
+    T* __restrict__ eforce,
+    T kFactor)
+{
+    constexpr int NSymBlocks = NNodes * (NNodes + 1) / 2;
+
+    const int elemId = blockIdx.x * blockDim.x + threadIdx.x;
+    if (elemId >= nbElem) return;
+
+    // Gather displacement increment
+    T edx[NNodes * Dim];
+    gatherElementData<T, NNodes, Dim>(elements, nbElem, elemId, dx, edx);
+
+    // Multiply by stiffness matrix
+    const T* K = stiffness + elemId * NSymBlocks * Dim * Dim;
+    T edf[NNodes * Dim];
+    symBlockMatMul<T, NNodes, Dim>(K, edx, edf);
+
+    // Write scaled negated force
+    T* out = eforce + elemId * NNodes * Dim;
+    writeForce<T, NNodes, Dim>(edf, out, -kFactor);
+}
+
+// ===================== Launch functions =====================
+
+template<typename T, int NNodes, int Dim>
+void ElementLinearSmallStrainFEMForceFieldCuda_addForce(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* stiffness,
+    const void* x,
+    const void* x0,
+    void* f,
+    void* eforce,
+    const void* velems)
+{
+    const int computeThreads = 64;
+    int numBlocks = (nbElem + computeThreads - 1) / computeThreads;
+    ElementLinearSmallStrainFEMForceField_computeForce_kernel<T, NNodes, Dim>
+        <<<numBlocks, computeThreads>>>(
+            nbElem,
+            (const int*)elements,
+            (const T*)stiffness,
+            (const T*)x,
+            (const T*)x0,
+            (T*)eforce);
+    mycudaDebugError("ElementLinearSmallStrainFEMForceField_computeForce_kernel");
+
+    const int gatherThreads = 256;
+    numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads;
+    ElementFEM_gatherForce_kernel<T, Dim>
+        <<<numBlocks, gatherThreads>>>(
+            nbVertex,
+            maxElemPerVertex,
+            (const int*)velems,
+            (const T*)eforce,
+            (T*)f);
+    mycudaDebugError("ElementFEM_gatherForce_kernel");
+}
+
+template<typename T, int NNodes, int Dim>
+void ElementLinearSmallStrainFEMForceFieldCuda_addDForce(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* stiffness,
+    const void* dx,
+    void* df,
+    void* eforce,
+    const void* velems,
+    T kFactor)
+{
+    const int computeThreads = 64;
+    int numBlocks = (nbElem + computeThreads - 1) / computeThreads;
+    ElementLinearSmallStrainFEMForceField_computeDForce_kernel<T, NNodes, Dim>
+        <<<numBlocks, computeThreads>>>(
+            nbElem,
+            (const int*)elements,
+            (const T*)stiffness,
+            (const T*)dx,
+            (T*)eforce,
+            kFactor);
+    mycudaDebugError("ElementLinearSmallStrainFEMForceField_computeDForce_kernel");
+
+    const int gatherThreads = 256;
+    numBlocks = (nbVertex + gatherThreads - 1) / gatherThreads;
+    ElementFEM_gatherForce_kernel<T, Dim>
+        <<<numBlocks, gatherThreads>>>(
+            nbVertex,
+            maxElemPerVertex,
+            (const int*)velems,
+            (const T*)eforce,
+            (T*)df);
+    mycudaDebugError("ElementFEM_gatherForce_kernel");
+}
+
+// ===================== Explicit template instantiations =====================
+
+#define INSTANTIATE_LINEAR(T, NNodes) \
+    template void ElementLinearSmallStrainFEMForceFieldCuda_addForce<T, NNodes, 3>( \
+        unsigned int, unsigned int, unsigned int, const void*, const void*, \
+        const void*, const void*, void*, void*, const void*); \
+    template void ElementLinearSmallStrainFEMForceFieldCuda_addDForce<T, NNodes, 3>( \
+        unsigned int, unsigned int, unsigned int, const void*, const void*, \
+        const void*, void*, void*, const void*, T);
+
+INSTANTIATE_LINEAR(float, 2)
+INSTANTIATE_LINEAR(float, 3)
+INSTANTIATE_LINEAR(float, 4)
+INSTANTIATE_LINEAR(float, 8)
+
+INSTANTIATE_LINEAR(double, 2)
+INSTANTIATE_LINEAR(double, 3)
+INSTANTIATE_LINEAR(double, 4)
+INSTANTIATE_LINEAR(double, 8)
+
+#undef INSTANTIATE_LINEAR
+
+} // namespace sofa::gpu::cuda
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.h b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.h
new file mode 100644
index 00000000000..45d119846e8
--- /dev/null
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.h
@@ -0,0 +1,128 @@
+/******************************************************************************
+*                 SOFA, Simulation Open-Framework Architecture                *
+*                    (c) 2006 INRIA, USTL, UJF, CNRS, MGH                     *
+*                                                                             *
+* This program is free software; you can redistribute it and/or modify it     *
+* under the terms of the GNU Lesser General Public License as published by    *
+* the Free Software Foundation; either version 2.1 of the License, or (at     *
+* your option) any later version.                                             *
+*                                                                             *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details.                                                           *
+*                                                                             *
+* You should have received a copy of the GNU Lesser General Public License    *
+* along with this program. If not, see <http://www.gnu.org/licenses/>.        *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt)          *
+*                                                                             *
+* Contact information: contact@sofa-framework.org                             *
+******************************************************************************/
+#pragma once
+
+#include <sofa/gpu/cuda/CudaTypes.h>
+#include <sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.h>
+
+namespace sofa::gpu::cuda
+{
+
+template<typename T, int NNodes, int Dim>
+void ElementLinearSmallStrainFEMForceFieldCuda_addForce(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* stiffness,
+    const void* x,
+    const void* x0,
+    void* f,
+    void* eforce,
+    const void* velems);
+
+template<typename T, int NNodes, int Dim>
+void ElementLinearSmallStrainFEMForceFieldCuda_addDForce(
+    unsigned int nbElem,
+    unsigned int nbVertex,
+    unsigned int maxElemPerVertex,
+    const void* elements,
+    const void* stiffness,
+    const void* dx,
+    void* df,
+    void* eforce,
+    const void* velems,
+    T kFactor);
+
+} // namespace sofa::gpu::cuda
+
+namespace sofa::component::solidmechanics::fem::elastic
+{
+
+/**
+ * CUDA-accelerated version of ElementLinearSmallStrainFEMForceField.
+ *
+ * Works with any element type (Edge, Triangle, Quad, Tetrahedron, Hexahedron).
+ * Both addForce and addDForce run entirely on GPU.
+ *
+ * Uses a two-kernel approach for addDForce:
+ *   Kernel 1: compute per-element forces (1 thread/element, fully unrolled)
+ *   Kernel 2: gather per-vertex (1 thread/vertex, no atomics)
+ *
+ * Compared to the corotational version, no rotation matrices are needed.
+ */
+template<class DataTypes, class ElementType>
+class CudaElementLinearSmallStrainFEMForceField
+    : public ElementLinearSmallStrainFEMForceField<DataTypes, ElementType>
+{
+public:
+    SOFA_CLASS(
+        SOFA_TEMPLATE2(CudaElementLinearSmallStrainFEMForceField, DataTypes, ElementType),
+        SOFA_TEMPLATE2(ElementLinearSmallStrainFEMForceField, DataTypes, ElementType));
+
+    using Real = sofa::Real_t<DataTypes>;
+    using Coord = sofa::Coord_t<DataTypes>;
+    using Deriv = sofa::Deriv_t<DataTypes>;
+    using VecCoord = sofa::VecCoord_t<DataTypes>;
+    using VecDeriv = sofa::VecDeriv_t<DataTypes>;
+
+    static const std::string GetCustomClassName()
+    {
+        return ElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::GetCustomClassName();
+    }
+
+    static const std::string GetCustomTemplateName()
+    {
+        return DataTypes::Name();
+    }
+
+    void init() override;
+
+    void addForce(
+        const sofa::core::MechanicalParams* mparams,
+        sofa::DataVecDeriv_t<DataTypes>& f,
+        const sofa::DataVecCoord_t<DataTypes>& x,
+        const sofa::DataVecDeriv_t<DataTypes>& v) override;
+
+    void addDForce(
+        const sofa::core::MechanicalParams* mparams,
+        sofa::DataVecDeriv_t<DataTypes>& df,
+        const sofa::DataVecDeriv_t<DataTypes>& dx) override;
+
+protected:
+
+    CudaElementLinearSmallStrainFEMForceField() = default;
+
+    void uploadStiffnessAndConnectivity();
+
+    gpu::cuda::CudaVector<Real> m_gpuStiffness;      ///< Symmetric block-format stiffness per element
+    gpu::cuda::CudaVector<int>  m_gpuElements;        ///< SoA connectivity: elements[nodeIdx * nbElem + elemId]
+    gpu::cuda::CudaVector<Real> m_gpuElementForce;    ///< Intermediate per-element per-node force buffer
+    gpu::cuda::CudaVector<int>  m_gpuVelems;          ///< SoA vertex-to-element mapping, 0-terminated
+
+    unsigned int m_maxElemPerVertex = 0;
+    unsigned int m_nbVertices = 0;
+
+    bool m_gpuDataUploaded = false;
+};
+
+} // namespace sofa::component::solidmechanics::fem::elastic
diff --git a/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.inl b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.inl
new file mode 100644
index 00000000000..f27c06b92e7
--- /dev/null
+++ b/applications/plugins/SofaCUDA/Component/src/SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.inl
@@ -0,0 +1,234 @@
+/******************************************************************************
+*                 SOFA, Simulation Open-Framework Architecture                *
+*                    (c) 2006 INRIA, USTL, UJF, CNRS, MGH                     *
+*                                                                             *
+* This program is free software; you can redistribute it and/or modify it     *
+* under the terms of the GNU Lesser General Public License as published by    *
+* the Free Software Foundation; either version 2.1 of the License, or (at     *
+* your option) any later version.                                             *
+*                                                                             *
+* This program is distributed in the hope that it will be useful, but WITHOUT *
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or       *
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License *
+* for more details.                                                           *
+*                                                                             *
+* You should have received a copy of the GNU Lesser General Public License    *
+* along with this program. If not, see <http://www.gnu.org/licenses/>.        *
+*******************************************************************************
+* Authors: The SOFA Team and external contributors (see Authors.txt)          *
+*                                                                             *
+* Contact information: contact@sofa-framework.org                             *
+******************************************************************************/
+#pragma once
+#include <SofaCUDA/component/solidmechanics/fem/elastic/CudaElementLinearSmallStrainFEMForceField.h>
+#include <sofa/component/solidmechanics/fem/elastic/ElementLinearSmallStrainFEMForceField.inl>
+#include <sofa/core/behavior/ForceField.inl>
+#include <cstring>
+
+namespace sofa::component::solidmechanics::fem::elastic
+{
+
+template<class DataTypes, class ElementType>
+void CudaElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::init()
+{
+    ElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::init();
+
+    if (!this->isComponentStateInvalid())
+    {
+        uploadStiffnessAndConnectivity();
+    }
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::uploadStiffnessAndConnectivity()
+{
+    using trait = sofa::component::solidmechanics::fem::elastic::trait<DataTypes, ElementType>;
+
+    if (!this->l_topology) return;
+
+    const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
+    const auto& assembledMatrices = this->m_assembledStiffnessMatrices;
+
+    const auto nbElem = elements.size();
+    constexpr auto nNodes = trait::NumberOfNodesInElement;
+    constexpr auto dim = trait::spatial_dimensions;
+
+    // Find number of vertices
+    unsigned int maxNodeId = 0;
+    for (std::size_t e = 0; e < nbElem; ++e)
+    {
+        const auto& element = elements[e];
+        for (unsigned int n = 0; n < nNodes; ++n)
+        {
+            if (static_cast<unsigned int>(element[n]) > maxNodeId)
+                maxNodeId = static_cast<unsigned int>(element[n]);
+        }
+    }
+    m_nbVertices = maxNodeId + 1;
+
+    // Upload stiffness matrices in symmetric upper-triangle block format:
+    // Only blocks (ni, nj) with nj >= ni are stored.
+    // symIdx = ni * nNodes - ni*(ni-1)/2 + (nj - ni)
+    // K[symIdx * dim * dim + di * dim + dj] per element
+    constexpr auto nSymBlocks = nNodes * (nNodes + 1) / 2;
+    m_gpuStiffness.resize(nbElem * nSymBlocks * dim * dim);
+    {
+        auto* dst = m_gpuStiffness.hostWrite();
+        for (std::size_t e = 0; e < nbElem; ++e)
+        {
+            const auto& K = assembledMatrices[e];
+            for (unsigned int ni = 0; ni < nNodes; ++ni)
+            {
+                const unsigned int diagIdx = ni * nNodes - ni * (ni - 1) / 2;
+                for (unsigned int nj = ni; nj < nNodes; ++nj)
+                {
+                    const unsigned int symIdx = diagIdx + (nj - ni);
+                    for (unsigned int di = 0; di < dim; ++di)
+                        for (unsigned int dj = 0; dj < dim; ++dj)
+                            dst[e * nSymBlocks * dim * dim
+                                + symIdx * dim * dim
+                                + di * dim + dj]
+                                = static_cast<Real>(K[ni * dim + di][nj * dim + dj]);
+                }
+            }
+        }
+    }
+
+    // Upload element connectivity in SoA layout:
+    // elements[nodeIdx * nbElem + elemId] = global node index
+    m_gpuElements.resize(nNodes * nbElem);
+    {
+        auto* dst = m_gpuElements.hostWrite();
+        for (std::size_t e = 0; e < nbElem; ++e)
+        {
+            const auto& element = elements[e];
+            for (unsigned int n = 0; n < nNodes; ++n)
+                dst[n * nbElem + e] = static_cast<int>(element[n]);
+        }
+    }
+
+    // Build vertex-to-element mapping (velems)
+    std::vector<std::vector<int>> vertexElems(m_nbVertices);
+    for (std::size_t e = 0; e < nbElem; ++e)
+    {
+        const auto& element = elements[e];
+        for (unsigned int n = 0; n < nNodes; ++n)
+        {
+            const int nodeId = static_cast<int>(element[n]);
+            vertexElems[nodeId].push_back(
+                static_cast<int>(e * nNodes + n + 1));
+        }
+    }
+
+    m_maxElemPerVertex = 0;
+    for (const auto& ve : vertexElems)
+    {
+        if (ve.size() > m_maxElemPerVertex)
+            m_maxElemPerVertex = static_cast<unsigned int>(ve.size());
+    }
+
+    m_gpuVelems.resize(m_maxElemPerVertex * m_nbVertices);
+    {
+        auto* dst = m_gpuVelems.hostWrite();
+        std::memset(dst, 0, m_maxElemPerVertex * m_nbVertices * sizeof(int));
+        for (std::size_t v = 0; v < m_nbVertices; ++v)
+        {
+            for (std::size_t s = 0; s < vertexElems[v].size(); ++s)
+                dst[s * m_nbVertices + v] = vertexElems[v][s];
+        }
+    }
+
+    // Allocate intermediate per-element force buffer
+    m_gpuElementForce.resize(nbElem * nNodes * dim);
+
+    m_gpuDataUploaded = true;
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::addForce(
+    const sofa::core::MechanicalParams* mparams,
+    sofa::DataVecDeriv_t<DataTypes>& d_f,
+    const sofa::DataVecCoord_t<DataTypes>& d_x,
+    const sofa::DataVecDeriv_t<DataTypes>& d_v)
+{
+    if (this->isComponentStateInvalid())
+        return;
+
+    if (!m_gpuDataUploaded)
+    {
+        ElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::addForce(mparams, d_f, d_x, d_v);
+        return;
+    }
+
+    using trait = sofa::component::solidmechanics::fem::elastic::trait<DataTypes, ElementType>;
+    constexpr auto nNodes = trait::NumberOfNodesInElement;
+    constexpr auto dim = trait::spatial_dimensions;
+
+    VecDeriv& f = *d_f.beginEdit();
+    const VecCoord& x = d_x.getValue();
+
+    if (f.size() < x.size())
+        f.resize(x.size());
+
+    auto restPositionAccessor = this->mstate->readRestPositions();
+    const VecCoord& x0 = restPositionAccessor.ref();
+
+    const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
+    const auto nbElem = static_cast<unsigned int>(elements.size());
+    const auto nbVertex = static_cast<unsigned int>(x.size());
+
+    gpu::cuda::ElementLinearSmallStrainFEMForceFieldCuda_addForce<Real, nNodes, dim>(
+        nbElem, nbVertex, m_maxElemPerVertex,
+        m_gpuElements.deviceRead(), m_gpuStiffness.deviceRead(),
+        x.deviceRead(), x0.deviceRead(),
+        f.deviceWrite(), m_gpuElementForce.deviceWrite(),
+        m_gpuVelems.deviceRead());
+
+    d_f.endEdit();
+}
+
+template<class DataTypes, class ElementType>
+void CudaElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::addDForce(
+    const sofa::core::MechanicalParams* mparams,
+    sofa::DataVecDeriv_t<DataTypes>& d_df,
+    const sofa::DataVecDeriv_t<DataTypes>& d_dx)
+{
+    if (this->isComponentStateInvalid())
+        return;
+
+    if (!m_gpuDataUploaded)
+    {
+        // Fallback to CPU if GPU data not ready
+        ElementLinearSmallStrainFEMForceField<DataTypes, ElementType>::addDForce(mparams, d_df, d_dx);
+        return;
+    }
+
+    using trait = sofa::component::solidmechanics::fem::elastic::trait<DataTypes, ElementType>;
+    constexpr auto nNodes = trait::NumberOfNodesInElement;
+    constexpr auto dim = trait::spatial_dimensions;
+
+    VecDeriv& df = *d_df.beginEdit();
+    const VecDeriv& dx = d_dx.getValue();
+
+    if (df.size() < dx.size())
+        df.resize(dx.size());
+
+    const auto kFactor = static_cast<Real>(
+        sofa::core::mechanicalparams::kFactorIncludingRayleighDamping(
+            mparams, this->rayleighStiffness.getValue()));
+
+    const auto& elements = trait::FiniteElement::getElementSequence(*this->l_topology);
+    const auto nbElem = static_cast<unsigned int>(elements.size());
+    const auto nbVertex = static_cast<unsigned int>(dx.size());
+
+    gpu::cuda::ElementLinearSmallStrainFEMForceFieldCuda_addDForce<Real, nNodes, dim>(
+        nbElem, nbVertex, m_maxElemPerVertex,
+        m_gpuElements.deviceRead(), m_gpuStiffness.deviceRead(),
+        dx.deviceRead(), df.deviceWrite(),
+        m_gpuElementForce.deviceWrite(), m_gpuVelems.deviceRead(),
+        kFactor);
+
+    d_df.endEdit();
+}
+
+} // namespace sofa::component::solidmechanics::fem::elastic
diff --git a/applications/plugins/SofaCUDA/examples/CudaElementCorotationalFEMForceField.scn b/applications/plugins/SofaCUDA/examples/CudaElementCorotationalFEMForceField.scn
new file mode 100644
index 00000000000..7ecf5da0d41
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/CudaElementCorotationalFEMForceField.scn
@@ -0,0 +1,102 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop />
+    <DefaultVisualManagerLoop />
+
+<!--     <Node name="Hexahedron">
+        <EulerImplicitSolver name="odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="250" name="linear_solver" tolerance="1.0e-12" threshold="1.0e-12" />
+
+        <RegularGridTopology name="grid" n="80 20 20" min="0 6 -2" max="16 10 2" />
+        <MechanicalObject template="CudaVec3f" name="state"/>
+
+        <HexahedronSetTopologyContainer name="Container" src="@grid"/>
+        <HexahedronSetTopologyModifier name="Modifier" />
+
+        <DiagonalMass totalMass="50.0" />
+
+        <BoxROI name="box_roi" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+        <FixedProjectiveConstraint indices="@box_roi.indices" />
+
+        <HexahedronCorotationalFEMForceField name="FEM" template="CudaVec3f"
+                                              youngModulus="2000" poissonRatio="0.3" />
+    </Node> -->
+
+    <Node name="Tetrahedron">
+        <Node name="Beam">
+            <RegularGridTopology name="grid" n="40 10 10" min="0 6 -2" max="16 10 2" />
+            <TetrahedronSetTopologyContainer name="BeamTopo" />
+            <TetrahedronSetTopologyModifier name="Modifier" />
+
+            <Hexa2TetraTopologicalMapping input="@grid" output="@BeamTopo" />
+        </Node>
+        
+        
+        <Node name="TetrahedronFEMForceField-GPU-Green">
+            <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+            <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-6" threshold="1.0e-6" />
+            
+            <MechanicalObject position="@../Beam/grid.position" name="Volume" template="CudaVec3f"/>
+
+            <TetrahedronSetTopologyContainer name="Container" src="@../Beam/BeamTopo"/>
+            <TetrahedronSetTopologyModifier name="Modifier" />
+            <TetrahedronSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+            <DiagonalMass totalMass="50.0" />
+            <BoxROI name="ROI1" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+            
+            <FixedProjectiveConstraint indices="@ROI1.indices" />
+            <TetrahedronCorotationalFEMForceField name="FEM" template="CudaVec3f"
+                                              youngModulus="2000" poissonRatio="0.3" />
+        </Node>  
+    </Node>
+    
+<!--     <Node name="Triangle">
+        <RegularGridTopology name="tissue" n="100 100 1" min="0 0 0" max="10 10 0" />
+
+        <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-6" threshold="1.0e-6" />
+        
+        <MechanicalObject position="@../tissue.position" name="dofs" template="CudaVec3f"/>
+
+        <TriangleSetTopologyContainer name="Container" src="@tissue"/>
+        <TriangleSetTopologyModifier name="Modifier" />
+        <TriangleSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+        <DiagonalMass massDensity="0.15" template="CudaVec3f,CudaVec3f"/>
+        <FixedProjectiveConstraint indices="9900 9901 9902 9903 9996 9997 9998 9999" />
+
+        <TriangleCorotationalFEMForceField name="FEM" youngModulus="600" poissonRatio="0.3" method="large" template="CudaVec3f"/>
+    </Node> -->
+
+<!--     <Node name="Quad">
+        <RegularGridTopology name="tissue" n="100 100 1" min="0 0 0" max="10 10 0" />
+
+        <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-6" threshold="1.0e-6" />
+        
+        <MechanicalObject position="@../tissue.position" name="dofs" template="CudaVec3f"/>
+
+        <QuadSetTopologyContainer name="Container" src="@tissue"/>
+        <QuadSetTopologyModifier name="Modifier" />
+        <QuadSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+        <DiagonalMass massDensity="0.15" template="CudaVec3f,CudaVec3f"/>
+        <FixedProjectiveConstraint indices="9900 9901 9902 9903 9996 9997 9998 9999" />
+
+        <QuadCorotationalFEMForceField name="FEM" youngModulus="600" poissonRatio="0.3" method="large" template="CudaVec3f"/>
+    </Node> -->
+
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/CudaElementLinearSmallStrainFEMForceField.scn b/applications/plugins/SofaCUDA/examples/CudaElementLinearSmallStrainFEMForceField.scn
new file mode 100644
index 00000000000..39a5017048d
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/CudaElementLinearSmallStrainFEMForceField.scn
@@ -0,0 +1,101 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop/>
+
+<!--     <Node name="Hexahedron">
+        <EulerImplicitSolver name="odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="250" name="linear_solver" tolerance="1.0e-12" threshold="1.0e-12" />
+
+        <RegularGridTopology name="grid" n="40 10 10" min="0 6 -2" max="16 10 2" />
+        <MechanicalObject template="CudaVec3f" name="state"/>
+
+        <HexahedronSetTopologyContainer name="Container" src="@grid"/>
+        <HexahedronSetTopologyModifier name="Modifier" />
+
+        <DiagonalMass totalMass="50.0" />
+
+        <BoxROI name="box_roi" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+        <FixedProjectiveConstraint indices="@box_roi.indices" />
+
+        <HexahedronLinearSmallStrainFEMForceField name="FEM" template="CudaVec3f"
+                                              youngModulus="2000" poissonRatio="0.3" />
+    </Node> -->
+
+    <Node name="Tetrahedron">
+        <Node name="Beam">
+            <RegularGridTopology name="grid" n="40 10 10" min="0 6 -2" max="16 10 2" />
+            <TetrahedronSetTopologyContainer name="BeamTopo" />
+            <TetrahedronSetTopologyModifier name="Modifier" />
+
+            <Hexa2TetraTopologicalMapping input="@grid" output="@BeamTopo" />
+        </Node>
+        
+        
+        <Node name="TetrahedronFEMForceField-GPU-Green">
+            <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+            <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-6" threshold="1.0e-6" />
+            
+            <MechanicalObject position="@../Beam/grid.position" name="Volume" template="CudaVec3f"/>
+
+            <TetrahedronSetTopologyContainer name="Container" src="@../Beam/BeamTopo"/>
+            <TetrahedronSetTopologyModifier name="Modifier" />
+            <TetrahedronSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+            <DiagonalMass totalMass="50.0" />
+            <BoxROI name="ROI1" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+            
+            <FixedProjectiveConstraint indices="@ROI1.indices" />
+            <TetrahedronLinearSmallStrainFEMForceField name="FEM" template="CudaVec3f"
+                                              youngModulus="2000" poissonRatio="0.3" />
+        </Node>  
+    </Node>
+
+<!--     <Node name="Triangle">
+        <RegularGridTopology name="tissue" n="100 100 1" min="0 0 0" max="10 10 0" />
+
+        <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-6" threshold="1.0e-6" />
+        
+        <MechanicalObject position="@../tissue.position" name="dofs" template="CudaVec3f"/>
+
+        <TriangleSetTopologyContainer name="Container" src="@tissue"/>
+        <TriangleSetTopologyModifier name="Modifier" />
+        <TriangleSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+        <DiagonalMass massDensity="0.15" template="CudaVec3f,CudaVec3f"/>
+        <FixedProjectiveConstraint indices="9900 9901 9902 9903 9996 9997 9998 9999" />
+
+        <TriangleLinearSmallStrainFEMForceField name="FEM" youngModulus="600" poissonRatio="0.3" method="large" template="CudaVec3f"/>
+    </Node> -->
+
+<!--     <Node name="Quad">
+        <RegularGridTopology name="tissue" n="100 100 1" min="0 0 0" max="10 10 0" />
+
+        <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-6" threshold="1.0e-6" />
+        
+        <MechanicalObject position="@../tissue.position" name="dofs" template="CudaVec3f"/>
+
+        <QuadSetTopologyContainer name="Container" src="@tissue"/>
+        <QuadSetTopologyModifier name="Modifier" />
+        <QuadSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+        <DiagonalMass massDensity="0.15" template="CudaVec3f,CudaVec3f"/>
+        <FixedProjectiveConstraint indices="9900 9901 9902 9903 9996 9997 9998 9999" />
+
+        <QuadLinearSmallStrainFEMForceField name="FEM" youngModulus="600" poissonRatio="0.3" method="large" template="CudaVec3f"/>
+    </Node> -->
+
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_hexa.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_hexa.scn
new file mode 100644
index 00000000000..a08c65e0024
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_hexa.scn
@@ -0,0 +1,37 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop />
+    <DefaultVisualManagerLoop />
+
+    <Node name="Hexahedron">
+        <EulerImplicitSolver name="odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-12" threshold="1.0e-12" />
+
+        <RegularGridTopology name="grid" n="80 20 20" min="0 6 -2" max="16 10 2" />
+        <MechanicalObject template="CudaVec3f" name="state"/>
+
+        <HexahedronSetTopologyContainer name="Container" src="@grid"/>
+        <HexahedronSetTopologyModifier name="Modifier" />
+
+        <DiagonalMass totalMass="50.0" />
+
+        <BoxROI name="box_roi" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+        <FixedProjectiveConstraint indices="@box_roi.indices" />
+
+        <HexahedronCorotationalFEMForceField name="FEM" template="CudaVec3f"
+                                              youngModulus="2000" poissonRatio="0.3" />
+    </Node>
+
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_quad.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_quad.scn
new file mode 100644
index 00000000000..047a96624f9
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_quad.scn
@@ -0,0 +1,36 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop />
+    <DefaultVisualManagerLoop />
+
+    <Node name="Quad">
+        <RegularGridTopology name="tissue" n="100 100 1" min="0 0 0" max="10 10 0" />
+
+        <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-12" threshold="1.0e-12" />
+        
+        <MechanicalObject position="@../tissue.position" name="dofs" template="CudaVec3f"/>
+
+        <QuadSetTopologyContainer name="Container" src="@tissue"/>
+        <QuadSetTopologyModifier name="Modifier" />
+        <QuadSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+        <DiagonalMass massDensity="0.15" template="CudaVec3f,CudaVec3f"/>
+        <FixedProjectiveConstraint indices="9900 9901 9902 9903 9996 9997 9998 9999" />
+
+        <QuadCorotationalFEMForceField name="FEM" youngModulus="600" poissonRatio="0.3" method="large" template="CudaVec3f"/>
+    </Node>
+
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_tetra.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_tetra.scn
new file mode 100644
index 00000000000..57a39e63286
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_tetra.scn
@@ -0,0 +1,47 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop />
+    <DefaultVisualManagerLoop />
+
+    <Node name="Tetrahedron">
+        <Node name="Beam">
+            <RegularGridTopology name="grid" n="40 10 10" min="0 6 -2" max="16 10 2" />
+            <TetrahedronSetTopologyContainer name="BeamTopo" />
+            <TetrahedronSetTopologyModifier name="Modifier" />
+
+            <Hexa2TetraTopologicalMapping input="@grid" output="@BeamTopo" />
+        </Node>
+        
+        
+        <Node name="Simulated">
+            <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+            <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-12" threshold="1.0e-12" />
+            
+            <MechanicalObject position="@../Beam/grid.position" name="Volume" template="CudaVec3f"/>
+
+            <TetrahedronSetTopologyContainer name="Container" src="@../Beam/BeamTopo"/>
+            <TetrahedronSetTopologyModifier name="Modifier" />
+            <TetrahedronSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+            <DiagonalMass totalMass="50.0" />
+            <BoxROI name="ROI1" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+            
+            <FixedProjectiveConstraint indices="@ROI1.indices" />
+            <TetrahedronCorotationalFEMForceField name="FEM" template="CudaVec3f"
+                                              youngModulus="2000" poissonRatio="0.3" />
+        </Node>  
+    </Node>
+    
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_triangle.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_triangle.scn
new file mode 100644
index 00000000000..0a47b7e393d
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementCorotationalFEMForceField_triangle.scn
@@ -0,0 +1,35 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop />
+    <DefaultVisualManagerLoop />
+
+    <Node name="Triangle">
+        <RegularGridTopology name="tissue" n="100 100 1" min="0 0 0" max="10 10 0" />
+
+        <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-12" threshold="1.0e-12" />
+        
+        <MechanicalObject position="@../tissue.position" name="dofs" template="CudaVec3f"/>
+
+        <TriangleSetTopologyContainer name="Container" src="@tissue"/>
+        <TriangleSetTopologyModifier name="Modifier" />
+        <TriangleSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+        <DiagonalMass massDensity="0.15" template="CudaVec3f,CudaVec3f"/>
+        <FixedProjectiveConstraint indices="9900 9901 9902 9903 9996 9997 9998 9999" />
+
+        <TriangleCorotationalFEMForceField name="FEM" youngModulus="600" poissonRatio="0.3" method="large" template="CudaVec3f"/>
+    </Node>
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_hexa.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_hexa.scn
new file mode 100644
index 00000000000..228e4a08943
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_hexa.scn
@@ -0,0 +1,36 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop/>
+
+    <Node name="Hexahedron">
+        <EulerImplicitSolver name="odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="250" name="linear_solver" tolerance="1.0e-12" threshold="1.0e-12" />
+
+        <RegularGridTopology name="grid" n="40 10 10" min="0 6 -2" max="16 10 2" />
+        <MechanicalObject template="CudaVec3f" name="state"/>
+
+        <HexahedronSetTopologyContainer name="Container" src="@grid"/>
+        <HexahedronSetTopologyModifier name="Modifier" />
+
+        <DiagonalMass totalMass="50.0" />
+
+        <BoxROI name="box_roi" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+        <FixedProjectiveConstraint indices="@box_roi.indices" />
+
+        <HexahedronLinearSmallStrainFEMForceField name="FEM" template="CudaVec3f"
+                                              youngModulus="2000" poissonRatio="0.3" />
+    </Node>
+
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_quad.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_quad.scn
new file mode 100644
index 00000000000..14babc5d207
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_quad.scn
@@ -0,0 +1,35 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop/>
+
+    <Node name="Quad">
+        <RegularGridTopology name="tissue" n="100 100 1" min="0 0 0" max="10 10 0" />
+
+        <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-12" threshold="1.0e-12" />
+        
+        <MechanicalObject position="@../tissue.position" name="dofs" template="CudaVec3f"/>
+
+        <QuadSetTopologyContainer name="Container" src="@tissue"/>
+        <QuadSetTopologyModifier name="Modifier" />
+        <QuadSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+        <DiagonalMass massDensity="0.15" template="CudaVec3f,CudaVec3f"/>
+        <FixedProjectiveConstraint indices="9900 9901 9902 9903 9996 9997 9998 9999" />
+
+        <QuadLinearSmallStrainFEMForceField name="FEM" youngModulus="600" poissonRatio="0.3" method="large" template="CudaVec3f"/>
+    </Node>
+
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_tetra.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_tetra.scn
new file mode 100644
index 00000000000..eac98ff93ac
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_tetra.scn
@@ -0,0 +1,46 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop/>
+
+    <Node name="Tetrahedron">
+        <Node name="Beam">
+            <RegularGridTopology name="grid" n="40 10 10" min="0 6 -2" max="16 10 2" />
+            <TetrahedronSetTopologyContainer name="BeamTopo" />
+            <TetrahedronSetTopologyModifier name="Modifier" />
+
+            <Hexa2TetraTopologicalMapping input="@grid" output="@BeamTopo" />
+        </Node>
+        
+        
+        <Node name="TetrahedronFEMForceField-GPU-Green">
+            <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-12" threshold="1.0e-12" />
+            
+            <MechanicalObject position="@../Beam/grid.position" name="Volume" template="CudaVec3f"/>
+
+            <TetrahedronSetTopologyContainer name="Container" src="@../Beam/BeamTopo"/>
+            <TetrahedronSetTopologyModifier name="Modifier" />
+            <TetrahedronSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+            <DiagonalMass totalMass="50.0" />
+            <BoxROI name="ROI1" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+            
+            <FixedProjectiveConstraint indices="@ROI1.indices" />
+            <TetrahedronLinearSmallStrainFEMForceField name="FEM" template="CudaVec3f"
+                                              youngModulus="2000" poissonRatio="0.3" />
+        </Node>  
+    </Node>
+
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_triangle.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_triangle.scn
new file mode 100644
index 00000000000..03c6955697c
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/CudaElementLinearSmallStrainFEMForceField_triangle.scn
@@ -0,0 +1,35 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9.81 0" dt="0.01">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/>
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/>
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/>
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/>
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/>
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/>
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/>
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/>
+    <RequiredPlugin pluginName="SofaCUDA"/>
+
+    <VisualStyle displayFlags="showBehaviorModels showForceFields" />
+    <DefaultAnimationLoop/>
+
+    <Node name="Triangle">
+        <RegularGridTopology name="tissue" n="100 100 1" min="0 0 0" max="10 10 0" />
+
+        <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+        <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-12" threshold="1.0e-12" />
+        
+        <MechanicalObject position="@../tissue.position" name="dofs" template="CudaVec3f"/>
+
+        <TriangleSetTopologyContainer name="Container" src="@tissue"/>
+        <TriangleSetTopologyModifier name="Modifier" />
+        <TriangleSetGeometryAlgorithms name="GeomAlgo" template="CudaVec3f" />
+
+        <DiagonalMass massDensity="0.15" template="CudaVec3f,CudaVec3f"/>
+        <FixedProjectiveConstraint indices="9900 9901 9902 9903 9996 9997 9998 9999" />
+
+        <TriangleLinearSmallStrainFEMForceField name="FEM" youngModulus="600" poissonRatio="0.3" method="large" template="CudaVec3f"/>
+    </Node>
+
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py
new file mode 100644
index 00000000000..aec2063fbe6
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py
@@ -0,0 +1,93 @@
+import Sofa
+
+import os
+import numpy as np
+from utilities import generate_regular_grid
+
+g_grid_min_corner=(0, 6, -2)
+g_grid_max_corner=(16, 10, 2)
+
+g_fem_version = os.environ.get('FEM_VERSION', 'new') #either 'new' or 'legacy'
+g_fem_template = os.environ.get('FEM_TEMPLATE', 'Vec3d')
+
+# default is (76, 16, 16)
+g_grid_nx = int(os.environ.get('NX', '76'))
+g_grid_ny = int(os.environ.get('NY', '16'))
+g_grid_nz = int(os.environ.get('NZ', '16'))
+
+g_nb_steps = int(os.environ.get('NBSTEPS', '1000'))
+
+def createScene(root_node):
+    root_node.name = "root"
+    root_node.gravity = (0, -9, 0)
+    root_node.dt = 0.01
+
+    plugin_node = root_node.addChild('Plugins')
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Engine.Select")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.LinearSolver.Iterative")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.ODESolver.Backward")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.StateContainer")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Topology.Container.Dynamic")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Topology.Container.Grid")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Visual")
+    plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.Constraint.Projective') # Needed to use components [FixedProjectiveConstraint]  
+    plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.Mass') # Needed to use components [DiagonalMass]  
+    plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.SolidMechanics.FEM.Elastic') # Needed to use components [HexahedronCorotationalFEMForceField]
+    plugin_node.addObject('RequiredPlugin', pluginName='SofaCUDA.Component')
+    plugin_node.addObject('VisualStyle', displayFlags="showBehaviorModels showForceFields")
+
+    root_node.addObject('DefaultAnimationLoop')
+    root_node.addObject('VisualStyle', displayFlags="showBehaviorModels showForceFields")
+
+    grid_nodes, grid_hexa = generate_regular_grid(nx=g_grid_nx, ny=g_grid_ny, nz=g_grid_nz, min_corner=g_grid_min_corner, max_corner=g_grid_max_corner)
+
+    hexahedron_node = root_node.addChild('Hexahedron')
+    hexahedron_node.addObject('EulerImplicitSolver', rayleighStiffness="0.1", rayleighMass="0.1")
+    hexahedron_node.addObject('CGLinearSolver', iterations="250", name="linear_solver", tolerance="1.0e-12", threshold="1.0e-12")
+    hexahedron_node.addObject('MechanicalObject', name="ms", template=g_fem_template, position=grid_nodes)
+    hexahedron_node.addObject('HexahedronSetTopologyContainer', hexahedra=grid_hexa)
+    hexahedron_node.addObject('DiagonalMass', totalMass="50.0")
+    hexahedron_node.addObject('BoxROI', name="boxroi1", box="-0.1 5 -3 0.1 11 3", drawBoxes="1")
+    hexahedron_node.addObject('FixedProjectiveConstraint', indices="@boxroi1.indices")
+    if g_fem_version == "legacy":
+        hexahedron_node.addObject('HexahedronFEMForceField', name="LegacyFEM", template=g_fem_template, youngModulus="4000", poissonRatio="0.3", method="large")
+    if g_fem_version == "new":
+        hexahedron_node.addObject('HexahedronCorotationalFEMForceField', name="NewFEM", template=g_fem_template, youngModulus="4000", poissonRatio="0.3")
+
+def main():
+
+    enable_gui = False
+
+    try:
+        import Sofa.Gui
+        import SofaImGui
+    except:
+        enable_gui = False
+    
+    root = Sofa.Core.Node("root")
+    createScene(root)
+    
+    Sofa.Simulation.initRoot(root)
+
+    if enable_gui:
+        Sofa.Gui.GUIManager.Init("myscene","imgui")
+        Sofa.Gui.GUIManager.createGUI(root, __file__)
+        Sofa.Gui.GUIManager.MainLoop(root)
+        Sofa.Gui.GUIManager.closeGUI()
+    else:
+        import time
+
+        print(f"Running on {g_nb_steps} steps...")
+        start_timer = time.time()
+
+        for iteration in range(g_nb_steps):
+            Sofa.Simulation.animate(root, root.dt.value)
+
+        stop_timer = time.time()
+        print(f"... Done.")
+        print(f"{g_nb_steps} steps done in {stop_timer - start_timer:.3}s ({g_nb_steps/(stop_timer - start_timer):.5} fps).")
+
+
+# Function used only if this script is called from a python environment
+if __name__ == '__main__':
+    main()
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py.view b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py.view
new file mode 100644
index 00000000000..1e9c7f6670c
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Hexahedron_corotational.py.view
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Camera version="1.0">
+    <!--Vector of 3 reals (x, y, z)-->
+    <position value="16.2475 12.9323 9.34711"/>
+    <!--Quaternion (x, y, z, w)-->
+    <orientation value="-0.208716 0.28044 0.0158216 0.936772"/>
+    <!--Real-->
+    <fieldOfView value="45"/>
+    <!--Real-->
+    <distance value="19.4175"/>
+    <!--Real-->
+    <zNear value="0.212191"/>
+    <!--Real-->
+    <zFar value="25.9888"/>
+    <!--Int (0 -> Perspective, 1 -> Orthographic)-->
+    <projectionType value="Perspective"/>
+</Camera>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py
new file mode 100644
index 00000000000..d9480c34dfa
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py
@@ -0,0 +1,94 @@
+import Sofa
+
+import os
+import numpy as np
+from utilities import generate_regular_grid, hexa_to_tetra
+
+g_grid_min_corner=(0, 6, -2)
+g_grid_max_corner=(16, 10, 2)
+
+g_fem_version = os.environ.get('FEM_VERSION', 'new') #either 'new' or 'legacy'
+g_fem_template = os.environ.get('FEM_TEMPLATE', 'Vec3d')
+
+# default is (76, 16, 16)
+g_grid_nx = int(os.environ.get('NX', '76'))
+g_grid_ny = int(os.environ.get('NY', '16'))
+g_grid_nz = int(os.environ.get('NZ', '16'))
+
+g_nb_steps = int(os.environ.get('NBSTEPS', '1000'))
+
+def createScene(root_node):
+    root_node.name = "root"
+    root_node.gravity = (0, -9, 0)
+    root_node.dt = 0.01
+
+    plugin_node = root_node.addChild('Plugins')
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Engine.Select")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.LinearSolver.Iterative")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.ODESolver.Backward")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.StateContainer")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Topology.Container.Dynamic")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Topology.Container.Grid")
+    plugin_node.addObject('RequiredPlugin', pluginName="Sofa.Component.Visual")
+    plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.Constraint.Projective') # Needed to use components [FixedProjectiveConstraint]  
+    plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.Mass') # Needed to use components [DiagonalMass]  
+    plugin_node.addObject('RequiredPlugin', pluginName='Sofa.Component.SolidMechanics.FEM.Elastic') # Needed to use components [TetrahedronCorotationalFEMForceField]
+    plugin_node.addObject('RequiredPlugin', pluginName='SofaCUDA.Component')
+    plugin_node.addObject('VisualStyle', displayFlags="showBehaviorModels showForceFields")
+
+    root_node.addObject('DefaultAnimationLoop')
+    root_node.addObject('VisualStyle', displayFlags="showBehaviorModels showForceFields")
+
+    grid_nodes, grid_hexa = generate_regular_grid(nx=g_grid_nx, ny=g_grid_ny, nz=g_grid_nz, min_corner=g_grid_min_corner, max_corner=g_grid_max_corner)
+    grid_tetra = hexa_to_tetra(grid_hexa)
+    
+    tetrahedron_node = root_node.addChild('Tetrahedron')
+    tetrahedron_node.addObject('EulerImplicitSolver', rayleighStiffness="0.1", rayleighMass="0.1")
+    tetrahedron_node.addObject('CGLinearSolver', iterations="250", name="linear_solver", tolerance="1.0e-12", threshold="1.0e-12")
+    tetrahedron_node.addObject('MechanicalObject', name="ms", template=g_fem_template, position=grid_nodes)
+    tetrahedron_node.addObject('TetrahedronSetTopologyContainer', tetrahedra=grid_tetra)
+    tetrahedron_node.addObject('DiagonalMass', totalMass="50.0")
+    tetrahedron_node.addObject('BoxROI', name="boxroi1", box="-0.1 5 -3 0.1 11 3", drawBoxes="1")
+    tetrahedron_node.addObject('FixedProjectiveConstraint', indices="@boxroi1.indices")
+    if g_fem_version == "legacy":
+        tetrahedron_node.addObject('TetrahedronFEMForceField', name="LegacyFEM", template=g_fem_template, youngModulus="4000", poissonRatio="0.3", method="large")
+    if g_fem_version == "new":
+        tetrahedron_node.addObject('TetrahedronCorotationalFEMForceField', name="NewFEM", template=g_fem_template, youngModulus="4000", poissonRatio="0.3")
+
+def main():
+
+    enable_gui = False
+
+    try:
+        import Sofa.Gui
+        import SofaImGui
+    except:
+        enable_gui = False
+    
+    root = Sofa.Core.Node("root")
+    createScene(root)
+    
+    Sofa.Simulation.initRoot(root)
+
+    if enable_gui:
+        Sofa.Gui.GUIManager.Init("myscene","imgui")
+        Sofa.Gui.GUIManager.createGUI(root, __file__)
+        Sofa.Gui.GUIManager.MainLoop(root)
+        Sofa.Gui.GUIManager.closeGUI()
+    else:
+        import time
+
+        print(f"Running on {g_nb_steps} steps...")
+        start_timer = time.time()
+
+        for iteration in range(g_nb_steps):
+            Sofa.Simulation.animate(root, root.dt.value)
+
+        stop_timer = time.time()
+        print(f"... Done.")
+        print(f"{g_nb_steps} steps done in {stop_timer - start_timer:.3}s ({g_nb_steps/(stop_timer - start_timer):.5} fps).")
+
+
+# Function used only if this script is called from a python environment
+if __name__ == '__main__':
+    main()
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py.view b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py.view
new file mode 100644
index 00000000000..433112afafd
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/Tetrahedron_corotational.py.view
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Camera version="1.0">
+    <!--Vector of 3 reals (x, y, z)-->
+    <position value="17.3888 11.1211 10.0501"/>
+    <!--Quaternion (x, y, z, w)-->
+    <orientation value="-0.136912 0.285982 -0 0.948404"/>
+    <!--Real-->
+    <fieldOfView value="45"/>
+    <!--Real-->
+    <distance value="20.435"/>
+    <!--Real-->
+    <zNear value="0.212191"/>
+    <!--Real-->
+    <zFar value="27.026"/>
+    <!--Int (0 -> Perspective, 1 -> Orthographic)-->
+    <projectionType value="Perspective"/>
+</Camera>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/utilities.py b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/utilities.py
new file mode 100644
index 00000000000..3bdf301c641
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/benchmarks/utilities.py
@@ -0,0 +1,114 @@
+import numpy as np
+
+def generate_regular_grid(nx=10, ny=10, nz=10, min_corner=(0, 0, 0), max_corner=(1, 1, 1)):                                                                                                                       
+    """                                                                                                                                                                                                  
+    Generate a regular grid of hexahedra.                                                                                                                                                                
+                                                                                                                                                                                                       
+    Args:                                                                                                                                                                                                
+      nx, ny, nz: Number of vertices in each direction (grid resolution)                                                                                                                               
+      min_corner: (xmin, ymin, zmin) tuple                                                                                                                                                             
+      max_corner: (xmax, ymax, zmax) tuple                                                                                                                                                             
+                                                                                                                                                                                                       
+    Returns:                                                                                                                                                                                             
+      points: array of shape (nx*ny*nz, 3) - vertex positions                                                                                                                                          
+      hexahedra: array of shape ((nx-1)*(ny-1)*(nz-1), 8) - hexahedra indices                                                                                                                          
+    """
+    xmin, ymin, zmin = min_corner                                                                                                                                                                        
+    xmax, ymax, zmax = max_corner                                                                                                                                                                        
+                                                                                                                                                                                                       
+    # Compute spacing                                                                                                                                                                                    
+    dx = (xmax - xmin) / (nx - 1) if nx > 1 else 0                                                                                                                                                       
+    dy = (ymax - ymin) / (ny - 1) if ny > 1 else 0                                                                                                                                                       
+    dz = (zmax - zmin) / (nz - 1) if nz > 1 else 0                                                                                                                                                       
+                                                                                                                                                                                                       
+    # Generate points                                                                                                                                                                                    
+    points = []                                                                                                                                                                                          
+    for k in range(nz):                                                                                                                                                                                  
+      for j in range(ny):                                                                                                                                                                              
+          for i in range(nx):                                                                                                                                                                          
+              points.append([xmin + i*dx, ymin + j*dy, zmin + k*dz])                                                                                                                                   
+    points = np.array(points)                                                                                                                                                                            
+                                                                                                                                                                                                       
+    # Helper to get point index from grid coordinates                                                                                                                                                    
+    def point_index(i, j, k):                                                                                                                                                                            
+      return nx * (ny * k + j) + i                                                                                                                                                                     
+                                                                                                                                                                                                       
+    # Generate hexahedra (8 vertices per hexa, in SOFA convention)                                                                                                                                       
+    hexahedra = []                                                                                                                                                                                       
+    for k in range(nz - 1):                                                                                                                                                                              
+      for j in range(ny - 1):                                                                                                                                                                          
+          for i in range(nx - 1):                                                                                                                                                                      
+              hexa = [                                                                                                                                                                                 
+                  point_index(i,   j,   k),                                                                                                                                                            
+                  point_index(i+1, j,   k),                                                                                                                                                            
+                  point_index(i+1, j+1, k),                                                                                                                                                            
+                  point_index(i,   j+1, k),                                                                                                                                                            
+                  point_index(i,   j,   k+1),                                                                                                                                                          
+                  point_index(i+1, j,   k+1),                                                                                                                                                          
+                  point_index(i+1, j+1, k+1),                                                                                                                                                          
+                  point_index(i,   j+1, k+1),                                                                                                                                                          
+              ]                                                                                                                                                                                        
+              hexahedra.append(hexa)                                                                                                                                                                   
+    hexahedra = np.array(hexahedra)                                                                                                                                                                      
+                                                                                                                                                                                                       
+    return points, hexahedra
+
+def hexa_to_tetra(hexahedra):                                                                                                                                                                            
+    """                                                                                                                                                                                                  
+    Convert hexahedra to tetrahedra.                                                                                                                                                                     
+                                                                                                                                                                                                       
+    Each hexahedron is split into 5 tetrahedra.                                                                                                                                                          
+                                                                                                                                                                                                       
+    Args:       
+      hexahedra: array of shape (N, 8) - hexahedra vertex indices
+
+    Returns:
+      tetrahedra: array of shape (N*5, 4) - tetrahedra vertex indices
+    """
+    tetrahedra = []
+
+    # 5-tetra decomposition using diagonal 1-3-4-6
+    splits = [
+      [0, 1, 3, 4],
+      [1, 2, 3, 6],
+      [1, 4, 5, 6],
+      [3, 4, 6, 7],
+      [1, 3, 4, 6],  # central tetrahedron                                                                                                                                                             
+    ]
+                                                                                                                                                                                                       
+    for hexa in hexahedra:
+      for split in splits:                                                                                                                                                                             
+          tetrahedra.append([hexa[i] for i in split])
+                                                                                                                                                                                                       
+    return np.array(tetrahedra)
+
+def hexa_to_tetra_symmetric(hexahedra):                                                                                                                                                                  
+    """                                                                                                                                                                                                  
+    Convert hexahedra to tetrahedra using symmetric 6-tetra decomposition.                                                                                                                               
+
+    Each hexahedron is split into 6 tetrahedra around the space diagonal (0-6).                                                                                                                          
+    Better symmetry properties for FEM simulations.
+
+    Args:
+      hexahedra: array of shape (N, 8) - hexahedra vertex indices
+
+    Returns:
+      tetrahedra: array of shape (N*6, 4) - tetrahedra vertex indices
+    """
+    tetrahedra = []
+
+    # 6-tetra symmetric decomposition around diagonal 0-6
+    splits = [
+      [0, 1, 2, 6],
+      [0, 2, 3, 6],
+      [0, 3, 7, 6],
+      [0, 7, 4, 6],
+      [0, 4, 5, 6],
+      [0, 5, 1, 6],
+    ]
+
+    for hexa in hexahedra:
+      for split in splits:
+          tetrahedra.append([hexa[i] for i in split])
+
+    return np.array(tetrahedra)
\ No newline at end of file
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn
new file mode 100644
index 00000000000..6875f9a8849
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn
@@ -0,0 +1,95 @@
+<?xml version="1.0" ?>
+<Node name="root" gravity="0 -9 0" dt="0.02">
+    <RequiredPlugin pluginName="Sofa.Component.Constraint.Projective"/> <!-- Needed to use components [FixedProjectiveConstraint] -->  
+    <RequiredPlugin pluginName="Sofa.Component.Engine.Select"/> <!-- Needed to use components [BoxROI] -->  
+    <RequiredPlugin pluginName="Sofa.Component.LinearSolver.Iterative"/> <!-- Needed to use components [CGLinearSolver] -->  
+    <RequiredPlugin pluginName="Sofa.Component.Mapping.Linear"/> <!-- Needed to use components [IdentityMapping] -->  
+    <RequiredPlugin pluginName="Sofa.Component.Mass"/> <!-- Needed to use components [DiagonalMass] -->  
+    <RequiredPlugin pluginName="Sofa.Component.ODESolver.Backward"/> <!-- Needed to use components [EulerImplicitSolver] -->  
+    <RequiredPlugin pluginName="Sofa.Component.SolidMechanics.FEM.Elastic"/> <!-- Needed to use components [TetrahedronCorotationalFEMForceField] -->  
+    <RequiredPlugin pluginName="Sofa.Component.StateContainer"/> <!-- Needed to use components [MechanicalObject] -->  
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Dynamic"/> <!-- Needed to use components [HexahedronSetTopologyContainer,HexahedronSetTopologyModifier,QuadSetTopologyContainer,QuadSetTopologyModifier] -->  
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Container.Grid"/> <!-- Needed to use components [RegularGridTopology] -->  
+    <RequiredPlugin pluginName="Sofa.Component.Topology.Mapping"/> <!-- Needed to use components [Hexa2QuadTopologicalMapping] -->  
+    <RequiredPlugin pluginName="Sofa.Component.Visual"/> <!-- Needed to use components [VisualStyle] -->  
+    <RequiredPlugin pluginName="Sofa.GL.Component.Rendering3D"/> <!-- Needed to use components [OglModel] -->
+    <RequiredPlugin pluginName="SofaCUDA"/> <!-- Needed to use components [OglModel] -->
+  
+    <VisualStyle displayFlags="showBehaviorModels showVisual" />
+    
+    <DefaultAnimationLoop />
+    <DefaultVisualManagerLoop />
+    
+    <Node name="TetrahedronCorotationalFEMForceField-GPU-Green">
+        <Node name="Beam">
+            <RegularGridTopology name="grid" n="40 10 10" min="0 6 -2" max="16 10 2" />
+            <TetrahedronSetTopologyContainer name="BeamTopo" />
+            <TetrahedronSetTopologyModifier name="Modifier" />
+
+            <Hexa2TetraTopologicalMapping input="@grid" output="@BeamTopo" />
+        </Node>
+        
+        <Node name="Simulated">
+            <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+            <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-12" threshold="1.0e-12" />
+            
+            <MechanicalObject position="@../Beam/grid.position" name="Volume" template="CudaVec3f"/>
+
+            <TetrahedronSetTopologyContainer name="Container" src="@../Beam/BeamTopo"/>
+
+            <DiagonalMass totalMass="50.0" />
+            <BoxROI name="ROI1" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+            
+            <FixedProjectiveConstraint indices="@ROI1.indices" />
+            <TetrahedronCorotationalFEMForceField name="FEM" template="CudaVec3f" youngModulus="2000" poissonRatio="0.3" />
+            <Node name="surface">
+                <TriangleSetTopologyContainer name="Container" />
+                <TriangleSetTopologyModifier name="Modifier" />
+                <TriangleSetGeometryAlgorithms template="CudaVec3f" name="GeomAlgo" />
+                
+                <Tetra2TriangleTopologicalMapping input="@../Container" output="@Container" />
+                <Node name="Visu">
+                    <OglModel name="Visual" color="green" />
+                    <IdentityMapping input="@../../Volume" output="@Visual" />
+                </Node>
+            </Node>
+        </Node>  
+    </Node>
+    
+
+    <Node name="TetrahedronCorotationalFEMForceField-CPU-red">
+        <Node name="Beam">
+            <RegularGridTopology name="grid" n="40 10 10" min="0 6 -2" max="16 10 2" />
+            <TetrahedronSetTopologyContainer name="BeamTopo" />
+            <TetrahedronSetTopologyModifier name="Modifier" />
+
+            <Hexa2TetraTopologicalMapping input="@grid" output="@BeamTopo" />
+        </Node>
+        
+        <Node name="Simulated">
+            <EulerImplicitSolver name="cg_odesolver" rayleighStiffness="0.1" rayleighMass="0.1" />
+            <CGLinearSolver iterations="20" name="linear solver" tolerance="1.0e-12" threshold="1.0e-12" />
+            
+            <MechanicalObject position="@../Beam/grid.position" name="Volume" template="Vec3d"/>
+
+            <TetrahedronSetTopologyContainer name="Container" src="@../Beam/BeamTopo"/>
+
+            <DiagonalMass totalMass="50.0" />
+            <BoxROI name="ROI1" box="-0.1 5 -3 0.1 11 3" drawBoxes="1" />
+            
+            <FixedProjectiveConstraint indices="@ROI1.indices" />
+            <TetrahedronCorotationalFEMForceField name="FEM" youngModulus="2000" poissonRatio="0.3" />
+            <Node name="surface">
+                <TriangleSetTopologyContainer name="Container" />
+                <TriangleSetTopologyModifier name="Modifier" />
+                <TriangleSetGeometryAlgorithms template="Vec3d" name="GeomAlgo" />
+                
+                <Tetra2TriangleTopologicalMapping input="@../Container" output="@Container" />
+                <Node name="Visu">
+                    <OglModel name="Visual" color="red" />
+                    <IdentityMapping input="@../../Volume" output="@Visual" />
+                </Node>
+            </Node>
+        </Node> 
+    </Node>
+</Node>
diff --git a/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn.view b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn.view
new file mode 100644
index 00000000000..3bf10e74929
--- /dev/null
+++ b/applications/plugins/SofaCUDA/examples/ElementFEMForcefield/cpu-gpu_validation/ElementCorotationalFEMForceField_tetra_cpu_gpu.scn.view
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Camera version="1.0">
+    <!--Vector of 3 reals (x, y, z)-->
+    <position value="16.6922 13.3665 14.4075"/>
+    <!--Quaternion (x, y, z, w)-->
+    <orientation value="-0.209766 0.206744 -0.0247238 0.955324"/>
+    <!--Real-->
+    <fieldOfView value="45"/>
+    <!--Real-->
+    <distance value="23.4743"/>
+    <!--Real-->
+    <zNear value="0.212191"/>
+    <!--Real-->
+    <zFar value="29.6313"/>
+    <!--Int (0 -> Perspective, 1 -> Orthographic)-->
+    <projectionType value="Perspective"/>
+</Camera>
diff --git a/examples/Validation/cantilever_beam/CantileverBeam_ElementFEMForceField.xml b/examples/Validation/cantilever_beam/CantileverBeam_ElementFEMForceField.xml
index 4e1fab99d9e..2133c327c4a 100644
--- a/examples/Validation/cantilever_beam/CantileverBeam_ElementFEMForceField.xml
+++ b/examples/Validation/cantilever_beam/CantileverBeam_ElementFEMForceField.xml
@@ -27,7 +27,7 @@
 
     <EulerImplicitSolver name="backward_Euler" rayleighStiffness="0.1" rayleighMass="0.1" />
 
-    <RegularGridTopology name="grid" min="-0.01 -0.01 0" max="0.01 0.01 0.2" n="5 5 30"/>
+    <RegularGridTopology name="grid" min="-0.01 -0.01 0" max="0.01 0.01 0.2" n="10 10 60"/>
     <MechanicalObject template="Vec3" name="state" showObject="true"/>
 
     <MeshMatrixMass massDensity="1100" topology="@grid"/>