Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion Engine/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ torqueAddSourceDirectories("windowManager" "windowManager/torque" "windowManager
torqueAddSourceDirectories("scene" "scene/culling" "scene/zones" "scene/mixin")

# Handle math
torqueAddSourceDirectories("math" "math/util")
torqueAddSourceDirectories("math" "math/util" "math/public" "math/impl") # note impl must skip the .inl files, never use them in engine code.

# Handle persistence
set(TORQUE_INCLUDE_DIRECTORIES ${TORQUE_INCLUDE_DIRECTORIES} "persistence/rapidjson")
Expand Down Expand Up @@ -496,6 +496,37 @@ else()
set_target_properties(${TORQUE_APP_NAME} PROPERTIES LINK_FLAGS "-Wl,-rpath,./")
endif()

# Classify the target CPU so that only the SIMD math backends relevant to it
# are registered. Matching is done on a lower-cased copy of
# CMAKE_SYSTEM_PROCESSOR; the patterns are unanchored, so they match anywhere
# in the processor string.
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" ARCH)

# x86 family: x86_64, amd64 and i386 through i686.
if(ARCH MATCHES "x86_64|amd64|i[3-6]86")
  set(IS_X86 TRUE)
else()
  set(IS_X86 FALSE)
endif()

# 64-bit ARM: arm64 and aarch64.
if(ARCH MATCHES "arm64|aarch64")
  set(IS_ARM TRUE)
else()
  set(IS_ARM FALSE)
endif()

# The scalar backend is registered unconditionally, whatever the processor.
add_math_backend(scalar MATH_SIMD_SCALAR)

# Report the detection result in the configure log.
message(STATUS "Processor: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "IS_X86=${IS_X86}")
message(STATUS "IS_ARM=${IS_ARM}")

# x86 backends, registered in the order sse2, sse41, avx, avx2.
if(IS_X86)
  add_math_backend(sse2  MATH_SIMD_SSE2)
  add_math_backend(sse41 MATH_SIMD_SSE41)
  add_math_backend(avx   MATH_SIMD_AVX)
  add_math_backend(avx2  MATH_SIMD_AVX2)
endif()

# ARM backend.
if(IS_ARM)
  add_math_backend(neon MATH_SIMD_NEON)
endif()

if(MSVC)
# Match projectGenerator naming for executables
Expand Down
4 changes: 2 additions & 2 deletions Engine/source/environment/meshRoad.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3406,7 +3406,7 @@ MatrixF MeshRoad::getNodeTransform( U32 idx )
mat.setColumn( 2, node.normal );
mat.setColumn( 3, node.point );

AssertFatal( m_matF_determinant( mat ) != 0.0f, "no inverse!");
AssertFatal(mat.determinant() != 0.0f, "no inverse!");

return mat;
}
Expand Down Expand Up @@ -3456,7 +3456,7 @@ void MeshRoad::calcSliceTransform( U32 idx, MatrixF &mat )
mat.setColumn( 2, slice.normal );
mat.setColumn( 3, slice.p1 );

AssertFatal( m_matF_determinant( mat ) != 0.0f, "no inverse!");
AssertFatal(mat.determinant() != 0.0f, "no inverse!");
}

F32 MeshRoad::getRoadLength() const
Expand Down
2 changes: 1 addition & 1 deletion Engine/source/environment/river.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2139,7 +2139,7 @@ MatrixF River::getNodeTransform( U32 idx ) const
mat.setColumn( 2, node.normal );
mat.setColumn( 3, node.point );

AssertFatal( m_matF_determinant( mat ) != 0.0f, "no inverse!");
AssertFatal( mat.determinant() != 0.0f, "no inverse!");

return mat;
}
Expand Down
122 changes: 122 additions & 0 deletions Engine/source/math/impl/float3_impl.inl
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#pragma once
#include <cmath> // for sqrtf, etc.
#include "../mConstants.h"

// float3 backend: each operation loads a 3-float vector into a 4-lane SIMD register (f32x4) before operating on it.
namespace math_backend::float3
{
//----------------------------------------------------------
// Add two float4 vectors: r = a + b
inline void float3_add_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load3_vec(a);
f32x4 vb = v_load3_vec(b);
f32x4 vr = v_add(va, vb);
v_store3(r, vr);
}

// Subtract: r = a - b
inline void float3_sub_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load3_vec(a);
f32x4 vb = v_load3_vec(b);
f32x4 vr = v_sub(va, vb);
v_store3(r, vr);
}

// Multiply element-wise: r = a * b
inline void float3_mul_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load3_vec(a);
f32x4 vb = v_load3_vec(b);
f32x4 vr = v_mul(va, vb);
v_store3(r, vr);
}

// Multiply by scalar: r = a * s
inline void float3_mul_scalar_impl(const float* a, float s, float* r)
{
f32x4 va = v_load3_vec(a);
f32x4 vs = v_set1(s);
f32x4 vr = v_mul(va, vs);
v_store3(r, vr);
}

// Divide element-wise: r = a / b
inline void float3_div_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load3_vec(a);
f32x4 vb = v_load3_vec(b);
f32x4 vr = v_div(va, vb);
v_store3(r, vr);
}

// Divide by scalar: r = a / s
inline void float3_div_scalar_impl(const float* a, float s, float* r)
{
f32x4 va = v_load3_vec(a);
f32x4 vs = v_set1(s);
f32x4 vr = v_div(va, vs);
v_store3(r, vr);
}

// Dot product: returns scalar
inline float float3_dot_impl(const float* a, const float* b)
{
f32x4 va = v_load3_vec(a);
f32x4 vb = v_load3_vec(b);
f32x4 vdot = v_dot3(va, vb);
return v_extract0(vdot); // first lane is the sum of 3 elements
}

// Length squared
inline float float3_length_squared_impl(const float* a)
{
return float3_dot_impl(a, a);
}

// Length
inline float float3_length_impl(const float* a)
{
return std::sqrt(float3_length_squared_impl(a));
}

// Normalize in-place
inline void float3_normalize_impl(float* a)
{
f32x4 va = v_load3_vec(a);
f32x4 vr = v_normalize3(va);
v_store3(a, vr);
}

// Normalize with magnitude: r = normalize(a) * r
inline void float3_normalize_mag_impl(float* a, float r)
{
f32x4 va = v_load3_vec(a);

// invLen = r / sqrt(dot(a,a)) = r * rsqrt(dot(a,a))
f32x4 invLen = v_mul(v_set1(r), v_rsqrt_nr(v_dot3(va, va)));

f32x4 vnorm = v_mul(va, invLen);
v_store3(a, vnorm);
}

// Linear interpolation: r = from + (to - from) * f
inline void float3_lerp_impl(const float* from, const float* to, float f, float* r)
{
f32x4 vfrom = v_load3_vec(from);
f32x4 vto = v_load3_vec(to);
f32x4 vf = v_set1(f);
f32x4 vr = v_add(vfrom, v_mul(vf, v_sub(vto, vfrom)));
v_store3(r, vr);
}

inline void float3_cross_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load3_vec(a);
f32x4 vb = v_load3_vec(b);
f32x4 vcross = v_cross(va, vb);
v_store3(r, vcross);
}

}
123 changes: 123 additions & 0 deletions Engine/source/math/impl/float4_impl.inl
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#pragma once
#include <cmath> // for sqrtf, etc.
#include "../mConstants.h"

namespace math_backend::float4
{

//----------------------------------------------------------
// Add two float4 vectors: r = a + b
inline void float4_add_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load(a);
f32x4 vb = v_load(b);
f32x4 vr = v_add(va, vb);
v_store(r, vr);
}

// Subtract: r = a - b
inline void float4_sub_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load(a);
f32x4 vb = v_load(b);
f32x4 vr = v_sub(va, vb);
v_store(r, vr);
}

// Multiply element-wise: r = a * b
inline void float4_mul_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load(a);
f32x4 vb = v_load(b);
f32x4 vr = v_mul(va, vb);
v_store(r, vr);
}

// Multiply by scalar: r = a * s
inline void float4_mul_scalar_impl(const float* a, float s, float* r)
{
f32x4 va = v_load(a);
f32x4 vs = v_set1(s);
f32x4 vr = v_mul(va, vs);
v_store(r, vr);
}

// Divide element-wise: r = a / b
inline void float4_div_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load(a);
f32x4 vb = v_load(b);
f32x4 vr = v_div(va, vb);
v_store(r, vr);
}

// Divide by scalar: r = a / s
inline void float4_div_scalar_impl(const float* a, float s, float* r)
{
f32x4 va = v_load(a);
f32x4 vs = v_set1(s);
f32x4 vr = v_div(va, vs);
v_store(r, vr);
}

// Dot product: returns scalar
inline float float4_dot_impl(const float* a, const float* b)
{
f32x4 va = v_load(a);
f32x4 vb = v_load(b);
f32x4 vdot = v_dot4(va, vb); // calls ISA-specific implementation
return v_extract0(vdot);
}

// Length squared
inline float float4_length_squared_impl(const float* a)
{
return float4_dot_impl(a, a);
}

// Length
inline float float4_length_impl(const float* a)
{
return std::sqrt(float4_length_squared_impl(a));
}

// Normalize in-place
inline void float4_normalize_impl(float* a)
{
f32x4 va = v_load(a);
f32x4 invLen = v_rsqrt_nr(v_dot4(va, va)); // fully abstracted
f32x4 vnorm = v_mul(va, invLen);
v_store(a, vnorm);
}

// Normalize with magnitude: r = normalize(a) * r
inline void float4_normalize_mag_impl(float* a, float r)
{
f32x4 va = v_load(a);

// invLen = r / sqrt(dot(a,a)) = r * rsqrt(dot(a,a))
f32x4 invLen = v_mul(v_set1(r), v_rsqrt_nr(v_dot4(va, va)));

f32x4 vnorm = v_mul(va, invLen);
v_store(a, vnorm);
}

// Linear interpolation: r = from + (to - from) * f
inline void float4_lerp_impl(const float* from, const float* to, float f, float* r)
{
f32x4 vfrom = v_load(from);
f32x4 vto = v_load(to);
f32x4 vf = v_set1(f);
f32x4 vr = v_add(vfrom, v_mul(vf, v_sub(vto, vfrom)));
v_store(r, vr);
}

inline void float4_cross_impl(const float* a, const float* b, float* r)
{
f32x4 va = v_load(a);
f32x4 vb = v_load(b);
f32x4 vcross = v_cross(va, vb);
v_store(r, vcross);
}

} // namespace math_backend::float4
Loading
Loading