Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions kernels/bvh/node_intersector1.h
Original file line number Diff line number Diff line change
Expand Up @@ -539,15 +539,40 @@ namespace embree
template<int N>
__forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
{
const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x;
const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
const vfloat<N> tFarX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x;
const vfloat<N> tFarY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y;
const vfloat<N> tFarZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z;
const vfloat<N> lowerX = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX));
const vfloat<N> lowerY = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY));
const vfloat<N> lowerZ = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ));
const vfloat<N> upperX = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX ));
const vfloat<N> upperY = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY ));
const vfloat<N> upperZ = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ ));

const vfloat<N> tNearX0 = (lowerX - ray.org.x) * ray.rdir_near.x;
const vfloat<N> tNearY0 = (lowerY - ray.org.y) * ray.rdir_near.y;
const vfloat<N> tNearZ0 = (lowerZ - ray.org.z) * ray.rdir_near.z;
const vfloat<N> tFarX0 = (upperX - ray.org.x) * ray.rdir_far.x;
const vfloat<N> tFarY0 = (upperY - ray.org.y) * ray.rdir_far.y;
const vfloat<N> tFarZ0 = (upperZ - ray.org.z) * ray.rdir_far.z;

const vbool<N> parX = ray.dir.x == vfloat<N>(0.0f);
const vbool<N> parY = ray.dir.y == vfloat<N>(0.0f);
const vbool<N> parZ = ray.dir.z == vfloat<N>(0.0f);

Comment on lines +549 to +559
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still evaluates `(bound - org) * rdir` for parallel lanes, so the original `0 * inf` invalid operation (and the resulting NaN) can still be produced before it is overwritten by `select`. If the goal is to fully avoid generating NaNs and FP invalid exceptions (e.g., when FP exceptions or status flags matter), consider masking the operands for the `par*` lanes before the multiply (e.g., adjust the delta and/or the reciprocal direction for those lanes) so that the multiply never computes `0 * inf` in the first place.

Suggested change
const vfloat<N> tNearX0 = (lowerX - ray.org.x) * ray.rdir_near.x;
const vfloat<N> tNearY0 = (lowerY - ray.org.y) * ray.rdir_near.y;
const vfloat<N> tNearZ0 = (lowerZ - ray.org.z) * ray.rdir_near.z;
const vfloat<N> tFarX0 = (upperX - ray.org.x) * ray.rdir_far.x;
const vfloat<N> tFarY0 = (upperY - ray.org.y) * ray.rdir_far.y;
const vfloat<N> tFarZ0 = (upperZ - ray.org.z) * ray.rdir_far.z;
const vbool<N> parX = ray.dir.x == vfloat<N>(0.0f);
const vbool<N> parY = ray.dir.y == vfloat<N>(0.0f);
const vbool<N> parZ = ray.dir.z == vfloat<N>(0.0f);
/* detect parallel rays per axis */
const vbool<N> parX = ray.dir.x == vfloat<N>(0.0f);
const vbool<N> parY = ray.dir.y == vfloat<N>(0.0f);
const vbool<N> parZ = ray.dir.z == vfloat<N>(0.0f);
/* mask reciprocal directions for parallel lanes to avoid 0 * inf */
const vfloat<N> safe_rdir_near_x = select(parX, vfloat<N>(0.0f), ray.rdir_near.x);
const vfloat<N> safe_rdir_near_y = select(parY, vfloat<N>(0.0f), ray.rdir_near.y);
const vfloat<N> safe_rdir_near_z = select(parZ, vfloat<N>(0.0f), ray.rdir_near.z);
const vfloat<N> safe_rdir_far_x = select(parX, vfloat<N>(0.0f), ray.rdir_far.x);
const vfloat<N> safe_rdir_far_y = select(parY, vfloat<N>(0.0f), ray.rdir_far.y);
const vfloat<N> safe_rdir_far_z = select(parZ, vfloat<N>(0.0f), ray.rdir_far.z);
const vfloat<N> tNearX0 = (lowerX - ray.org.x) * safe_rdir_near_x;
const vfloat<N> tNearY0 = (lowerY - ray.org.y) * safe_rdir_near_y;
const vfloat<N> tNearZ0 = (lowerZ - ray.org.z) * safe_rdir_near_z;
const vfloat<N> tFarX0 = (upperX - ray.org.x) * safe_rdir_far_x;
const vfloat<N> tFarY0 = (upperY - ray.org.y) * safe_rdir_far_y;
const vfloat<N> tFarZ0 = (upperZ - ray.org.z) * safe_rdir_far_z;

Copilot uses AI. Check for mistakes.
const vbool<N> outX = parX & ((ray.org.x < lowerX) | (ray.org.x > upperX));
const vbool<N> outY = parY & ((ray.org.y < lowerY) | (ray.org.y > upperY));
const vbool<N> outZ = parZ & ((ray.org.z < lowerZ) | (ray.org.z > upperZ));
Comment on lines +556 to +562
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The outside-slab check uses the `lower*`/`upper*` values, which are loaded via the `ray.near*`/`ray.far*` offsets. Depending on the ray direction/sign conventions (and with possible sensitivity to `-0.0`), those offsets can represent swapped bounds, which can invert the slab range and incorrectly mark origins as outside. For the outside-slab test, compare against the true min/max bounds per axis: e.g., compute `minX = min(lowerX, upperX)` and `maxX = max(lowerX, upperX)` (likewise for Y/Z), and then test `org < min` or `org > max`.

Suggested change
const vbool<N> parX = ray.dir.x == vfloat<N>(0.0f);
const vbool<N> parY = ray.dir.y == vfloat<N>(0.0f);
const vbool<N> parZ = ray.dir.z == vfloat<N>(0.0f);
const vbool<N> outX = parX & ((ray.org.x < lowerX) | (ray.org.x > upperX));
const vbool<N> outY = parY & ((ray.org.y < lowerY) | (ray.org.y > upperY));
const vbool<N> outZ = parZ & ((ray.org.z < lowerZ) | (ray.org.z > upperZ));
const vfloat<N> minX = min(lowerX, upperX);
const vfloat<N> maxX = max(lowerX, upperX);
const vfloat<N> minY = min(lowerY, upperY);
const vfloat<N> maxY = max(lowerY, upperY);
const vfloat<N> minZ = min(lowerZ, upperZ);
const vfloat<N> maxZ = max(lowerZ, upperZ);
const vbool<N> parX = ray.dir.x == vfloat<N>(0.0f);
const vbool<N> parY = ray.dir.y == vfloat<N>(0.0f);
const vbool<N> parZ = ray.dir.z == vfloat<N>(0.0f);
const vbool<N> outX = parX & ((ray.org.x < minX) | (ray.org.x > maxX));
const vbool<N> outY = parY & ((ray.org.y < minY) | (ray.org.y > maxY));
const vbool<N> outZ = parZ & ((ray.org.z < minZ) | (ray.org.z > maxZ));

Copilot uses AI. Check for mistakes.

const vfloat<N> pinf = std::numeric_limits<float>::infinity();
const vfloat<N> ninf = -pinf;
const vfloat<N> tNearX = select(parX, ninf, tNearX0);
const vfloat<N> tNearY = select(parY, ninf, tNearY0);
const vfloat<N> tNearZ = select(parZ, ninf, tNearZ0);
const vfloat<N> tFarX = select(parX, pinf, tFarX0);
const vfloat<N> tFarY = select(parY, pinf, tFarY0);
const vfloat<N> tFarZ = select(parZ, pinf, tFarZ0);

const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
const vfloat<N> tFar = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
const vbool<N> vmask = tNear <= tFar;
const vbool<N> vmask = (tNear <= tFar) & !(outX | outY | outZ);
const size_t mask = movemask(vmask);
dist = tNear;
return mask;
Expand Down