From 7fc448a8a946e1f099796278dd8b93f2bf477d39 Mon Sep 17 00:00:00 2001
From: Stefan Werner <stefan.werner@intel.com>
Date: Thu, 5 Mar 2026 09:25:03 +0100
Subject: [PATCH] Fixed robust node intersection for parallel rays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Problem
The original code computed the tNear and tFar slab distances in a single expression: (bound - org) * rdir. When a ray direction component is exactly 0.0f, the corresponding rdir component becomes infinite, and (bound - org) * rdir evaluates to NaN when bound == org (0 * inf = NaN). As a result, rays parallel to a slab could incorrectly miss BVH nodes they should hit, or incorrectly hit nodes they should miss.

What changed
Separated loads from arithmetic — The AABB bound values (lowerX/Y/Z, upperX/Y/Z) are now loaded into named variables first, so they can be reused for the parallel-ray check.

Initial slab distances computed normally — tNearX0..tFarZ0 are computed as before, but stored as intermediate values.

Parallel-ray detection — Three boolean masks (parX, parY, parZ) check if each ray direction component is exactly zero.

Outside-slab detection — For each parallel axis, outX/Y/Z checks whether the ray origin lies outside the bounding box on that axis. A ray that is parallel to a slab and starts outside it can never intersect the box.

Infinity substitution via select — For parallel axes, tNear is forced to -inf and tFar to +inf, effectively making that slab a no-op in the max/min reduction (the slab is "infinitely wide"). This avoids NaN propagation.

Final mask includes outX|outY|outZ — Even though the slab distances are now clean, nodes are explicitly rejected if the ray is parallel to and outside the box on any axis: (tNear <= tFar) & !(outX | outY | outZ).

In summary
This is a correctness fix for the robust BVH node intersection when a ray is parallel to one or more slabs (i.e. at least one direction component is exactly 0). It prevents NaN results from 0 * inf and properly handles the case where such a ray lies outside the bounding box on the parallel axis.
---
 kernels/bvh/node_intersector1.h | 39 +++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 7 deletions(-)
diff --git a/kernels/bvh/node_intersector1.h b/kernels/bvh/node_intersector1.h
index 17641fa888..80749892ac 100644
--- a/kernels/bvh/node_intersector1.h
+++ b/kernels/bvh/node_intersector1.h
@@ -539,15 +539,40 @@ namespace embree
     template<int N>
       __forceinline size_t intersectNodeRobust(const typename BVHN<N>::AABBNode* node, const TravRay<N,true>& ray, vfloat<N>& dist)
     {
-      const vfloat<N> tNearX = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX)) - ray.org.x) * ray.rdir_near.x;
-      const vfloat<N> tNearY = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY)) - ray.org.y) * ray.rdir_near.y;
-      const vfloat<N> tNearZ = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ)) - ray.org.z) * ray.rdir_near.z;
-      const vfloat<N> tFarX  = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX )) - ray.org.x) * ray.rdir_far.x;
-      const vfloat<N> tFarY  = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY )) - ray.org.y) * ray.rdir_far.y;
-      const vfloat<N> tFarZ  = (vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ )) - ray.org.z) * ray.rdir_far.z;
+      const vfloat<N> lowerX = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearX));
+      const vfloat<N> lowerY = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearY));
+      const vfloat<N> lowerZ = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.nearZ));
+      const vfloat<N> upperX = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farX ));
+      const vfloat<N> upperY = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farY ));
+      const vfloat<N> upperZ = vfloat<N>::load((float*)((const char*)&node->lower_x+ray.farZ ));
+
+      const vfloat<N> tNearX0 = (lowerX - ray.org.x) * ray.rdir_near.x;
+      const vfloat<N> tNearY0 = (lowerY - ray.org.y) * ray.rdir_near.y;
+      const vfloat<N> tNearZ0 = (lowerZ - ray.org.z) * ray.rdir_near.z;
+      const vfloat<N> tFarX0  = (upperX - ray.org.x) * ray.rdir_far.x;
+      const vfloat<N> tFarY0  = (upperY - ray.org.y) * ray.rdir_far.y;
+      const vfloat<N> tFarZ0  = (upperZ - ray.org.z) * ray.rdir_far.z;
+
+      const vbool<N> parX = ray.dir.x == vfloat<N>(0.0f);
+      const vbool<N> parY = ray.dir.y == vfloat<N>(0.0f);
+      const vbool<N> parZ = ray.dir.z == vfloat<N>(0.0f);
+
+      const vbool<N> outX = parX & ((ray.org.x < lowerX) | (ray.org.x > upperX));
+      const vbool<N> outY = parY & ((ray.org.y < lowerY) | (ray.org.y > upperY));
+      const vbool<N> outZ = parZ & ((ray.org.z < lowerZ) | (ray.org.z > upperZ));
+
+      const vfloat<N> pinf = std::numeric_limits<float>::infinity();
+      const vfloat<N> ninf = -pinf;
+      const vfloat<N> tNearX = select(parX, ninf, tNearX0);
+      const vfloat<N> tNearY = select(parY, ninf, tNearY0);
+      const vfloat<N> tNearZ = select(parZ, ninf, tNearZ0);
+      const vfloat<N> tFarX  = select(parX, pinf, tFarX0);
+      const vfloat<N> tFarY  = select(parY, pinf, tFarY0);
+      const vfloat<N> tFarZ  = select(parZ, pinf, tFarZ0);
+
       const vfloat<N> tNear = max(tNearX,tNearY,tNearZ,ray.tnear);
       const vfloat<N> tFar  = min(tFarX ,tFarY ,tFarZ ,ray.tfar);
-      const vbool<N> vmask = tNear <= tFar;
+      const vbool<N> vmask = (tNear <= tFar) & !(outX | outY | outZ);
       const size_t mask = movemask(vmask);
       dist = tNear;
       return mask;