Cycles: Distance optimization for QBVH

This commit implements a heuristic which allows skipping nodes that were pushed
to the stack during traversal if the distance to them is larger than the
distance to the current intersection.

This should solve a speed regression which I didn't notice in the original QBVH
commit (possibly because I had a WIP version of this patch applied in my
local branch).

From quick tests, speed seems to be much closer to what it was with the regular BVH.

There's still some possible code cleanup, but it will need a bit of checking of
the generated assembly code, and for now I want to make it so artists can
happily use Cycles over the holidays.
This commit is contained in:
2014-12-25 22:40:02 +05:00
parent 30e3aa1561
commit cd095aae13
6 changed files with 201 additions and 99 deletions

View File

@@ -391,6 +391,38 @@ ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ra
*t *= len;
}
#ifdef __QBVH__
/* QBVH flavour of bvh_instance_push(): moves the ray into the space of a
 * static object and rescales two distances instead of one.
 *
 * P, dir and idir receive the transformed origin, the clamped normalized
 * direction and the inverse of that direction. *t is rescaled unless it
 * equals FLT_MAX and *t1 unless it equals -FLT_MAX, so both sentinel values
 * are passed through untouched.
 *
 * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
 * so we can avoid having this duplication.
 */
ccl_device_inline void qbvh_instance_push(KernelGlobals *kg,
                                          int object,
                                          const Ray *ray,
                                          float3 *P,
                                          float3 *dir,
                                          float3 *idir,
                                          float *t,
                                          float *t1)
{
	Transform itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);

	/* Ray origin through the inverse object transform. */
	*P = transform_point(&itfm, ray->P);

	/* Ray direction through the same transform; the length reported by
	 * normalize_len() is the factor applied to the distances below.
	 */
	float scale;
	float3 local_dir = transform_direction(&itfm, ray->D);
	local_dir = normalize_len(local_dir, &scale);
	*dir = bvh_clamp_direction(local_dir);
	*idir = bvh_inverse_direction(*dir);

	/* Leave the "no limit" sentinels as they are. */
	if(*t != FLT_MAX) {
		*t *= scale;
	}
	if(*t1 != -FLT_MAX) {
		*t1 *= scale;
	}
}
#endif
/* Transform ray to exit static object in BVH */
ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t)
@@ -436,6 +468,33 @@ ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, c
*t *= len;
}
#ifdef __QBVH__
/* QBVH flavour of bvh_instance_motion_push(): moves the ray into the space
 * of a motion blurred object and rescales two distances instead of one.
 *
 * The transform returned by object_fetch_transform_motion_test() for
 * ray->time is stored in *tfm, while the inverse transform it fills in is
 * used to convert the ray. *t is rescaled unless it equals FLT_MAX and *t1
 * unless it equals -FLT_MAX.
 *
 * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized
 * so we can avoid having this duplication.
 */
ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg,
                                                 int object,
                                                 const Ray *ray,
                                                 float3 *P,
                                                 float3 *dir,
                                                 float3 *idir,
                                                 float *t,
                                                 float *t1,
                                                 Transform *tfm)
{
	Transform inverse_tfm;
	*tfm = object_fetch_transform_motion_test(kg, object, ray->time, &inverse_tfm);

	/* Ray origin through the inverse object transform. */
	*P = transform_point(&inverse_tfm, ray->P);

	/* Ray direction through the same transform; the length reported by
	 * normalize_len() is the factor applied to the distances below.
	 */
	float scale;
	float3 local_dir = transform_direction(&inverse_tfm, ray->D);
	local_dir = normalize_len(local_dir, &scale);
	*dir = bvh_clamp_direction(local_dir);
	*idir = bvh_inverse_direction(*dir);

	/* Leave the "no limit" sentinels as they are. */
	if(*t != FLT_MAX) {
		*t *= scale;
	}
	if(*t1 != -FLT_MAX) {
		*t1 *= scale;
	}
}
#endif
/* Transform ray to exit motion blurred object in BVH */
ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm)

View File

@@ -14,32 +14,31 @@
* limitations under the License.
*/
ccl_device_inline void qbvh_stack_sort(int *__restrict s1,
int *__restrict s2,
int *__restrict s3,
float *__restrict d1,
float *__restrict d2,
float *__restrict d3)
/* One entry of the QBVH traversal stack: a node together with the distance
 * recorded for it when it was pushed.
 */
struct QBVHStackItem {
/* Node address; traversal copies it into nodeAddr when the entry is popped. */
int addr;
/* Distance stored alongside the node; this is the key qbvh_stack_sort()
 * compares on.
 */
float dist;
};
/* TODO(sergey): Investigate if using intrinsics helps here. */
/* Order three stack items by their dist field using three compare-and-swap
 * steps, so that *s1 ends up with the smallest dist and *s3 with the largest.
 *
 * NOTE(review): presumably util_swap() exchanges the pointed-to items rather
 * than the pointers themselves - confirm against its definition.
 */
ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
QBVHStackItem *__restrict s2,
QBVHStackItem *__restrict s3)
{
if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); }
if(*d3 < *d2) { util_swap(s3, s2); util_swap(d3, d2); }
if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); }
if(s2->dist < s1->dist) { util_swap(s2, s1); }
if(s3->dist < s2->dist) { util_swap(s3, s2); }
if(s2->dist < s1->dist) { util_swap(s2, s1); }
}
ccl_device_inline void qbvh_stack_sort(int *__restrict s1,
int *__restrict s2,
int *__restrict s3,
int *__restrict s4,
float *__restrict d1,
float *__restrict d2,
float *__restrict d3,
float *__restrict d4)
/* Order four stack items by their dist field with a five-step
 * compare-and-swap sequence: (s1,s2), (s3,s4), (s1,s3), (s2,s4), (s2,s3).
 * Afterwards *s1 has the smallest dist and *s4 the largest.
 *
 * NOTE(review): presumably util_swap() exchanges the pointed-to items rather
 * than the pointers themselves - confirm against its definition.
 */
ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1,
QBVHStackItem *__restrict s2,
QBVHStackItem *__restrict s3,
QBVHStackItem *__restrict s4)
{
if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); }
if(*d4 < *d3) { util_swap(s4, s3); util_swap(d4, d3); }
if(*d3 < *d1) { util_swap(s3, s1); util_swap(d3, d1); }
if(*d4 < *d2) { util_swap(s4, s2); util_swap(d4, d2); }
if(*d3 < *d2) { util_swap(s3, s2); util_swap(d3, d2); }
if(s2->dist < s1->dist) { util_swap(s2, s1); }
if(s4->dist < s3->dist) { util_swap(s4, s3); }
if(s3->dist < s1->dist) { util_swap(s3, s1); }
if(s4->dist < s2->dist) { util_swap(s4, s2); }
if(s3->dist < s2->dist) { util_swap(s3, s2); }
}
ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg,

View File

@@ -39,8 +39,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
QBVHStackItem traversalStack[BVH_STACK_SIZE];
traversalStack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
@@ -128,13 +128,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
traversalStack[stackPtr] = c0;
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
traversalStack[stackPtr] = c1;
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
continue;
}
}
@@ -143,9 +145,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* all nodes onto the stack to sort them there.
*/
++stackPtr;
traversalStack[stackPtr] = c1;
traversalStack[stackPtr].addr = c1;
/* qbvh_stack_sort() orders the pushed items by .dist, so it has to hold the
 * hit distance of the child, matching the .addr stored next to it.
 */
traversalStack[stackPtr].dist = d1;
++stackPtr;
traversalStack[stackPtr] = c0;
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -155,12 +159,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
traversalStack[stackPtr] = c2;
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&d2, &d1, &d0);
nodeAddr = traversalStack[stackPtr];
&traversalStack[stackPtr - 2]);
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
continue;
}
@@ -172,17 +176,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
traversalStack[stackPtr] = c3;
traversalStack[stackPtr].addr = c3;
traversalStack[stackPtr].dist = d3;
++stackPtr;
traversalStack[stackPtr] = c2;
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&traversalStack[stackPtr - 3],
&d3, &d2, &d1, &d0);
&traversalStack[stackPtr - 3]);
}
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
@@ -197,7 +202,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int primAddr2 = __float_as_int(leaf.y);
/* Pop. */
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
#ifdef __VISIBILITY_FLAG__
@@ -315,7 +320,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
@@ -368,7 +373,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */

View File

@@ -42,8 +42,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
QBVHStackItem traversalStack[BVH_STACK_SIZE];
traversalStack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
@@ -124,13 +124,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
traversalStack[stackPtr] = c0;
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
traversalStack[stackPtr] = c1;
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
continue;
}
}
@@ -139,9 +141,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* all nodes onto the stack to sort them there.
*/
++stackPtr;
traversalStack[stackPtr] = c1;
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
++stackPtr;
traversalStack[stackPtr] = c0;
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -151,12 +155,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
traversalStack[stackPtr] = c2;
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&d2, &d1, &d0);
nodeAddr = traversalStack[stackPtr];
&traversalStack[stackPtr - 2]);
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
continue;
}
@@ -168,17 +172,18 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
traversalStack[stackPtr] = c3;
traversalStack[stackPtr].addr = c3;
traversalStack[stackPtr].dist = d3;
++stackPtr;
traversalStack[stackPtr] = c2;
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&traversalStack[stackPtr - 3],
&d3, &d2, &d1, &d0);
&traversalStack[stackPtr - 3]);
}
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
@@ -193,7 +198,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int primAddr2 = __float_as_int(leaf.y);
/* Pop. */
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
/* Primitive intersection. */
@@ -250,13 +255,13 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
}
else {
/* Pop. */
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
@@ -290,7 +295,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */

View File

@@ -47,12 +47,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
QBVHStackItem traversalStack[BVH_STACK_SIZE];
traversalStack[0].addr = ENTRYPOINT_SENTINEL;
traversalStack[0].dist = -FLT_MAX;
/* Traversal variables in registers. */
int stackPtr = 0;
int nodeAddr = kernel_data.bvh.root;
float nodeDist = -FLT_MAX;
/* Ray parameters in registers. */
float3 P = ray->P;
@@ -100,6 +102,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
do {
/* Traverse internal nodes. */
while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) {
if(UNLIKELY(nodeDist > isect->t)) {
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
continue;
}
int traverseChild;
ssef dist;
@@ -152,8 +162,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* One child is hit, continue with that child. */
int r = __bscf(traverseChild);
float d0 = ((float*)&dist)[r];
if(traverseChild == 0) {
nodeAddr = __float_as_int(cnodes[r]);
nodeDist = d0;
continue;
}
@@ -161,7 +173,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* closer child.
*/
int c0 = __float_as_int(cnodes[r]);
float d0 = ((float*)&dist)[r];
r = __bscf(traverseChild);
int c1 = __float_as_int(cnodes[r]);
float d1 = ((float*)&dist)[r];
@@ -169,13 +180,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
/* Continue with the closer of the two children and defer the other one.
 * nodeDist is updated together with nodeAddr so the distance test at the
 * top of the traversal loop (and the one before leaf processing) compares
 * isect->t against this child instead of a value left over from an earlier
 * node.
 */
if(d1 < d0) {
nodeAddr = c1;
nodeDist = d1;
++stackPtr;
traversalStack[stackPtr] = c0;
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
continue;
}
else {
nodeAddr = c0;
nodeDist = d0;
++stackPtr;
traversalStack[stackPtr] = c1;
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
continue;
}
}
@@ -184,9 +197,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* all nodes onto the stack to sort them there.
*/
++stackPtr;
traversalStack[stackPtr] = c1;
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
++stackPtr;
traversalStack[stackPtr] = c0;
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -196,12 +211,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
traversalStack[stackPtr] = c2;
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&d2, &d1, &d0);
nodeAddr = traversalStack[stackPtr];
&traversalStack[stackPtr - 2]);
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
continue;
}
@@ -213,22 +229,32 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
traversalStack[stackPtr] = c3;
traversalStack[stackPtr].addr = c3;
traversalStack[stackPtr].dist = d3;
++stackPtr;
traversalStack[stackPtr] = c2;
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&traversalStack[stackPtr - 3],
&d3, &d2, &d1, &d0);
&traversalStack[stackPtr - 3]);
}
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
}
/* If node is leaf, fetch triangle list. */
if(nodeAddr < 0) {
if(UNLIKELY(nodeDist > isect->t)) {
/* Pop. */
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
continue;
}
float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6);
int primAddr = __float_as_int(leaf.x);
@@ -238,7 +264,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int primAddr2 = __float_as_int(leaf.y);
/* Pop. */
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
#ifdef __VISIBILITY_FLAG__
@@ -299,9 +326,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
object = kernel_tex_fetch(__prim_object, -primAddr-1);
#if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm);
qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_tfm);
#else
bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t);
qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist);
#endif
if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; }
@@ -318,7 +345,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
traversalStack[stackPtr].dist = -FLT_MAX;
nodeAddr = kernel_tex_fetch(__object_node, object);
}
@@ -351,7 +379,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
nodeDist = traversalStack[stackPtr].dist;
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */

View File

@@ -38,8 +38,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
*/
/* Traversal stack in CUDA thread-local memory. */
int traversalStack[BVH_STACK_SIZE];
traversalStack[0] = ENTRYPOINT_SENTINEL;
QBVHStackItem traversalStack[BVH_STACK_SIZE];
traversalStack[0].addr = ENTRYPOINT_SENTINEL;
/* Traversal variables in registers. */
int stackPtr = 0;
@@ -130,13 +130,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
if(d1 < d0) {
nodeAddr = c1;
++stackPtr;
traversalStack[stackPtr] = c0;
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
continue;
}
else {
nodeAddr = c0;
++stackPtr;
traversalStack[stackPtr] = c1;
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
continue;
}
}
@@ -145,9 +147,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
* all nodes onto the stack to sort them there.
*/
++stackPtr;
traversalStack[stackPtr] = c1;
traversalStack[stackPtr].addr = c1;
traversalStack[stackPtr].dist = d1;
++stackPtr;
traversalStack[stackPtr] = c0;
traversalStack[stackPtr].addr = c0;
traversalStack[stackPtr].dist = d0;
/* Three children are hit, push all onto stack and sort 3
* stack items, continue with closest child.
@@ -157,12 +161,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
float d2 = ((float*)&dist)[r];
if(traverseChild == 0) {
++stackPtr;
traversalStack[stackPtr] = c2;
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&d2, &d1, &d0);
nodeAddr = traversalStack[stackPtr];
&traversalStack[stackPtr - 2]);
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
continue;
}
@@ -174,17 +178,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int c3 = __float_as_int(cnodes[r]);
float d3 = ((float*)&dist)[r];
++stackPtr;
traversalStack[stackPtr] = c3;
traversalStack[stackPtr].addr = c3;
traversalStack[stackPtr].dist = d3;
++stackPtr;
traversalStack[stackPtr] = c2;
traversalStack[stackPtr].addr = c2;
traversalStack[stackPtr].dist = d2;
qbvh_stack_sort(&traversalStack[stackPtr],
&traversalStack[stackPtr - 1],
&traversalStack[stackPtr - 2],
&traversalStack[stackPtr - 3],
&d3, &d2, &d1, &d0);
&traversalStack[stackPtr - 3]);
}
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
@@ -199,7 +204,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
int primAddr2 = __float_as_int(leaf.y);
/* Pop. */
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
/* Primitive intersection. */
@@ -270,14 +275,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
++stackPtr;
traversalStack[stackPtr] = ENTRYPOINT_SENTINEL;
traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL;
nodeAddr = kernel_tex_fetch(__object_node, object);
}
else {
/* Pop. */
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
}
@@ -310,7 +315,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg,
triangle_intersect_precalc(dir, &isect_precalc);
object = OBJECT_NONE;
nodeAddr = traversalStack[stackPtr];
nodeAddr = traversalStack[stackPtr].addr;
--stackPtr;
}
#endif /* FEATURE(BVH_INSTANCING) */