diff --git a/intern/cycles/kernel/geom/geom_object.h b/intern/cycles/kernel/geom/geom_object.h index 91edd5863ac..79a56683454 100644 --- a/intern/cycles/kernel/geom/geom_object.h +++ b/intern/cycles/kernel/geom/geom_object.h @@ -391,6 +391,38 @@ ccl_device_inline void bvh_instance_push(KernelGlobals *kg, int object, const Ra *t *= len; } +#ifdef __QBVH__ +/* Same as above, but optimized for QBVH scene intersection, + * which needs to rescale two distances: *t (left as is when FLT_MAX) and *t1 (left as is when -FLT_MAX). + * + * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized + * so we can avoid having this duplication. + */ +ccl_device_inline void qbvh_instance_push(KernelGlobals *kg, + int object, + const Ray *ray, + float3 *P, + float3 *dir, + float3 *idir, + float *t, + float *t1) +{ + Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM); + + *P = transform_point(&tfm, ray->P); + + float len; /* Filled in by normalize_len() below, then used to rescale *t and *t1. */ + *dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); + + if(*t != FLT_MAX) + *t *= len; + + if(*t1 != -FLT_MAX) + *t1 *= len; +} +#endif + /* Transorm ray to exit static object in BVH */ ccl_device_inline void bvh_instance_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t) @@ -436,6 +468,33 @@ ccl_device_inline void bvh_instance_motion_push(KernelGlobals *kg, int object, c *t *= len; } +#ifdef __QBVH__ +/* Same as above, but optimized for QBVH scene intersection, + * which needs to rescale two distances: *t (left as is when FLT_MAX) and *t1 (left as is when -FLT_MAX). + * + * TODO(sergey): Investigate if passing NULL instead of t1 gets optimized + * so we can avoid having this duplication. 
+ */ +ccl_device_inline void qbvh_instance_motion_push(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, float *t1, Transform *tfm) +{ + Transform itfm; /* Filled in by object_fetch_transform_motion_test(); used to transform the ray below. */ + *tfm = object_fetch_transform_motion_test(kg, object, ray->time, &itfm); + + *P = transform_point(&itfm, ray->P); + + float len; /* Filled in by normalize_len() below, then used to rescale *t and *t1. */ + *dir = bvh_clamp_direction(normalize_len(transform_direction(&itfm, ray->D), &len)); + *idir = bvh_inverse_direction(*dir); + + + if(*t != FLT_MAX) + *t *= len; + + if(*t1 != -FLT_MAX) + *t1 *= len; +} +#endif + /* Transorm ray to exit motion blurred object in BVH */ ccl_device_inline void bvh_instance_motion_pop(KernelGlobals *kg, int object, const Ray *ray, float3 *P, float3 *dir, float3 *idir, float *t, Transform *tfm) diff --git a/intern/cycles/kernel/geom/geom_qbvh.h b/intern/cycles/kernel/geom/geom_qbvh.h index a1dd89c41ca..7a354379bed 100644 --- a/intern/cycles/kernel/geom/geom_qbvh.h +++ b/intern/cycles/kernel/geom/geom_qbvh.h @@ -14,32 +14,31 @@ * limitations under the License. */ -ccl_device_inline void qbvh_stack_sort(int *__restrict s1, - int *__restrict s2, - int *__restrict s3, - float *__restrict d1, - float *__restrict d2, - float *__restrict d3) +struct QBVHStackItem { + int addr; /* Value the traversal loops pop back into nodeAddr. */ + float dist; /* Sort key compared by qbvh_stack_sort(). */ +}; + +/* TODO(sergey): Investigate if using intrinsics helps here. 
*/ +ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, + QBVHStackItem *__restrict s2, + QBVHStackItem *__restrict s3) { - if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); } - if(*d3 < *d2) { util_swap(s3, s2); util_swap(d3, d2); } - if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); } + if(s2->dist < s1->dist) { util_swap(s2, s1); } + if(s3->dist < s2->dist) { util_swap(s3, s2); } + if(s2->dist < s1->dist) { util_swap(s2, s1); } /* Assuming util_swap() exchanges the pointed-to items: s1->dist <= s2->dist <= s3->dist from here on. */ } -ccl_device_inline void qbvh_stack_sort(int *__restrict s1, - int *__restrict s2, - int *__restrict s3, - int *__restrict s4, - float *__restrict d1, - float *__restrict d2, - float *__restrict d3, - float *__restrict d4) +ccl_device_inline void qbvh_stack_sort(QBVHStackItem *__restrict s1, + QBVHStackItem *__restrict s2, + QBVHStackItem *__restrict s3, + QBVHStackItem *__restrict s4) { - if(*d2 < *d1) { util_swap(s2, s1); util_swap(d2, d1); } - if(*d4 < *d3) { util_swap(s4, s3); util_swap(d4, d3); } - if(*d3 < *d1) { util_swap(s3, s1); util_swap(d3, d1); } - if(*d4 < *d2) { util_swap(s4, s2); util_swap(d4, d2); } - if(*d3 < *d2) { util_swap(s3, s2); util_swap(d3, d2); } + if(s2->dist < s1->dist) { util_swap(s2, s1); } + if(s4->dist < s3->dist) { util_swap(s4, s3); } + if(s3->dist < s1->dist) { util_swap(s3, s1); } + if(s4->dist < s2->dist) { util_swap(s4, s2); } + if(s3->dist < s2->dist) { util_swap(s3, s2); } /* Assuming util_swap() exchanges the pointed-to items: s1->dist <= s2->dist <= s3->dist <= s4->dist from here on. */ } ccl_device_inline int qbvh_node_intersect(KernelGlobals *__restrict kg, diff --git a/intern/cycles/kernel/geom/geom_qbvh_shadow.h b/intern/cycles/kernel/geom/geom_qbvh_shadow.h index f8279996450..2d1ad498972 100644 --- a/intern/cycles/kernel/geom/geom_qbvh_shadow.h +++ b/intern/cycles/kernel/geom/geom_qbvh_shadow.h @@ -39,8 +39,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, */ /* Traversal stack in CUDA thread-local memory. 
*/ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; + QBVHStackItem traversalStack[BVH_STACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; /* Traversal variables in registers. */ int stackPtr = 0; @@ -128,13 +128,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, if(d1 < d0) { nodeAddr = c1; ++stackPtr; - traversalStack[stackPtr] = c0; + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; continue; } else { nodeAddr = c0; ++stackPtr; - traversalStack[stackPtr] = c1; + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; continue; } } @@ -143,9 +145,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, * all nodes onto the stack to sort them there. */ ++stackPtr; - traversalStack[stackPtr] = c1; + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; /* dist is the key qbvh_stack_sort() compares, so it must hold the hit distance, not the node address. */ ++stackPtr; - traversalStack[stackPtr] = c0; + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; /* Three children are hit, push all onto stack and sort 3 * stack items, continue with closest child. 
@@ -155,12 +159,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, float d2 = ((float*)&dist)[r]; if(traverseChild == 0) { ++stackPtr; - traversalStack[stackPtr] = c2; + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; qbvh_stack_sort(&traversalStack[stackPtr], &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &d2, &d1, &d0); - nodeAddr = traversalStack[stackPtr]; + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; continue; } @@ -172,17 +176,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int c3 = __float_as_int(cnodes[r]); float d3 = ((float*)&dist)[r]; ++stackPtr; - traversalStack[stackPtr] = c3; + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; ++stackPtr; - traversalStack[stackPtr] = c2; + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; qbvh_stack_sort(&traversalStack[stackPtr], &traversalStack[stackPtr - 1], &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3], - &d3, &d2, &d1, &d0); + &traversalStack[stackPtr - 3]); } - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; } @@ -197,7 +202,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int primAddr2 = __float_as_int(leaf.y); /* Pop. 
*/ - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; #ifdef __VISIBILITY_FLAG__ @@ -315,7 +320,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, triangle_intersect_precalc(dir, &isect_precalc); ++stackPtr; - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; nodeAddr = kernel_tex_fetch(__object_node, object); @@ -368,7 +373,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, triangle_intersect_precalc(dir, &isect_precalc); object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; } #endif /* FEATURE(BVH_INSTANCING) */ diff --git a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h index bc43d81f9d3..acb1bbd01c0 100644 --- a/intern/cycles/kernel/geom/geom_qbvh_subsurface.h +++ b/intern/cycles/kernel/geom/geom_qbvh_subsurface.h @@ -42,8 +42,8 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, */ /* Traversal stack in CUDA thread-local memory. */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; + QBVHStackItem traversalStack[BVH_STACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; /* Traversal variables in registers. */ int stackPtr = 0; @@ -124,13 +124,15 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, if(d1 < d0) { nodeAddr = c1; ++stackPtr; - traversalStack[stackPtr] = c0; + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; continue; } else { nodeAddr = c0; ++stackPtr; - traversalStack[stackPtr] = c1; + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; continue; } } @@ -139,9 +141,11 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, * all nodes onto the stack to sort them there. 
*/ ++stackPtr; - traversalStack[stackPtr] = c1; + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; ++stackPtr; - traversalStack[stackPtr] = c0; + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; /* Three children are hit, push all onto stack and sort 3 * stack items, continue with closest child. @@ -151,12 +155,12 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, float d2 = ((float*)&dist)[r]; if(traverseChild == 0) { ++stackPtr; - traversalStack[stackPtr] = c2; + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; qbvh_stack_sort(&traversalStack[stackPtr], &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &d2, &d1, &d0); - nodeAddr = traversalStack[stackPtr]; + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; continue; } @@ -168,17 +172,18 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int c3 = __float_as_int(cnodes[r]); float d3 = ((float*)&dist)[r]; ++stackPtr; - traversalStack[stackPtr] = c3; + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; ++stackPtr; - traversalStack[stackPtr] = c2; + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; qbvh_stack_sort(&traversalStack[stackPtr], &traversalStack[stackPtr - 1], &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3], - &d3, &d2, &d1, &d0); + &traversalStack[stackPtr - 3]); } - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; } @@ -193,7 +198,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int primAddr2 = __float_as_int(leaf.y); /* Pop. */ - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; /* Primitive intersection. 
*/ @@ -250,13 +255,13 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, triangle_intersect_precalc(dir, &isect_precalc); ++stackPtr; - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; nodeAddr = kernel_tex_fetch(__object_node, object); } else { /* Pop. */ - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; } @@ -290,7 +295,7 @@ ccl_device uint BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, triangle_intersect_precalc(dir, &isect_precalc); object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; } #endif /* FEATURE(BVH_INSTANCING) */ diff --git a/intern/cycles/kernel/geom/geom_qbvh_traversal.h b/intern/cycles/kernel/geom/geom_qbvh_traversal.h index 56289900e80..fdb22725ceb 100644 --- a/intern/cycles/kernel/geom/geom_qbvh_traversal.h +++ b/intern/cycles/kernel/geom/geom_qbvh_traversal.h @@ -47,12 +47,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, */ /* Traversal stack in CUDA thread-local memory. */ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; + QBVHStackItem traversalStack[BVH_STACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; + traversalStack[0].dist = -FLT_MAX; /* Traversal variables in registers. */ int stackPtr = 0; int nodeAddr = kernel_data.bvh.root; + float nodeDist = -FLT_MAX; /* Ray parameters in registers. */ float3 P = ray->P; @@ -100,6 +102,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, do { /* Traverse internal nodes. */ while(nodeAddr >= 0 && nodeAddr != ENTRYPOINT_SENTINEL) { + if(UNLIKELY(nodeDist > isect->t)) { + /* Pop. 
*/ + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + continue; + } + int traverseChild; ssef dist; @@ -152,8 +162,10 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, /* One child is hit, continue with that child. */ int r = __bscf(traverseChild); + float d0 = ((float*)&dist)[r]; if(traverseChild == 0) { nodeAddr = __float_as_int(cnodes[r]); + nodeDist = d0; continue; } @@ -161,7 +173,6 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, * closer child. */ int c0 = __float_as_int(cnodes[r]); - float d0 = ((float*)&dist)[r]; r = __bscf(traverseChild); int c1 = __float_as_int(cnodes[r]); float d1 = ((float*)&dist)[r]; @@ -169,13 +180,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, if(d1 < d0) { nodeAddr = c1; ++stackPtr; - traversalStack[stackPtr] = c0; + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; continue; } else { nodeAddr = c0; ++stackPtr; - traversalStack[stackPtr] = c1; + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; continue; } } @@ -184,9 +197,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, * all nodes onto the stack to sort them there. */ ++stackPtr; - traversalStack[stackPtr] = c1; + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; ++stackPtr; - traversalStack[stackPtr] = c0; + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; /* Three children are hit, push all onto stack and sort 3 * stack items, continue with closest child. 
@@ -196,12 +211,13 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, float d2 = ((float*)&dist)[r]; if(traverseChild == 0) { ++stackPtr; - traversalStack[stackPtr] = c2; + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; qbvh_stack_sort(&traversalStack[stackPtr], &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &d2, &d1, &d0); - nodeAddr = traversalStack[stackPtr]; + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; --stackPtr; continue; } @@ -213,22 +229,32 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int c3 = __float_as_int(cnodes[r]); float d3 = ((float*)&dist)[r]; ++stackPtr; - traversalStack[stackPtr] = c3; + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; ++stackPtr; - traversalStack[stackPtr] = c2; + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; qbvh_stack_sort(&traversalStack[stackPtr], &traversalStack[stackPtr - 1], &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3], - &d3, &d2, &d1, &d0); + &traversalStack[stackPtr - 3]); } - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; --stackPtr; } /* If node is leaf, fetch triangle list. */ if(nodeAddr < 0) { + if(UNLIKELY(nodeDist > isect->t)) { + /* Pop. */ + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; + --stackPtr; + continue; + } + float4 leaf = kernel_tex_fetch(__bvh_nodes, (-nodeAddr-1)*BVH_QNODE_SIZE+6); int primAddr = __float_as_int(leaf.x); @@ -238,7 +264,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int primAddr2 = __float_as_int(leaf.y); /* Pop. 
*/ - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; --stackPtr; #ifdef __VISIBILITY_FLAG__ @@ -299,9 +326,9 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, object = kernel_tex_fetch(__prim_object, -primAddr-1); #if BVH_FEATURE(BVH_MOTION) - bvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &ob_tfm); + qbvh_instance_motion_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist, &ob_tfm); #else - bvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t); + qbvh_instance_push(kg, object, ray, &P, &dir, &idir, &isect->t, &nodeDist); #endif if(idir.x >= 0.0f) { near_x = 0; far_x = 1; } else { near_x = 1; far_x = 0; } @@ -318,7 +345,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, triangle_intersect_precalc(dir, &isect_precalc); ++stackPtr; - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; + traversalStack[stackPtr].dist = -FLT_MAX; nodeAddr = kernel_tex_fetch(__object_node, object); } @@ -351,7 +379,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, triangle_intersect_precalc(dir, &isect_precalc); object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; + nodeDist = traversalStack[stackPtr].dist; --stackPtr; } #endif /* FEATURE(BVH_INSTANCING) */ diff --git a/intern/cycles/kernel/geom/geom_qbvh_volume.h b/intern/cycles/kernel/geom/geom_qbvh_volume.h index 3630436bddc..21466cca92a 100644 --- a/intern/cycles/kernel/geom/geom_qbvh_volume.h +++ b/intern/cycles/kernel/geom/geom_qbvh_volume.h @@ -38,8 +38,8 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, */ /* Traversal stack in CUDA thread-local memory. 
*/ - int traversalStack[BVH_STACK_SIZE]; - traversalStack[0] = ENTRYPOINT_SENTINEL; + QBVHStackItem traversalStack[BVH_STACK_SIZE]; + traversalStack[0].addr = ENTRYPOINT_SENTINEL; /* Traversal variables in registers. */ int stackPtr = 0; @@ -130,13 +130,15 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, if(d1 < d0) { nodeAddr = c1; ++stackPtr; - traversalStack[stackPtr] = c0; + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; continue; } else { nodeAddr = c0; ++stackPtr; - traversalStack[stackPtr] = c1; + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; continue; } } @@ -145,9 +147,11 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, * all nodes onto the stack to sort them there. */ ++stackPtr; - traversalStack[stackPtr] = c1; + traversalStack[stackPtr].addr = c1; + traversalStack[stackPtr].dist = d1; ++stackPtr; - traversalStack[stackPtr] = c0; + traversalStack[stackPtr].addr = c0; + traversalStack[stackPtr].dist = d0; /* Three children are hit, push all onto stack and sort 3 * stack items, continue with closest child. 
@@ -157,12 +161,12 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, float d2 = ((float*)&dist)[r]; if(traverseChild == 0) { ++stackPtr; - traversalStack[stackPtr] = c2; + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; qbvh_stack_sort(&traversalStack[stackPtr], &traversalStack[stackPtr - 1], - &traversalStack[stackPtr - 2], - &d2, &d1, &d0); - nodeAddr = traversalStack[stackPtr]; + &traversalStack[stackPtr - 2]); + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; continue; } @@ -174,17 +178,18 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int c3 = __float_as_int(cnodes[r]); float d3 = ((float*)&dist)[r]; ++stackPtr; - traversalStack[stackPtr] = c3; + traversalStack[stackPtr].addr = c3; + traversalStack[stackPtr].dist = d3; ++stackPtr; - traversalStack[stackPtr] = c2; + traversalStack[stackPtr].addr = c2; + traversalStack[stackPtr].dist = d2; qbvh_stack_sort(&traversalStack[stackPtr], &traversalStack[stackPtr - 1], &traversalStack[stackPtr - 2], - &traversalStack[stackPtr - 3], - &d3, &d2, &d1, &d0); + &traversalStack[stackPtr - 3]); } - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; } @@ -199,7 +204,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, int primAddr2 = __float_as_int(leaf.y); /* Pop. */ - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; /* Primitive intersection. */ @@ -270,14 +275,14 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, triangle_intersect_precalc(dir, &isect_precalc); ++stackPtr; - traversalStack[stackPtr] = ENTRYPOINT_SENTINEL; + traversalStack[stackPtr].addr = ENTRYPOINT_SENTINEL; nodeAddr = kernel_tex_fetch(__object_node, object); } else { /* Pop. 
*/ object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; } } @@ -310,7 +315,7 @@ ccl_device bool BVH_FUNCTION_FULL_NAME(QBVH)(KernelGlobals *kg, triangle_intersect_precalc(dir, &isect_precalc); object = OBJECT_NONE; - nodeAddr = traversalStack[stackPtr]; + nodeAddr = traversalStack[stackPtr].addr; --stackPtr; } #endif /* FEATURE(BVH_INSTANCING) */