Cycles: add Optix support in the kernel
This adds all the kernel side changes for the Optix backend. Ref D5363
This commit is contained in:
		| @@ -33,49 +33,51 @@ CCL_NAMESPACE_BEGIN | ||||
|  | ||||
| #include "kernel/bvh/bvh_types.h" | ||||
|  | ||||
| #ifndef __KERNEL_OPTIX__ | ||||
|  | ||||
| /* Common QBVH functions. */ | ||||
| #ifdef __QBVH__ | ||||
| #  ifdef __QBVH__ | ||||
| #    include "kernel/bvh/qbvh_nodes.h" | ||||
| #    ifdef __KERNEL_AVX2__ | ||||
| #      include "kernel/bvh/obvh_nodes.h" | ||||
| #    endif | ||||
| #endif | ||||
| #  endif | ||||
|  | ||||
| /* Regular BVH traversal */ | ||||
|  | ||||
| #include "kernel/bvh/bvh_nodes.h" | ||||
| #  include "kernel/bvh/bvh_nodes.h" | ||||
|  | ||||
| #define BVH_FUNCTION_NAME bvh_intersect | ||||
| #define BVH_FUNCTION_FEATURES 0 | ||||
| #include "kernel/bvh/bvh_traversal.h" | ||||
| #  define BVH_FUNCTION_NAME bvh_intersect | ||||
| #  define BVH_FUNCTION_FEATURES 0 | ||||
| #  include "kernel/bvh/bvh_traversal.h" | ||||
|  | ||||
| #if defined(__INSTANCING__) | ||||
| #  if defined(__INSTANCING__) | ||||
| #    define BVH_FUNCTION_NAME bvh_intersect_instancing | ||||
| #    define BVH_FUNCTION_FEATURES BVH_INSTANCING | ||||
| #    include "kernel/bvh/bvh_traversal.h" | ||||
| #endif | ||||
| #  endif | ||||
|  | ||||
| #if defined(__HAIR__) | ||||
| #  if defined(__HAIR__) | ||||
| #    define BVH_FUNCTION_NAME bvh_intersect_hair | ||||
| #    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | ||||
| #    include "kernel/bvh/bvh_traversal.h" | ||||
| #endif | ||||
| #  endif | ||||
|  | ||||
| #if defined(__OBJECT_MOTION__) | ||||
| #  if defined(__OBJECT_MOTION__) | ||||
| #    define BVH_FUNCTION_NAME bvh_intersect_motion | ||||
| #    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | ||||
| #    include "kernel/bvh/bvh_traversal.h" | ||||
| #endif | ||||
| #  endif | ||||
|  | ||||
| #if defined(__HAIR__) && defined(__OBJECT_MOTION__) | ||||
| #  if defined(__HAIR__) && defined(__OBJECT_MOTION__) | ||||
| #    define BVH_FUNCTION_NAME bvh_intersect_hair_motion | ||||
| #    define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION | ||||
| #    include "kernel/bvh/bvh_traversal.h" | ||||
| #endif | ||||
| #  endif | ||||
|  | ||||
| /* Subsurface scattering BVH traversal */ | ||||
|  | ||||
| #if defined(__BVH_LOCAL__) | ||||
| #  if defined(__BVH_LOCAL__) | ||||
| #    define BVH_FUNCTION_NAME bvh_intersect_local | ||||
| #    define BVH_FUNCTION_FEATURES BVH_HAIR | ||||
| #    include "kernel/bvh/bvh_local.h" | ||||
| @@ -85,11 +87,11 @@ CCL_NAMESPACE_BEGIN | ||||
| #      define BVH_FUNCTION_FEATURES BVH_MOTION | BVH_HAIR | ||||
| #      include "kernel/bvh/bvh_local.h" | ||||
| #    endif | ||||
| #endif /* __BVH_LOCAL__ */ | ||||
| #  endif /* __BVH_LOCAL__ */ | ||||
|  | ||||
| /* Volume BVH traversal */ | ||||
|  | ||||
| #if defined(__VOLUME__) | ||||
| #  if defined(__VOLUME__) | ||||
| #    define BVH_FUNCTION_NAME bvh_intersect_volume | ||||
| #    define BVH_FUNCTION_FEATURES BVH_HAIR | ||||
| #    include "kernel/bvh/bvh_volume.h" | ||||
| @@ -105,11 +107,11 @@ CCL_NAMESPACE_BEGIN | ||||
| #      define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR | ||||
| #      include "kernel/bvh/bvh_volume.h" | ||||
| #    endif | ||||
| #endif /* __VOLUME__ */ | ||||
| #  endif /* __VOLUME__ */ | ||||
|  | ||||
| /* Record all intersections - Shadow BVH traversal */ | ||||
|  | ||||
| #if defined(__SHADOW_RECORD_ALL__) | ||||
| #  if defined(__SHADOW_RECORD_ALL__) | ||||
| #    define BVH_FUNCTION_NAME bvh_intersect_shadow_all | ||||
| #    define BVH_FUNCTION_FEATURES 0 | ||||
| #    include "kernel/bvh/bvh_shadow_all.h" | ||||
| @@ -137,11 +139,11 @@ CCL_NAMESPACE_BEGIN | ||||
| #      define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_HAIR | BVH_MOTION | ||||
| #      include "kernel/bvh/bvh_shadow_all.h" | ||||
| #    endif | ||||
| #endif /* __SHADOW_RECORD_ALL__ */ | ||||
| #  endif /* __SHADOW_RECORD_ALL__ */ | ||||
|  | ||||
| /* Record all intersections - Volume BVH traversal  */ | ||||
|  | ||||
| #if defined(__VOLUME_RECORD_ALL__) | ||||
| #  if defined(__VOLUME_RECORD_ALL__) | ||||
| #    define BVH_FUNCTION_NAME bvh_intersect_volume_all | ||||
| #    define BVH_FUNCTION_FEATURES BVH_HAIR | ||||
| #    include "kernel/bvh/bvh_volume_all.h" | ||||
| @@ -157,12 +159,14 @@ CCL_NAMESPACE_BEGIN | ||||
| #      define BVH_FUNCTION_FEATURES BVH_INSTANCING | BVH_MOTION | BVH_HAIR | ||||
| #      include "kernel/bvh/bvh_volume_all.h" | ||||
| #    endif | ||||
| #endif /* __VOLUME_RECORD_ALL__ */ | ||||
| #  endif /* __VOLUME_RECORD_ALL__ */ | ||||
|  | ||||
| #undef BVH_FEATURE | ||||
| #undef BVH_NAME_JOIN | ||||
| #undef BVH_NAME_EVAL | ||||
| #undef BVH_FUNCTION_FULL_NAME | ||||
| #  undef BVH_FEATURE | ||||
| #  undef BVH_NAME_JOIN | ||||
| #  undef BVH_NAME_EVAL | ||||
| #  undef BVH_FUNCTION_FULL_NAME | ||||
|  | ||||
| #endif /* __KERNEL_OPTIX__ */ | ||||
|  | ||||
| ccl_device_inline bool scene_intersect_valid(const Ray *ray) | ||||
| { | ||||
| @@ -173,8 +177,10 @@ ccl_device_inline bool scene_intersect_valid(const Ray *ray) | ||||
|    * such cases. | ||||
|    * From production scenes so far it seems it's enough to test first element | ||||
|    * only. | ||||
|    * Scene intersection may also called with empty rays for conditional trace | ||||
|    * calls that evaluate to false, so filter those out. | ||||
|    */ | ||||
|   return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x); | ||||
|   return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f; | ||||
| } | ||||
|  | ||||
| ccl_device_intersect bool scene_intersect(KernelGlobals *kg, | ||||
| @@ -184,10 +190,46 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, | ||||
| { | ||||
|   PROFILING_INIT(kg, PROFILING_INTERSECT); | ||||
|  | ||||
| #ifdef __KERNEL_OPTIX__ | ||||
|   uint p0 = 0; | ||||
|   uint p1 = 0; | ||||
|   uint p2 = 0; | ||||
|   uint p3 = 0; | ||||
|   uint p4 = visibility; | ||||
|   uint p5 = PRIMITIVE_NONE; | ||||
|  | ||||
|   optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, | ||||
|              ray->P, | ||||
|              ray->D, | ||||
|              0.0f, | ||||
|              ray->t, | ||||
|              ray->time, | ||||
|              0xFF, | ||||
|              OPTIX_RAY_FLAG_NONE, | ||||
|              0, | ||||
|              0, | ||||
|              0,  // SBT offset for PG_HITD | ||||
|              p0, | ||||
|              p1, | ||||
|              p2, | ||||
|              p3, | ||||
|              p4, | ||||
|              p5); | ||||
|  | ||||
|   isect->t = __uint_as_float(p0); | ||||
|   isect->u = __uint_as_float(p1); | ||||
|   isect->v = __uint_as_float(p2); | ||||
|   isect->prim = p3; | ||||
|   isect->object = p4; | ||||
|   isect->type = p5; | ||||
|  | ||||
|   return p5 != PRIMITIVE_NONE; | ||||
| #else /* __KERNEL_OPTIX__ */ | ||||
|   if (!scene_intersect_valid(ray)) { | ||||
|     return false; | ||||
|   } | ||||
| #ifdef __EMBREE__ | ||||
|  | ||||
| #  ifdef __EMBREE__ | ||||
|   if (kernel_data.bvh.scene) { | ||||
|     isect->t = ray->t; | ||||
|     CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_REGULAR); | ||||
| @@ -202,8 +244,9 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, | ||||
|     } | ||||
|     return false; | ||||
|   } | ||||
| #endif /* __EMBREE__ */ | ||||
| #ifdef __OBJECT_MOTION__ | ||||
| #  endif /* __EMBREE__ */ | ||||
|  | ||||
| #  ifdef __OBJECT_MOTION__ | ||||
|   if (kernel_data.bvh.have_motion) { | ||||
| #    ifdef __HAIR__ | ||||
|     if (kernel_data.bvh.have_curves) { | ||||
| @@ -213,31 +256,29 @@ ccl_device_intersect bool scene_intersect(KernelGlobals *kg, | ||||
|  | ||||
|     return bvh_intersect_motion(kg, ray, isect, visibility); | ||||
|   } | ||||
| #endif /* __OBJECT_MOTION__ */ | ||||
| #  endif   /* __OBJECT_MOTION__ */ | ||||
|  | ||||
| #ifdef __HAIR__ | ||||
| #  ifdef __HAIR__ | ||||
|   if (kernel_data.bvh.have_curves) { | ||||
|     return bvh_intersect_hair(kg, ray, isect, visibility); | ||||
|   } | ||||
| #endif /* __HAIR__ */ | ||||
|  | ||||
| #ifdef __KERNEL_CPU__ | ||||
| #  endif /* __HAIR__ */ | ||||
|  | ||||
| #  ifdef __KERNEL_CPU__ | ||||
| #    ifdef __INSTANCING__ | ||||
|   if (kernel_data.bvh.have_instancing) { | ||||
|     return bvh_intersect_instancing(kg, ray, isect, visibility); | ||||
|   } | ||||
| #    endif /* __INSTANCING__ */ | ||||
|   return bvh_intersect(kg, ray, isect, visibility); | ||||
| #else /* __KERNEL_CPU__ */ | ||||
|  | ||||
| #  else    /* __KERNEL_CPU__ */ | ||||
| #    ifdef __INSTANCING__ | ||||
|   return bvh_intersect_instancing(kg, ray, isect, visibility); | ||||
| #    else | ||||
|   return bvh_intersect(kg, ray, isect, visibility); | ||||
| #    endif /* __INSTANCING__ */ | ||||
|  | ||||
| #endif /* __KERNEL_CPU__ */ | ||||
| #  endif   /* __KERNEL_CPU__ */ | ||||
| #endif     /* __KERNEL_OPTIX__ */ | ||||
| } | ||||
|  | ||||
| #ifdef __BVH_LOCAL__ | ||||
| @@ -250,10 +291,42 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, | ||||
| { | ||||
|   PROFILING_INIT(kg, PROFILING_INTERSECT_LOCAL); | ||||
|  | ||||
| #  ifdef __KERNEL_OPTIX__ | ||||
|   uint p0 = ((uint64_t)lcg_state) & 0xFFFFFFFF; | ||||
|   uint p1 = (((uint64_t)lcg_state) >> 32) & 0xFFFFFFFF; | ||||
|   uint p2 = ((uint64_t)local_isect) & 0xFFFFFFFF; | ||||
|   uint p3 = (((uint64_t)local_isect) >> 32) & 0xFFFFFFFF; | ||||
|   uint p4 = local_object; | ||||
|   // Is set to zero on miss or if ray is aborted, so can be used as return value | ||||
|   uint p5 = max_hits; | ||||
|  | ||||
|   local_isect->num_hits = 0;  // Initialize hit count to zero | ||||
|   optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, | ||||
|              ray->P, | ||||
|              ray->D, | ||||
|              0.0f, | ||||
|              ray->t, | ||||
|              ray->time, | ||||
|              // Need to always call into __anyhit__kernel_optix_local_hit | ||||
|              0xFF, | ||||
|              OPTIX_RAY_FLAG_ENFORCE_ANYHIT, | ||||
|              1, | ||||
|              0, | ||||
|              0,  // SBT offset for PG_HITL | ||||
|              p0, | ||||
|              p1, | ||||
|              p2, | ||||
|              p3, | ||||
|              p4, | ||||
|              p5); | ||||
|  | ||||
|   return p5; | ||||
| #  else /* __KERNEL_OPTIX__ */ | ||||
|   if (!scene_intersect_valid(ray)) { | ||||
|     local_isect->num_hits = 0; | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
| #    ifdef __EMBREE__ | ||||
|   if (kernel_data.bvh.scene) { | ||||
|     CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SSS); | ||||
| @@ -297,12 +370,14 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals *kg, | ||||
|     return local_isect->num_hits > 0; | ||||
|   } | ||||
| #    endif /* __EMBREE__ */ | ||||
|  | ||||
| #    ifdef __OBJECT_MOTION__ | ||||
|   if (kernel_data.bvh.have_motion) { | ||||
|     return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits); | ||||
|   } | ||||
| #    endif /* __OBJECT_MOTION__ */ | ||||
|   return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits); | ||||
| #  endif   /* __KERNEL_OPTIX__ */ | ||||
| } | ||||
| #endif | ||||
|  | ||||
| @@ -316,10 +391,40 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, | ||||
| { | ||||
|   PROFILING_INIT(kg, PROFILING_INTERSECT_SHADOW_ALL); | ||||
|  | ||||
| #  ifdef __KERNEL_OPTIX__ | ||||
|   uint p0 = ((uint64_t)isect) & 0xFFFFFFFF; | ||||
|   uint p1 = (((uint64_t)isect) >> 32) & 0xFFFFFFFF; | ||||
|   uint p3 = max_hits; | ||||
|   uint p4 = visibility; | ||||
|   uint p5 = false; | ||||
|  | ||||
|   *num_hits = 0;  // Initialize hit count to zero | ||||
|   optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, | ||||
|              ray->P, | ||||
|              ray->D, | ||||
|              0.0f, | ||||
|              ray->t, | ||||
|              ray->time, | ||||
|              // Need to always call into __anyhit__kernel_optix_shadow_all_hit | ||||
|              0xFF, | ||||
|              OPTIX_RAY_FLAG_ENFORCE_ANYHIT, | ||||
|              2, | ||||
|              0, | ||||
|              0,  // SBT offset for PG_HITS | ||||
|              p0, | ||||
|              p1, | ||||
|              *num_hits, | ||||
|              p3, | ||||
|              p4, | ||||
|              p5); | ||||
|  | ||||
|   return p5; | ||||
| #  else /* __KERNEL_OPTIX__ */ | ||||
|   if (!scene_intersect_valid(ray)) { | ||||
|     *num_hits = 0; | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
| #    ifdef __EMBREE__ | ||||
|   if (kernel_data.bvh.scene) { | ||||
|     CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_SHADOW_ALL); | ||||
| @@ -337,7 +442,8 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, | ||||
|     *num_hits = ctx.num_hits; | ||||
|     return rtc_ray.tfar == -INFINITY; | ||||
|   } | ||||
| #  endif | ||||
| #    endif /* __EMBREE__ */ | ||||
|  | ||||
| #    ifdef __OBJECT_MOTION__ | ||||
|   if (kernel_data.bvh.have_motion) { | ||||
| #      ifdef __HAIR__ | ||||
| @@ -356,13 +462,21 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals *kg, | ||||
|   } | ||||
| #    endif /* __HAIR__ */ | ||||
|  | ||||
| #    ifdef __KERNEL_CPU__ | ||||
| #      ifdef __INSTANCING__ | ||||
|   if (kernel_data.bvh.have_instancing) { | ||||
|     return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits); | ||||
|   } | ||||
| #      endif /* __INSTANCING__ */ | ||||
|  | ||||
|   return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits); | ||||
| #    else | ||||
| #      ifdef __INSTANCING__ | ||||
|   return bvh_intersect_shadow_all_instancing(kg, ray, isect, visibility, max_hits, num_hits); | ||||
| #      else | ||||
|   return bvh_intersect_shadow_all(kg, ray, isect, visibility, max_hits, num_hits); | ||||
| #      endif /* __INSTANCING__ */ | ||||
| #    endif   /* __KERNEL_CPU__ */ | ||||
| #  endif     /* __KERNEL_OPTIX__ */ | ||||
| } | ||||
| #endif /* __SHADOW_RECORD_ALL__ */ | ||||
|  | ||||
| @@ -374,6 +488,42 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, | ||||
| { | ||||
|   PROFILING_INIT(kg, PROFILING_INTERSECT_VOLUME); | ||||
|  | ||||
| #  ifdef __KERNEL_OPTIX__ | ||||
|   uint p0 = 0; | ||||
|   uint p1 = 0; | ||||
|   uint p2 = 0; | ||||
|   uint p3 = 0; | ||||
|   uint p4 = visibility; | ||||
|   uint p5 = PRIMITIVE_NONE; | ||||
|  | ||||
|   optixTrace(scene_intersect_valid(ray) ? kernel_data.bvh.scene : 0, | ||||
|              ray->P, | ||||
|              ray->D, | ||||
|              0.0f, | ||||
|              ray->t, | ||||
|              ray->time, | ||||
|              // Visibility mask set to only intersect objects with volumes | ||||
|              0x02, | ||||
|              OPTIX_RAY_FLAG_NONE, | ||||
|              0, | ||||
|              0, | ||||
|              0,  // SBT offset for PG_HITD | ||||
|              p0, | ||||
|              p1, | ||||
|              p2, | ||||
|              p3, | ||||
|              p4, | ||||
|              p5); | ||||
|  | ||||
|   isect->t = __uint_as_float(p0); | ||||
|   isect->u = __uint_as_float(p1); | ||||
|   isect->v = __uint_as_float(p2); | ||||
|   isect->prim = p3; | ||||
|   isect->object = p4; | ||||
|   isect->type = p5; | ||||
|  | ||||
|   return p5 != PRIMITIVE_NONE; | ||||
| #  else /* __KERNEL_OPTIX__ */ | ||||
|   if (!scene_intersect_valid(ray)) { | ||||
|     return false; | ||||
|   } | ||||
| @@ -398,6 +548,7 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals *kg, | ||||
|   return bvh_intersect_volume(kg, ray, isect, visibility); | ||||
| #      endif /* __INSTANCING__ */ | ||||
| #    endif   /* __KERNEL_CPU__ */ | ||||
| #  endif     /* __KERNEL_OPTIX__ */ | ||||
| } | ||||
| #endif /* __VOLUME__ */ | ||||
|  | ||||
| @@ -413,6 +564,7 @@ ccl_device_intersect uint scene_intersect_volume_all(KernelGlobals *kg, | ||||
|   if (!scene_intersect_valid(ray)) { | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
| #  ifdef __EMBREE__ | ||||
|   if (kernel_data.bvh.scene) { | ||||
|     CCLIntersectContext ctx(kg, CCLIntersectContext::RAY_VOLUME_ALL); | ||||
|   | ||||
| @@ -38,12 +38,14 @@ ccl_device_forceinline bool cardinal_curve_intersect(KernelGlobals *kg, | ||||
| { | ||||
|   const bool is_curve_primitive = (type & PRIMITIVE_CURVE); | ||||
|  | ||||
| #  ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */ | ||||
|   if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { | ||||
|     const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); | ||||
|     if (time < prim_time.x || time > prim_time.y) { | ||||
|       return false; | ||||
|     } | ||||
|   } | ||||
| #  endif | ||||
|  | ||||
|   int segment = PRIMITIVE_UNPACK_SEGMENT(type); | ||||
|   float epsilon = 0.0f; | ||||
| @@ -505,12 +507,14 @@ ccl_device_forceinline bool curve_intersect(KernelGlobals *kg, | ||||
|  | ||||
|   const bool is_curve_primitive = (type & PRIMITIVE_CURVE); | ||||
|  | ||||
| #  ifndef __KERNEL_OPTIX__ /* see OptiX motion flag OPTIX_MOTION_FLAG_[START|END]_VANISH */ | ||||
|   if (!is_curve_primitive && kernel_data.bvh.use_bvh_steps) { | ||||
|     const float2 prim_time = kernel_tex_fetch(__prim_time, curveAddr); | ||||
|     if (time < prim_time.x || time > prim_time.y) { | ||||
|       return false; | ||||
|     } | ||||
|   } | ||||
| #  endif | ||||
|  | ||||
|   int segment = PRIMITIVE_UNPACK_SEGMENT(type); | ||||
|   /* curve Intersection check */ | ||||
|   | ||||
							
								
								
									
										89
									
								
								intern/cycles/kernel/kernel_compat_optix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								intern/cycles/kernel/kernel_compat_optix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,89 @@ | ||||
| /* | ||||
|  * Copyright 2019, NVIDIA Corporation. | ||||
|  * Copyright 2019, Blender Foundation. | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #ifndef __KERNEL_COMPAT_OPTIX_H__ | ||||
| #define __KERNEL_COMPAT_OPTIX_H__ | ||||
|  | ||||
| #define OPTIX_DONT_INCLUDE_CUDA | ||||
| #include <optix.h> | ||||
|  | ||||
| #define __KERNEL_GPU__ | ||||
| #define __KERNEL_CUDA__  // OptiX kernels are implicitly CUDA kernels too | ||||
| #define __KERNEL_OPTIX__ | ||||
| #define CCL_NAMESPACE_BEGIN | ||||
| #define CCL_NAMESPACE_END | ||||
|  | ||||
| #ifndef ATTR_FALLTHROUGH | ||||
| #  define ATTR_FALLTHROUGH | ||||
| #endif | ||||
|  | ||||
| typedef unsigned int uint32_t; | ||||
| typedef unsigned long long uint64_t; | ||||
| typedef unsigned short half; | ||||
| typedef unsigned long long CUtexObject; | ||||
|  | ||||
| #define FLT_MIN 1.175494350822287507969e-38f | ||||
| #define FLT_MAX 340282346638528859811704183484516925440.0f | ||||
|  | ||||
| __device__ half __float2half(const float f) | ||||
| { | ||||
|   half val; | ||||
|   asm("{  cvt.rn.f16.f32 %0, %1;}\n" : "=h"(val) : "f"(f)); | ||||
|   return val; | ||||
| } | ||||
|  | ||||
| /* Selective nodes compilation. */ | ||||
| #ifndef __NODES_MAX_GROUP__ | ||||
| #  define __NODES_MAX_GROUP__ NODE_GROUP_LEVEL_MAX | ||||
| #endif | ||||
| #ifndef __NODES_FEATURES__ | ||||
| #  define __NODES_FEATURES__ NODE_FEATURE_ALL | ||||
| #endif | ||||
|  | ||||
| #define ccl_device \ | ||||
|   __device__ __forceinline__  // Function calls are bad for OptiX performance, so inline everything | ||||
| #define ccl_device_inline ccl_device | ||||
| #define ccl_device_forceinline ccl_device | ||||
| #define ccl_device_noinline __device__ __noinline__ | ||||
| #define ccl_device_noinline_cpu ccl_device | ||||
| #define ccl_global | ||||
| #define ccl_static_constant __constant__ | ||||
| #define ccl_constant const | ||||
| #define ccl_local | ||||
| #define ccl_local_param | ||||
| #define ccl_private | ||||
| #define ccl_may_alias | ||||
| #define ccl_addr_space | ||||
| #define ccl_restrict __restrict__ | ||||
| #define ccl_ref | ||||
| #define ccl_align(n) __align__(n) | ||||
|  | ||||
| // Zero initialize structs to help the compiler figure out scoping | ||||
| #define ccl_optional_struct_init = {} | ||||
|  | ||||
| #define kernel_data __params.data  // See kernel_globals.h | ||||
| #define kernel_tex_array(t) __params.t | ||||
| #define kernel_tex_fetch(t, index) __params.t[(index)] | ||||
|  | ||||
| #define kernel_assert(cond) | ||||
|  | ||||
| /* Types */ | ||||
|  | ||||
| #include "util/util_half.h" | ||||
| #include "util/util_types.h" | ||||
|  | ||||
| #endif /* __KERNEL_COMPAT_OPTIX_H__ */ | ||||
| @@ -90,12 +90,43 @@ typedef struct KernelGlobals { | ||||
|  | ||||
| #endif /* __KERNEL_CPU__ */ | ||||
|  | ||||
| #ifdef __KERNEL_OPTIX__ | ||||
|  | ||||
| typedef struct ShaderParams { | ||||
|   uint4 *input; | ||||
|   float4 *output; | ||||
|   int type; | ||||
|   int filter; | ||||
|   int sx; | ||||
|   int offset; | ||||
|   int sample; | ||||
| } ShaderParams; | ||||
|  | ||||
| typedef struct KernelParams { | ||||
|   WorkTile tile; | ||||
|   KernelData data; | ||||
|   ShaderParams shader; | ||||
| #  define KERNEL_TEX(type, name) const type *name; | ||||
| #  include "kernel/kernel_textures.h" | ||||
| } KernelParams; | ||||
|  | ||||
| typedef struct KernelGlobals { | ||||
| #  ifdef __VOLUME__ | ||||
|   VolumeState volume_state; | ||||
| #  endif | ||||
|   Intersection hits_stack[64]; | ||||
| } KernelGlobals; | ||||
|  | ||||
| extern "C" __constant__ KernelParams __params; | ||||
|  | ||||
| #else /* __KERNEL_OPTIX__ */ | ||||
|  | ||||
| /* For CUDA, constant memory textures must be globals, so we can't put them | ||||
|  * into a struct. As a result we don't actually use this struct and use actual | ||||
|  * globals and simply pass along a NULL pointer everywhere, which we hope gets | ||||
|  * optimized out. */ | ||||
|  | ||||
| #ifdef __KERNEL_CUDA__ | ||||
| #  ifdef __KERNEL_CUDA__ | ||||
|  | ||||
| __constant__ KernelData __data; | ||||
| typedef struct KernelGlobals { | ||||
| @@ -106,7 +137,9 @@ typedef struct KernelGlobals { | ||||
| #    define KERNEL_TEX(type, name) const __constant__ __device__ type *name; | ||||
| #    include "kernel/kernel_textures.h" | ||||
|  | ||||
| #endif /* __KERNEL_CUDA__ */ | ||||
| #  endif /* __KERNEL_CUDA__ */ | ||||
|  | ||||
| #endif /* __KERNEL_OPTIX__ */ | ||||
|  | ||||
| /* OpenCL */ | ||||
|  | ||||
|   | ||||
| @@ -326,7 +326,13 @@ ccl_device_forceinline bool kernel_path_shader_apply(KernelGlobals *kg, | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| ccl_device_noinline void kernel_path_ao(KernelGlobals *kg, | ||||
| #ifdef __KERNEL_OPTIX__ | ||||
| ccl_device_inline /* inline trace calls */ | ||||
| #else | ||||
| ccl_device_noinline | ||||
| #endif | ||||
|     void | ||||
|     kernel_path_ao(KernelGlobals *kg, | ||||
|                    ShaderData *sd, | ||||
|                    ShaderData *emission_sd, | ||||
|                    PathRadiance *L, | ||||
| @@ -655,9 +661,11 @@ ccl_device void kernel_path_trace( | ||||
|  | ||||
|   kernel_path_trace_setup(kg, sample, x, y, &rng_hash, &ray); | ||||
|  | ||||
| #  ifndef __KERNEL_OPTIX__ | ||||
|   if (ray.t == 0.0f) { | ||||
|     return; | ||||
|   } | ||||
| #  endif | ||||
|  | ||||
|   /* Initialize state. */ | ||||
|   float3 throughput = make_float3(1.0f, 1.0f, 1.0f); | ||||
| @@ -671,6 +679,13 @@ ccl_device void kernel_path_trace( | ||||
|   PathState state; | ||||
|   path_state_init(kg, emission_sd, &state, rng_hash, sample, &ray); | ||||
|  | ||||
| #  ifdef __KERNEL_OPTIX__ | ||||
|   /* Force struct into local memory to avoid costly spilling on trace calls. */ | ||||
|   if (pass_stride < 0) /* This is never executed and just prevents the compiler from doing SROA. */ | ||||
|     for (int i = 0; i < sizeof(L); ++i) | ||||
|       reinterpret_cast<unsigned char *>(&L)[-pass_stride + i] = 0; | ||||
| #  endif | ||||
|  | ||||
|   /* Integrate. */ | ||||
|   kernel_path_integrate(kg, &state, throughput, &ray, &L, buffer, emission_sd); | ||||
|  | ||||
|   | ||||
| @@ -48,7 +48,13 @@ ccl_device void shader_setup_object_transforms(KernelGlobals *kg, ShaderData *sd | ||||
| } | ||||
| #endif | ||||
|  | ||||
| ccl_device_noinline void shader_setup_from_ray(KernelGlobals *kg, | ||||
| #ifdef __KERNEL_OPTIX__ | ||||
| ccl_device_inline | ||||
| #else | ||||
| ccl_device_noinline | ||||
| #endif | ||||
|     void | ||||
|     shader_setup_from_ray(KernelGlobals *kg, | ||||
|                           ShaderData *sd, | ||||
|                           const Intersection *isect, | ||||
|                           const Ray *ray) | ||||
|   | ||||
| @@ -17,13 +17,6 @@ | ||||
| CCL_NAMESPACE_BEGIN | ||||
|  | ||||
| #ifdef __VOLUME__ | ||||
| typedef struct VolumeState { | ||||
| #  ifdef __SPLIT_KERNEL__ | ||||
| #  else | ||||
|   PathState ps; | ||||
| #  endif | ||||
| } VolumeState; | ||||
|  | ||||
| /* Get PathState ready for use for volume stack evaluation. */ | ||||
| #  ifdef __SPLIT_KERNEL__ | ||||
| ccl_addr_space | ||||
| @@ -55,12 +48,11 @@ ccl_addr_space | ||||
| /* Attenuate throughput accordingly to the given intersection event. | ||||
|  * Returns true if the throughput is zero and traversal can be aborted. | ||||
|  */ | ||||
| ccl_device_forceinline bool shadow_handle_transparent_isect( | ||||
|     KernelGlobals *kg, | ||||
| ccl_device_forceinline bool shadow_handle_transparent_isect(KernelGlobals *kg, | ||||
|                                                             ShaderData *shadow_sd, | ||||
|                                                             ccl_addr_space PathState *state, | ||||
| #ifdef __VOLUME__ | ||||
|     ccl_addr_space struct PathState *volume_state, | ||||
|                                                             ccl_addr_space PathState *volume_state, | ||||
| #endif | ||||
|                                                             Intersection *isect, | ||||
|                                                             Ray *ray, | ||||
| @@ -163,7 +155,11 @@ ccl_device bool shadow_blocked_transparent_all_loop(KernelGlobals *kg, | ||||
|   uint num_hits; | ||||
|   const bool blocked = scene_intersect_shadow_all(kg, ray, hits, visibility, max_hits, &num_hits); | ||||
| #    ifdef __VOLUME__ | ||||
| #      ifdef __KERNEL_OPTIX__ | ||||
|   VolumeState &volume_state = kg->volume_state; | ||||
| #      else | ||||
|   VolumeState volume_state; | ||||
| #      endif | ||||
| #    endif | ||||
|   /* If no opaque surface found but we did find transparent hits, | ||||
|    * shade them. | ||||
| @@ -302,7 +298,11 @@ ccl_device bool shadow_blocked_transparent_stepped_loop(KernelGlobals *kg, | ||||
|                                                         float3 *shadow) | ||||
| { | ||||
| #    ifdef __VOLUME__ | ||||
| #      ifdef __KERNEL_OPTIX__ | ||||
|   VolumeState &volume_state = kg->volume_state; | ||||
| #      else | ||||
|   VolumeState volume_state; | ||||
| #      endif | ||||
| #    endif | ||||
|   if (blocked && is_transparent_isect) { | ||||
|     float3 throughput = make_float3(1.0f, 1.0f, 1.0f); | ||||
| @@ -387,32 +387,38 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, | ||||
|                                       ShaderData *sd, | ||||
|                                       ShaderData *shadow_sd, | ||||
|                                       ccl_addr_space PathState *state, | ||||
|                                       Ray *ray_input, | ||||
|                                       Ray *ray, | ||||
|                                       float3 *shadow) | ||||
| { | ||||
|   Ray *ray = ray_input; | ||||
|   Intersection isect; | ||||
|   /* Some common early checks. */ | ||||
|   *shadow = make_float3(1.0f, 1.0f, 1.0f); | ||||
| #if !defined(__KERNEL_OPTIX__) | ||||
|   /* Some common early checks. | ||||
|    * Avoid conditional trace call in OptiX though, since those hurt performance there. | ||||
|    */ | ||||
|   if (ray->t == 0.0f) { | ||||
|     return false; | ||||
|   } | ||||
| #endif | ||||
| #ifdef __SHADOW_TRICKS__ | ||||
|   const uint visibility = (state->flag & PATH_RAY_SHADOW_CATCHER) ? PATH_RAY_SHADOW_NON_CATCHER : | ||||
|                                                                     PATH_RAY_SHADOW; | ||||
| #else | ||||
|   const uint visibility = PATH_RAY_SHADOW; | ||||
| #endif | ||||
|   /* Do actual shadow shading. */ | ||||
|   /* First of all, we check if integrator requires transparent shadows. | ||||
|   /* Do actual shadow shading. | ||||
|    * First of all, we check if integrator requires transparent shadows. | ||||
|    * if not, we use simplest and fastest ever way to calculate occlusion. | ||||
|    * Do not do this in OptiX to avoid the additional trace call. | ||||
|    */ | ||||
| #ifdef __TRANSPARENT_SHADOWS__ | ||||
| #if !defined(__KERNEL_OPTIX__) || !defined(__TRANSPARENT_SHADOWS__) | ||||
|   Intersection isect; | ||||
| #  ifdef __TRANSPARENT_SHADOWS__ | ||||
|   if (!kernel_data.integrator.transparent_shadows) | ||||
| #endif | ||||
| #  endif | ||||
|   { | ||||
|     return shadow_blocked_opaque(kg, shadow_sd, state, visibility, ray, &isect, shadow); | ||||
|   } | ||||
| #endif | ||||
| #ifdef __TRANSPARENT_SHADOWS__ | ||||
| #  ifdef __SHADOW_RECORD_ALL__ | ||||
|   /* For the transparent shadows we try to use record-all logic on the | ||||
| @@ -426,7 +432,7 @@ ccl_device_inline bool shadow_blocked(KernelGlobals *kg, | ||||
|     return true; | ||||
|   } | ||||
|   const uint max_hits = transparent_max_bounce - state->transparent_bounce - 1; | ||||
| #    ifdef __KERNEL_GPU__ | ||||
| #    if defined(__KERNEL_GPU__) && !defined(__KERNEL_OPTIX__) | ||||
|   /* On GPU we do tricky with tracing opaque ray first, this avoids speed | ||||
|    * regressions in some files. | ||||
|    * | ||||
|   | ||||
| @@ -353,7 +353,13 @@ ccl_device void subsurface_random_walk_coefficients(const ShaderClosure *sc, | ||||
|   *weight = safe_divide_color(bssrdf->weight, A); | ||||
| } | ||||
|  | ||||
| ccl_device_noinline bool subsurface_random_walk(KernelGlobals *kg, | ||||
| #ifdef __KERNEL_OPTIX__ | ||||
| ccl_device_inline /* inline trace calls */ | ||||
| #else | ||||
| ccl_device_noinline | ||||
| #endif | ||||
|     bool | ||||
|     subsurface_random_walk(KernelGlobals *kg, | ||||
|                            LocalIntersection *ss_isect, | ||||
|                            ShaderData *sd, | ||||
|                            ccl_addr_space PathState *state, | ||||
|   | ||||
| @@ -143,6 +143,13 @@ CCL_NAMESPACE_BEGIN | ||||
| #  endif | ||||
| #endif /* __KERNEL_CUDA__ */ | ||||
|  | ||||
| #ifdef __KERNEL_OPTIX__ | ||||
| #  undef __BAKING__ | ||||
| #  undef __BRANCHED_PATH__ | ||||
| /* TODO(pmours): Cannot use optixTrace in non-inlined functions */ | ||||
| #  undef __SHADER_RAYTRACE__ | ||||
| #endif /* __KERNEL_OPTIX__ */ | ||||
|  | ||||
| #ifdef __KERNEL_OPENCL__ | ||||
| #endif /* __KERNEL_OPENCL__ */ | ||||
|  | ||||
| @@ -1056,6 +1063,15 @@ typedef struct PathState { | ||||
| #endif | ||||
| } PathState; | ||||
|  | ||||
| #ifdef __VOLUME__ | ||||
| typedef struct VolumeState { | ||||
| #  ifdef __SPLIT_KERNEL__ | ||||
| #  else | ||||
|   PathState ps; | ||||
| #  endif | ||||
| } VolumeState; | ||||
| #endif | ||||
|  | ||||
| /* Struct to gather multiple nearby intersections. */ | ||||
| typedef struct LocalIntersection { | ||||
|   Ray ray; | ||||
| @@ -1343,9 +1359,12 @@ typedef enum KernelBVHLayout { | ||||
|   BVH_LAYOUT_BVH2 = (1 << 0), | ||||
|   BVH_LAYOUT_BVH4 = (1 << 1), | ||||
|   BVH_LAYOUT_BVH8 = (1 << 2), | ||||
|  | ||||
|   BVH_LAYOUT_EMBREE = (1 << 3), | ||||
|   BVH_LAYOUT_OPTIX = (1 << 4), | ||||
|  | ||||
|   BVH_LAYOUT_DEFAULT = BVH_LAYOUT_BVH8, | ||||
|   BVH_LAYOUT_ALL = (unsigned int)(-1), | ||||
|   BVH_LAYOUT_ALL = (unsigned int)(~0u), | ||||
| } KernelBVHLayout; | ||||
|  | ||||
| typedef struct KernelBVH { | ||||
| @@ -1357,14 +1376,18 @@ typedef struct KernelBVH { | ||||
|   int bvh_layout; | ||||
|   int use_bvh_steps; | ||||
|  | ||||
|   /* Embree */ | ||||
| #ifdef __EMBREE__ | ||||
|   /* Custom BVH */ | ||||
| #ifdef __KERNEL_OPTIX__ | ||||
|   OptixTraversableHandle scene; | ||||
| #else | ||||
| #  ifdef __EMBREE__ | ||||
|   RTCScene scene; | ||||
| #    ifndef __KERNEL_64_BIT__ | ||||
|   int pad1; | ||||
|   int pad2; | ||||
| #    endif | ||||
| #  else | ||||
|   int scene, pad2; | ||||
| #  endif | ||||
| #else | ||||
|   int pad1, pad2; | ||||
| #endif | ||||
| } KernelBVH; | ||||
| static_assert_align(KernelBVH, 16); | ||||
|   | ||||
							
								
								
									
										294
									
								
								intern/cycles/kernel/kernels/optix/kernel_optix.cu
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										294
									
								
								intern/cycles/kernel/kernels/optix/kernel_optix.cu
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,294 @@ | ||||
| /* | ||||
|  * Copyright 2019, NVIDIA Corporation. | ||||
|  * Copyright 2019, Blender Foundation. | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  * http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| #include "kernel/kernel_compat_optix.h" | ||||
| #include "util/util_atomic.h" | ||||
| #include "kernel/kernel_types.h" | ||||
| #include "kernel/kernel_globals.h" | ||||
| #include "../cuda/kernel_cuda_image.h"  // Texture lookup uses normal CUDA intrinsics | ||||
|  | ||||
| #include "kernel/kernel_path.h" | ||||
| #include "kernel/kernel_bake.h" | ||||
|  | ||||
| template<typename T> ccl_device_forceinline T *get_payload_ptr_0() | ||||
| { | ||||
|   return (T *)(((uint64_t)optixGetPayload_1() << 32) | optixGetPayload_0()); | ||||
| } | ||||
| template<typename T> ccl_device_forceinline T *get_payload_ptr_2() | ||||
| { | ||||
|   return (T *)(((uint64_t)optixGetPayload_3() << 32) | optixGetPayload_2()); | ||||
| } | ||||
|  | ||||
| template<bool always = false> ccl_device_forceinline uint get_object_id() | ||||
| { | ||||
| #ifdef __OBJECT_MOTION__ | ||||
|   // Always get the the instance ID from the TLAS | ||||
|   // There might be a motion transform node between TLAS and BLAS which does not have one | ||||
|   uint object = optixGetInstanceIdFromHandle(optixGetTransformListHandle(0)); | ||||
| #else | ||||
|   uint object = optixGetInstanceId(); | ||||
| #endif | ||||
|   // Choose between always returning object ID or only for instances | ||||
|   if (always) | ||||
|     // Can just remove the high bit since instace always contains object ID | ||||
|     return object & 0x7FFFFF; | ||||
|   // Set to OBJECT_NONE if this is not an instanced object | ||||
|   else if (object & 0x800000) | ||||
|     object = OBJECT_NONE; | ||||
|   return object; | ||||
| } | ||||
|  | ||||
| extern "C" __global__ void __raygen__kernel_optix_path_trace() | ||||
| { | ||||
|   KernelGlobals kg;  // Allocate stack storage for common data | ||||
|  | ||||
|   const uint3 launch_index = optixGetLaunchIndex(); | ||||
|   // Keep threads for same pixel together to improve occupancy of warps | ||||
|   uint pixel_offset = launch_index.x / __params.tile.num_samples; | ||||
|   uint sample_offset = launch_index.x % __params.tile.num_samples; | ||||
|  | ||||
|   kernel_path_trace(&kg, | ||||
|                     __params.tile.buffer, | ||||
|                     __params.tile.start_sample + sample_offset, | ||||
|                     __params.tile.x + pixel_offset, | ||||
|                     __params.tile.y + launch_index.y, | ||||
|                     __params.tile.offset, | ||||
|                     __params.tile.stride); | ||||
| } | ||||
|  | ||||
| #ifdef __BAKING__ | ||||
| extern "C" __global__ void __raygen__kernel_optix_bake() | ||||
| { | ||||
|   KernelGlobals kg; | ||||
|   const ShaderParams &p = __params.shader; | ||||
|   kernel_bake_evaluate(&kg, | ||||
|                        p.input, | ||||
|                        p.output, | ||||
|                        (ShaderEvalType)p.type, | ||||
|                        p.filter, | ||||
|                        p.sx + optixGetLaunchIndex().x, | ||||
|                        p.offset, | ||||
|                        p.sample); | ||||
| } | ||||
| #endif | ||||
|  | ||||
| extern "C" __global__ void __raygen__kernel_optix_displace() | ||||
| { | ||||
|   KernelGlobals kg; | ||||
|   const ShaderParams &p = __params.shader; | ||||
|   kernel_displace_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x); | ||||
| } | ||||
|  | ||||
| extern "C" __global__ void __raygen__kernel_optix_background() | ||||
| { | ||||
|   KernelGlobals kg; | ||||
|   const ShaderParams &p = __params.shader; | ||||
|   kernel_background_evaluate(&kg, p.input, p.output, p.sx + optixGetLaunchIndex().x); | ||||
| } | ||||
|  | ||||
| extern "C" __global__ void __miss__kernel_optix_miss() | ||||
| { | ||||
|   // 'kernel_path_lamp_emission' checks intersection distance, so need to set it even on a miss | ||||
|   optixSetPayload_0(__float_as_uint(optixGetRayTmax())); | ||||
|   optixSetPayload_5(PRIMITIVE_NONE); | ||||
| } | ||||
|  | ||||
| extern "C" __global__ void __anyhit__kernel_optix_local_hit() | ||||
| { | ||||
| #ifdef __BVH_LOCAL__ | ||||
|   const uint object = get_object_id<true>(); | ||||
|   if (object != optixGetPayload_4() /* local_object */) { | ||||
|     // Only intersect with matching object | ||||
|     return optixIgnoreIntersection(); | ||||
|   } | ||||
|  | ||||
|   int hit = 0; | ||||
|   uint *const lcg_state = get_payload_ptr_0<uint>(); | ||||
|   LocalIntersection *const local_isect = get_payload_ptr_2<LocalIntersection>(); | ||||
|  | ||||
|   if (lcg_state) { | ||||
|     const uint max_hits = optixGetPayload_5(); | ||||
|     for (int i = min(max_hits, local_isect->num_hits) - 1; i >= 0; --i) { | ||||
|       if (optixGetRayTmax() == local_isect->hits[i].t) { | ||||
|         return optixIgnoreIntersection(); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     hit = local_isect->num_hits++; | ||||
|  | ||||
|     if (local_isect->num_hits > max_hits) { | ||||
|       hit = lcg_step_uint(lcg_state) % local_isect->num_hits; | ||||
|       if (hit >= max_hits) { | ||||
|         return optixIgnoreIntersection(); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   else { | ||||
|     if (local_isect->num_hits && optixGetRayTmax() > local_isect->hits[0].t) { | ||||
|       // Record closest intersection only (do not terminate ray here, since there is no guarantee | ||||
|       // about distance ordering in anyhit) | ||||
|       return optixIgnoreIntersection(); | ||||
|     } | ||||
|  | ||||
|     local_isect->num_hits = 1; | ||||
|   } | ||||
|  | ||||
|   Intersection *isect = &local_isect->hits[hit]; | ||||
|   isect->t = optixGetRayTmax(); | ||||
|   isect->prim = optixGetPrimitiveIndex(); | ||||
|   isect->object = get_object_id(); | ||||
|   isect->type = kernel_tex_fetch(__prim_type, isect->prim); | ||||
|  | ||||
|   if (optixIsTriangleHit()) { | ||||
|     const float2 barycentrics = optixGetTriangleBarycentrics(); | ||||
|     isect->u = 1.0f - barycentrics.y - barycentrics.x; | ||||
|     isect->v = barycentrics.x; | ||||
|   } | ||||
|   else { | ||||
|     isect->u = __uint_as_float(optixGetAttribute_0()); | ||||
|     isect->v = __uint_as_float(optixGetAttribute_1()); | ||||
|   } | ||||
|  | ||||
|   // Record geometric normal | ||||
|   const uint tri_vindex = kernel_tex_fetch(__prim_tri_index, isect->prim); | ||||
|   const float3 tri_a = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 0)); | ||||
|   const float3 tri_b = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 1)); | ||||
|   const float3 tri_c = float4_to_float3(kernel_tex_fetch(__prim_tri_verts, tri_vindex + 2)); | ||||
|   local_isect->Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a)); | ||||
|  | ||||
|   // Continue tracing (without this the trace call would return after the first hit) | ||||
|   optixIgnoreIntersection(); | ||||
| #endif | ||||
| } | ||||
|  | ||||
| extern "C" __global__ void __anyhit__kernel_optix_shadow_all_hit() | ||||
| { | ||||
| #ifdef __SHADOW_RECORD_ALL__ | ||||
|   const uint prim = optixGetPrimitiveIndex(); | ||||
| #  ifdef __VISIBILITY_FLAG__ | ||||
|   const uint visibility = optixGetPayload_4(); | ||||
|   if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) { | ||||
|     return optixIgnoreIntersection(); | ||||
|   } | ||||
| #  endif | ||||
|  | ||||
|   // Offset into array with num_hits | ||||
|   Intersection *const isect = get_payload_ptr_0<Intersection>() + optixGetPayload_2(); | ||||
|   isect->t = optixGetRayTmax(); | ||||
|   isect->prim = prim; | ||||
|   isect->object = get_object_id(); | ||||
|   isect->type = kernel_tex_fetch(__prim_type, prim); | ||||
|  | ||||
|   if (optixIsTriangleHit()) { | ||||
|     const float2 barycentrics = optixGetTriangleBarycentrics(); | ||||
|     isect->u = 1.0f - barycentrics.y - barycentrics.x; | ||||
|     isect->v = barycentrics.x; | ||||
|   } | ||||
|   else { | ||||
|     isect->u = __uint_as_float(optixGetAttribute_0()); | ||||
|     isect->v = __uint_as_float(optixGetAttribute_1()); | ||||
|   } | ||||
|  | ||||
| #  ifdef __TRANSPARENT_SHADOWS__ | ||||
|   // Detect if this surface has a shader with transparent shadows | ||||
|   if (!shader_transparent_shadow(NULL, isect) || optixGetPayload_2() >= optixGetPayload_3()) { | ||||
| #  endif | ||||
|     // This is an opaque hit or the hit limit has been reached, abort traversal | ||||
|     optixSetPayload_5(true); | ||||
|     return optixTerminateRay(); | ||||
| #  ifdef __TRANSPARENT_SHADOWS__ | ||||
|   } | ||||
|  | ||||
|   // TODO(pmours): Do we need REQUIRE_UNIQUE_ANYHIT for this to work? | ||||
|   optixSetPayload_2(optixGetPayload_2() + 1);  // num_hits++ | ||||
|  | ||||
|   // Continue tracing | ||||
|   optixIgnoreIntersection(); | ||||
| #  endif | ||||
| #endif | ||||
| } | ||||
|  | ||||
| extern "C" __global__ void __anyhit__kernel_optix_visibility_test() | ||||
| { | ||||
|   uint visibility = optixGetPayload_4(); | ||||
| #ifdef __VISIBILITY_FLAG__ | ||||
|   const uint prim = optixGetPrimitiveIndex(); | ||||
|   if ((kernel_tex_fetch(__prim_visibility, prim) & visibility) == 0) | ||||
|     return optixIgnoreIntersection(); | ||||
| #endif | ||||
|  | ||||
|   // Shadow ray early termination | ||||
|   if (visibility & PATH_RAY_SHADOW_OPAQUE) | ||||
|     return optixTerminateRay(); | ||||
| } | ||||
|  | ||||
| extern "C" __global__ void __closesthit__kernel_optix_hit() | ||||
| { | ||||
|   optixSetPayload_0(__float_as_uint(optixGetRayTmax()));  // Intersection distance | ||||
|   optixSetPayload_3(optixGetPrimitiveIndex()); | ||||
|   optixSetPayload_4(get_object_id()); | ||||
|   // Can be PRIMITIVE_TRIANGLE and PRIMITIVE_MOTION_TRIANGLE or curve type and segment index | ||||
|   optixSetPayload_5(kernel_tex_fetch(__prim_type, optixGetPrimitiveIndex())); | ||||
|  | ||||
|   if (optixIsTriangleHit()) { | ||||
|     const float2 barycentrics = optixGetTriangleBarycentrics(); | ||||
|     optixSetPayload_1(__float_as_uint(1.0f - barycentrics.y - barycentrics.x)); | ||||
|     optixSetPayload_2(__float_as_uint(barycentrics.x)); | ||||
|   } | ||||
|   else { | ||||
|     optixSetPayload_1(optixGetAttribute_0()); | ||||
|     optixSetPayload_2(optixGetAttribute_1()); | ||||
|   } | ||||
| } | ||||
|  | ||||
| #ifdef __HAIR__ | ||||
| extern "C" __global__ void __intersection__curve() | ||||
| { | ||||
|   const uint prim = optixGetPrimitiveIndex(); | ||||
|   const uint object = get_object_id<true>(); | ||||
|   const uint type = kernel_tex_fetch(__prim_type, prim); | ||||
|   const uint visibility = optixGetPayload_4(); | ||||
|  | ||||
|   const float3 P = optixGetObjectRayOrigin(); | ||||
|   const float3 dir = optixGetObjectRayDirection(); | ||||
|  | ||||
| #  ifdef __OBJECT_MOTION__ | ||||
|   const float time = optixGetRayTime(); | ||||
| #  else | ||||
|   const float time = 0.0f; | ||||
| #  endif | ||||
|  | ||||
|   Intersection isect; | ||||
|   isect.t = optixGetRayTmax(); | ||||
|  | ||||
|   if (!(kernel_data.curve.curveflags & CURVE_KN_INTERPOLATE) ? | ||||
|           curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type) : | ||||
|           cardinal_curve_intersect(NULL, &isect, P, dir, visibility, object, prim, time, type)) { | ||||
|     optixReportIntersection(isect.t, | ||||
|                             type & PRIMITIVE_ALL, | ||||
|                             __float_as_int(isect.u),   // Attribute_0 | ||||
|                             __float_as_int(isect.v));  // Attribute_1 | ||||
|   } | ||||
| } | ||||
| #endif | ||||
|  | ||||
| #ifdef __KERNEL_DEBUG__ | ||||
| extern "C" __global__ void __exception__kernel_optix_exception() | ||||
| { | ||||
|   printf("Unhandled exception occured: code %d!\n", optixGetExceptionCode()); | ||||
| } | ||||
| #endif | ||||
		Reference in New Issue
	
	Block a user