Cycles: oneAPI: Improve performance of scenes not using volume #109245
|
@ -279,6 +279,27 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
/* oneAPI-only kernel that forwards each work item to integrator_shade_surface_no_volume().
 *
 * Rationale: the NODE_VOLUME feature brings in a large amount of code (the entire NanoVDB
 * library included), which makes GPU compilers spend additional registers even though the
 * feature is not used often, and execution becomes suboptimal. This specialized version gives
 * a noticeable speed-up for oneAPI execution. */
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_shade_surface_no_volume,
                             ccl_global const int *path_index_array,
                             ccl_global float *render_buffer,
                             const int work_size)
{
  const int work_item = ccl_gpu_global_id_x();

  if (ccl_gpu_kernel_within_bounds(work_item, work_size)) {
    /* When no index array is supplied, the work item index itself is used as the state. */
    int state = work_item;
    if (path_index_array) {
      state = path_index_array[work_item];
    }

    ccl_gpu_kernel_call(integrator_shade_surface_no_volume(NULL, state, render_buffer));
  }
}
ccl_gpu_kernel_postfix
#endif
|
||||
|
||||
#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
|
||||
/* Integer declared with the function_constant(Kernel_DummyConstant) attribute; compiled only when
 * both __KERNEL_METAL_APPLE__ and __METALRT__ are defined.
 * NOTE(review): nothing in this file section reads it, so its purpose is not visible here —
 * confirm before changing or removing. */
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
|
||||
#endif
|
||||
|
|
|
@ -463,8 +463,18 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
|||
break;
|
||||
}
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: {
  /* Enqueue exactly one shade-surface kernel per request. The full kernel is used when
   * kernel_features has KERNEL_FEATURE_NODE_VOLUME set; otherwise the specialized
   * no-volume kernel is used. An unconditional call ahead of this branch would dispatch
   * the surface shading pass a second time for the same work. */
  if (kernel_features & KERNEL_FEATURE_NODE_VOLUME) {
    oneapi_call(
        kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface);
  }
  else {
    oneapi_call(kg,
                cgh,
                global_size,
                local_size,
                args,
                oneapi_kernel_integrator_shade_surface_no_volume);
  }
  break;
}
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: {
|
||||
|
|
|
@ -799,6 +799,15 @@ ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
|
|||
integrator_shade_surface_next_kernel<current_kernel>(kg, state);
|
||||
}
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
/* Surface shading entry point that instantiates integrator_shade_surface() with the surface
 * node feature mask minus the ray-tracing and volume node features, and forwards all arguments
 * to it unchanged. Only compiled for oneAPI. */
ccl_device_forceinline void integrator_shade_surface_no_volume(
    KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
{
  integrator_shade_surface<KERNEL_FEATURE_NODE_MASK_SURFACE &
                           ~(KERNEL_FEATURE_NODE_RAYTRACE | KERNEL_FEATURE_NODE_VOLUME)>(
      kg, state, render_buffer);
}
#endif
|
||||
|
||||
ccl_device_forceinline void integrator_shade_surface_raytrace(
|
||||
KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue