Cycles: oneAPI: Improve performance of scenes not using volume #109245
|
@ -279,6 +279,27 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
/* oneAPI-only variant of the surface shading kernel that is built without NODE_VOLUME.
 *
 * NODE_VOLUME drags in a large amount of code (including the whole NanoVDB library), which
 * makes GPU compilers spend extra registers even though the feature is rarely used, and that
 * results in suboptimal execution. Dispatching to this specialized kernel when the feature is
 * not needed gives a noticeable speed-up for oneAPI execution.
 *
 * path_index_array: optional per-thread path state indices; may be null.
 * render_buffer:    buffer forwarded unchanged to the shading function.
 * work_size:        bound passed to the within-bounds check for this launch. */
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
    ccl_gpu_kernel_signature(integrator_shade_surface_no_volume,
                             ccl_global const int *path_index_array,
                             ccl_global float *render_buffer,
                             const int work_size)
{
  const int work_index = ccl_gpu_global_id_x();

  if (ccl_gpu_kernel_within_bounds(work_index, work_size)) {
    /* Without an index array the work index itself is used as the path state. */
    int state = work_index;
    if (path_index_array) {
      state = path_index_array[work_index];
    }

    ccl_gpu_kernel_call(integrator_shade_surface_no_volume(NULL, state, render_buffer));
  }
}
ccl_gpu_kernel_postfix
#endif
|
||||
|
||||
#if defined(__KERNEL_METAL_APPLE__) && defined(__METALRT__)
|
||||
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
|
||||
#endif
|
||||
|
|
|
@ -463,8 +463,18 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
|||
break;
|
||||
}
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE: {
|
||||
if (kernel_features & KERNEL_FEATURE_NODE_VOLUME) {
|
||||
oneapi_call(
|
||||
kg, cgh, global_size, local_size, args, oneapi_kernel_integrator_shade_surface);
|
||||
}
|
||||
else {
|
||||
oneapi_call(kg,
|
||||
cgh,
|
||||
global_size,
|
||||
local_size,
|
||||
args,
|
||||
oneapi_kernel_integrator_shade_surface_no_volume);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE: {
|
||||
|
|
|
@ -799,6 +799,15 @@ ccl_device_forceinline void integrator_shade_surface(KernelGlobals kg,
|
|||
integrator_shade_surface_next_kernel<current_kernel>(kg, state);
|
||||
}
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
/* Surface shading entry point for oneAPI with volume nodes compiled out.
 *
 * Instantiates integrator_shade_surface with the surface node mask from which both the
 * NODE_RAYTRACE and NODE_VOLUME feature bits have been cleared, and forwards all arguments
 * to it unchanged. */
ccl_device_forceinline void integrator_shade_surface_no_volume(
    KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
{
  /* Surface nodes only: no ray-tracing nodes and no volume nodes. */
  constexpr auto no_volume_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE &
                                          ~KERNEL_FEATURE_NODE_RAYTRACE &
                                          ~KERNEL_FEATURE_NODE_VOLUME;

  integrator_shade_surface<no_volume_feature_mask>(kg, state, render_buffer);
}
#endif
|
||||
|
||||
ccl_device_forceinline void integrator_shade_surface_raytrace(
|
||||
KernelGlobals kg, IntegratorState state, ccl_global float *ccl_restrict render_buffer)
|
||||
{
|
||||
|
|
|
@ -1944,6 +1944,19 @@ void PointDensityTextureNode::compile(OSLCompiler &compiler)
|
|||
}
|
||||
}
|
||||
|
||||
int PointDensityTextureNode::get_feature()
|
||||
{
|
||||
ShaderOutput *density_out = output("Density");
|
||||
ShaderOutput *color_out = output("Color");
|
||||
|
||||
const bool use_density = !density_out->links.empty();
|
||||
const bool use_color = !color_out->links.empty();
|
||||
|
||||
/* NOTE: A need for NODE_VOLUME feature is conditional and based on
|
||||
* the coresponding logic in ::compile implementation. */
|
||||
return ShaderNode::get_feature() | ((use_density || use_color) ? KERNEL_FEATURE_NODE_VOLUME : 0);
|
||||
}
|
||||
|
||||
/* Normal */
|
||||
|
||||
NODE_DEFINE(NormalNode)
|
||||
|
|
|
@ -346,6 +346,8 @@ class PointDensityTextureNode : public ShaderNode {
|
|||
public:
|
||||
SHADER_NODE_NO_CLONE_CLASS(PointDensityTextureNode)
|
||||
|
||||
/* Kernel feature flags needed by this node: the base ShaderNode flags, plus
 * KERNEL_FEATURE_NODE_VOLUME when the "Density" or "Color" output is linked. */
virtual int get_feature();
|
||||
|
||||
~PointDensityTextureNode();
|
||||
ShaderNode *clone(ShaderGraph *graph) const;
|
||||
void attributes(Shader *shader, AttributeRequestSet *attributes);
|
||||
|
|
Loading…
Reference in New Issue