|
|
|
@@ -58,6 +58,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
INTEGRATOR_STATE_WRITE(state, shadow_path, queued_kernel) = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_init_from_camera,
|
|
|
|
@@ -89,6 +90,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(
|
|
|
|
|
integrator_init_from_camera(nullptr, state, tile, render_buffer, x, y, sample));
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_init_from_bake,
|
|
|
|
@@ -120,6 +122,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(
|
|
|
|
|
integrator_init_from_bake(nullptr, state, tile, render_buffer, x, y, sample));
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_intersect_closest,
|
|
|
|
@@ -134,6 +137,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_intersect_closest(NULL, state, render_buffer));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_intersect_shadow,
|
|
|
|
@@ -147,6 +151,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_intersect_shadow(NULL, state));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_intersect_subsurface,
|
|
|
|
@@ -160,6 +165,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_intersect_subsurface(NULL, state));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
|
|
|
|
@@ -173,6 +179,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_intersect_volume_stack(NULL, state));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_shade_background,
|
|
|
|
@@ -187,6 +194,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_shade_background(NULL, state, render_buffer));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_shade_light,
|
|
|
|
@@ -201,6 +209,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_shade_light(NULL, state, render_buffer));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_shade_shadow,
|
|
|
|
@@ -215,6 +224,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_shade_shadow(NULL, state, render_buffer));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_shade_surface,
|
|
|
|
@@ -229,6 +239,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_shade_surface(NULL, state, render_buffer));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
#ifdef __KERNEL_METAL__
|
|
|
|
|
constant int __dummy_constant [[function_constant(0)]];
|
|
|
|
@@ -256,6 +267,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_shade_volume,
|
|
|
|
@@ -270,6 +282,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_shade_volume(NULL, state, render_buffer));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_queued_paths_array,
|
|
|
|
@@ -288,6 +301,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
num_indices,
|
|
|
|
|
ccl_gpu_kernel_lambda_pass);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_queued_shadow_paths_array,
|
|
|
|
@@ -306,6 +320,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
num_indices,
|
|
|
|
|
ccl_gpu_kernel_lambda_pass);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_active_paths_array,
|
|
|
|
@@ -321,6 +336,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
num_indices,
|
|
|
|
|
ccl_gpu_kernel_lambda_pass);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_terminated_paths_array,
|
|
|
|
@@ -337,6 +353,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
num_indices,
|
|
|
|
|
ccl_gpu_kernel_lambda_pass);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_terminated_shadow_paths_array,
|
|
|
|
@@ -353,6 +370,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
num_indices,
|
|
|
|
|
ccl_gpu_kernel_lambda_pass);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_sorted_paths_array,
|
|
|
|
@@ -380,6 +398,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
key_prefix_sum,
|
|
|
|
|
ccl_gpu_kernel_lambda_pass);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_compact_paths_array,
|
|
|
|
@@ -399,6 +418,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
num_indices,
|
|
|
|
|
ccl_gpu_kernel_lambda_pass);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_compact_states,
|
|
|
|
@@ -416,6 +436,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_state_move(NULL, to_state, from_state));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_compact_shadow_paths_array,
|
|
|
|
@@ -435,6 +456,7 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_ACTIVE_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
num_indices,
|
|
|
|
|
ccl_gpu_kernel_lambda_pass);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_signature(integrator_compact_shadow_states,
|
|
|
|
@@ -452,12 +474,14 @@ ccl_gpu_kernel_threads(GPU_PARALLEL_SORTED_INDEX_DEFAULT_BLOCK_SIZE)
|
|
|
|
|
ccl_gpu_kernel_call(integrator_shadow_state_move(NULL, to_state, from_state));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel_threads(GPU_PARALLEL_PREFIX_SUM_DEFAULT_BLOCK_SIZE) ccl_gpu_kernel_signature(
|
|
|
|
|
prefix_sum, ccl_global int *counter, ccl_global int *prefix_sum, int num_values)
|
|
|
|
|
{
|
|
|
|
|
gpu_parallel_prefix_sum(ccl_gpu_global_id_x(), counter, prefix_sum, num_values);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
/* --------------------------------------------------------------------
|
|
|
|
|
* Adaptive sampling.
|
|
|
|
@@ -494,6 +518,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
atomic_fetch_and_add_uint32(num_active_pixels, popcount(num_active_pixels_mask));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(adaptive_sampling_filter_x,
|
|
|
|
@@ -512,6 +537,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
kernel_adaptive_sampling_filter_x(NULL, render_buffer, sy + y, sx, sw, offset, stride));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(adaptive_sampling_filter_y,
|
|
|
|
@@ -530,6 +556,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
kernel_adaptive_sampling_filter_y(NULL, render_buffer, sx + x, sy, sh, offset, stride));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
/* --------------------------------------------------------------------
|
|
|
|
|
* Cryptomatte.
|
|
|
|
@@ -546,6 +573,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(kernel_cryptomatte_post(nullptr, render_buffer, pixel_index));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
/* --------------------------------------------------------------------
|
|
|
|
|
* Film.
|
|
|
|
@@ -627,6 +655,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
|
|
|
|
|
\
|
|
|
|
|
FILM_GET_PASS_PIXEL_F32(variant, input_channel_count); \
|
|
|
|
|
} \
|
|
|
|
|
ccl_gpu_kernel_postfix \
|
|
|
|
|
\
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \
|
|
|
|
|
ccl_gpu_kernel_signature(film_convert_##variant##_half_rgba, \
|
|
|
|
@@ -666,7 +695,8 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb
|
|
|
|
|
const half4 half_pixel = float4_to_half4_display( \
|
|
|
|
|
make_float4(pixel[0], pixel[1], pixel[2], pixel[3])); \
|
|
|
|
|
kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel); \
|
|
|
|
|
}
|
|
|
|
|
} \
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
/* 1 channel inputs */
|
|
|
|
|
KERNEL_FILM_CONVERT_VARIANT(depth, 1)
|
|
|
|
@@ -706,6 +736,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(kernel_displace_evaluate(NULL, input, output, offset + i));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
/* Background */
|
|
|
|
|
|
|
|
|
@@ -721,6 +752,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_call(kernel_background_evaluate(NULL, input, output, offset + i));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
/* Curve Shadow Transparency */
|
|
|
|
|
|
|
|
|
@@ -737,6 +769,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
kernel_curve_shadow_transparency_evaluate(NULL, input, output, offset + i));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
/* --------------------------------------------------------------------
|
|
|
|
|
* Denoising.
|
|
|
|
@@ -770,6 +803,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
color_out[1] = clamp(color_out[1], 0.0f, 10000.0f);
|
|
|
|
|
color_out[2] = clamp(color_out[2], 0.0f, 10000.0f);
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(filter_guiding_preprocess,
|
|
|
|
@@ -849,6 +883,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
flow_out[1] = -motion_in[1] * pixel_scale;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(filter_guiding_set_fake_albedo,
|
|
|
|
@@ -877,6 +912,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
albedo_out[1] = 0.5f;
|
|
|
|
|
albedo_out[2] = 0.5f;
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
ccl_gpu_kernel_signature(filter_color_postprocess,
|
|
|
|
@@ -936,6 +972,7 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
denoised_pixel[3] = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|
|
|
|
|
|
/* --------------------------------------------------------------------
|
|
|
|
|
* Shadow catcher.
|
|
|
|
@@ -961,3 +998,4 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
|
|
|
|
atomic_fetch_and_add_uint32(num_possible_splits, popcount(can_split_mask));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
ccl_gpu_kernel_postfix
|
|
|
|
|