diff --git a/source/blender/draw/engines/eevee_next/eevee_velocity.cc b/source/blender/draw/engines/eevee_next/eevee_velocity.cc
--- a/source/blender/draw/engines/eevee_next/eevee_velocity.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_velocity.cc
@@ -279,7 +279,12 @@ void VelocityModule::geometry_steps_fill()
       copy_ps.push_constant("start_offset", geom.ofs);
       copy_ps.push_constant("vertex_stride", int(format->stride / 4));
       copy_ps.push_constant("vertex_count", geom.len);
-      copy_ps.dispatch(int3(divide_ceil_u(geom.len, VERTEX_COPY_GROUP_SIZE), 1, 1));
+      /* Never exceed the device dispatch limit. When the count is clamped, the shader makes each
+       * thread copy several vertices, based on the number of groups actually dispatched. */
+      uint group_len_x = divide_ceil_u(geom.len, VERTEX_COPY_GROUP_SIZE);
+      uint group_len_max = uint(GPU_max_work_group_count(0));
+      uint dispatch_len_x = (group_len_x < group_len_max) ? group_len_x : group_len_max;
+      copy_ps.dispatch(int3(dispatch_len_x, 1, 1));
     }
   }
 
diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl
@@ -2,14 +2,21 @@
  *
  * SPDX-License-Identifier: GPL-2.0-or-later */
 
+#pragma BLENDER_REQUIRE(gpu_shader_math_base_lib.glsl)
+
 void main()
 {
-  uint vertex_id = gl_GlobalInvocationID.x;
-  if (vertex_id >= uint(vertex_count)) {
-    return;
+  /* Number of work-groups needed to process one vertex per thread. */
+  uint group_len_x = divide_ceil(uint(vertex_count), uint(gl_WorkGroupSize.x));
+  /* Round up so that no trailing vertex is skipped when fewer groups were dispatched. */
+  uint vertices_per_thread = divide_ceil(group_len_x, gl_NumWorkGroups.x);
+  uint vertex_start = min(gl_GlobalInvocationID.x * vertices_per_thread, uint(vertex_count));
+  uint vertex_end = min(vertex_start + vertices_per_thread, uint(vertex_count));
+
+  for (uint vertex_id = vertex_start; vertex_id < vertex_end; vertex_id++) {
+    out_buf[start_offset + vertex_id] = vec4(in_buf[vertex_id * vertex_stride + 0],
+                                             in_buf[vertex_id * vertex_stride + 1],
+                                             in_buf[vertex_id * vertex_stride + 2],
+                                             1.0);
   }
-  out_buf[start_offset + vertex_id] = vec4(in_buf[vertex_id * vertex_stride + 0],
-                                           in_buf[vertex_id * vertex_stride + 1],
-                                           in_buf[vertex_id * vertex_stride + 2],
-                                           1.0);
 }