From 74b128dd6d3426b0f9019c442e01417a3d9eb2c7 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 22 Apr 2024 10:06:54 +0200 Subject: [PATCH 1/6] EEVEE-Next: Fix motion blur large geom on low end devices Detected when testing mr_elephant on an Intel HD520. When copying the velocity buffer using the copy shader, the number of scheduled workgroups could be larger than supported by the device. This PR fixes this by splitting the copy pass into multiple smaller passes so the velocity is copied. NOTE: I didn't go for the approach of adding a new workgroup dimension, as that would lead to more overhead when using many smaller meshes. I would assume these devices would more often be used with scenes with smaller geometry. --- .../draw/engines/eevee_next/eevee_velocity.cc | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/eevee_velocity.cc b/source/blender/draw/engines/eevee_next/eevee_velocity.cc index c69816e15e0..eba772ddd73 100644 --- a/source/blender/draw/engines/eevee_next/eevee_velocity.cc +++ b/source/blender/draw/engines/eevee_next/eevee_velocity.cc @@ -276,10 +276,21 @@ void VelocityModule::geometry_steps_fill() else { BLI_assert(format->stride % 4 == 0); copy_ps.bind_ssbo("in_buf", geom.pos_buf); - copy_ps.push_constant("start_offset", geom.ofs); copy_ps.push_constant("vertex_stride", int(format->stride / 4)); - copy_ps.push_constant("vertex_count", geom.len); - copy_ps.dispatch(int3(divide_ceil_u(geom.len, VERTEX_COPY_GROUP_SIZE), 1, 1)); + int sub_offset = geom.ofs; + int sub_len_left = geom.len; + while (sub_len_left != 0) { + PassSimple::Sub &copy_sub_ps = copy_ps.sub("PartialCopy"); + copy_sub_ps.push_constant("vertex_count", sub_len_left); + copy_sub_ps.push_constant("start_offset", sub_offset); + int group_len_x = min_ii(divide_ceil_u(sub_len_left, VERTEX_COPY_GROUP_SIZE), + GPU_max_work_group_count(0)); + int vertices = group_len_x * VERTEX_COPY_GROUP_SIZE; 
copy_sub_ps.dispatch(int3(group_len_x, 1, 1)); + + sub_offset += vertices; + sub_len_left = max_ii(sub_len_left - vertices, 0); + } } } -- 2.30.2 From 684c73c3a8a22616cf6f0fe822a903e3910c24b3 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 23 Apr 2024 10:53:30 +0200 Subject: [PATCH 2/6] Moved for loop to copy shader --- .../draw/engines/eevee_next/eevee_velocity.cc | 19 +++++-------------- .../shaders/eevee_vertex_copy_comp.glsl | 16 +++++++++------- .../shaders/infos/eevee_velocity_info.hh | 1 + 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/eevee_velocity.cc b/source/blender/draw/engines/eevee_next/eevee_velocity.cc index eba772ddd73..eec00933350 100644 --- a/source/blender/draw/engines/eevee_next/eevee_velocity.cc +++ b/source/blender/draw/engines/eevee_next/eevee_velocity.cc @@ -276,21 +276,12 @@ void VelocityModule::geometry_steps_fill() else { BLI_assert(format->stride % 4 == 0); copy_ps.bind_ssbo("in_buf", geom.pos_buf); + copy_ps.push_constant("start_offset", geom.ofs); copy_ps.push_constant("vertex_stride", int(format->stride / 4)); - int sub_offset = geom.ofs; - int sub_len_left = geom.len; - while (sub_len_left != 0) { - PassSimple::Sub &copy_sub_ps = copy_ps.sub("PartialCopy"); - copy_sub_ps.push_constant("vertex_count", sub_len_left); - copy_sub_ps.push_constant("start_offset", sub_offset); - int group_len_x = min_ii(divide_ceil_u(sub_len_left, VERTEX_COPY_GROUP_SIZE), - GPU_max_work_group_count(0)); - int vertices = group_len_x * VERTEX_COPY_GROUP_SIZE; - copy_sub_ps.dispatch(int3(group_len_x, 1, 1)); - - sub_offset += vertices; - sub_len_left = max_ii(sub_len_left - vertices, 0); - } + copy_ps.push_constant("vertex_count", geom.len); + uint group_len_x = divide_ceil_u(geom.len, VERTEX_COPY_GROUP_SIZE); + uint verts_per_thread = divide_ceil_u(group_len_x, GPU_max_work_group_count(0)); + copy_ps.dispatch(int3(group_len_x / verts_per_thread, 1, 1)); } } diff --git 
a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl index 4135716455a..d6c15f2d527 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl @@ -4,12 +4,14 @@ void main() { - uint vertex_id = gl_GlobalInvocationID.x; - if (vertex_id >= uint(vertex_count)) { - return; + uint vertices_per_thread = vertex_count / (gl_NumWorkGroups.x * VERTEX_COPY_GROUP_SIZE); + uint vertex_start = min(gl_GlobalInvocationID.x * vertices_per_thread, uint(vertex_count)); + uint vertex_end = min(vertex_start + vertices_per_thread, uint(vertex_count)); + + for (uint vertex_id = vertex_start; vertex_id < vertex_end; vertex_id++) { + out_buf[start_offset + vertex_id] = vec4(in_buf[vertex_id * vertex_stride + 0], + in_buf[vertex_id * vertex_stride + 1], + in_buf[vertex_id * vertex_stride + 2], + 1.0); } - out_buf[start_offset + vertex_id] = vec4(in_buf[vertex_id * vertex_stride + 0], - in_buf[vertex_id * vertex_stride + 1], - in_buf[vertex_id * vertex_stride + 2], - 1.0); } diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh index ae8937e301d..0c267e41a5d 100644 --- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh @@ -46,6 +46,7 @@ GPU_SHADER_CREATE_INFO(eevee_vertex_copy) .push_constant(Type::INT, "start_offset") .push_constant(Type::INT, "vertex_stride") .push_constant(Type::INT, "vertex_count") + .additional_info("eevee_shared") .do_static_compilation(true); /** \} */ -- 2.30.2 From 3ef21dda3e74504de5e072814617c680bcdb8b07 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 23 Apr 2024 11:28:32 +0200 Subject: [PATCH 3/6] Fix incorrect division. 
--- .../draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl index d6c15f2d527..b8cabcc8a14 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl @@ -4,7 +4,7 @@ void main() { - uint vertices_per_thread = vertex_count / (gl_NumWorkGroups.x * VERTEX_COPY_GROUP_SIZE); + uint vertices_per_thread = (gl_NumWorkGroups.x * VERTEX_COPY_GROUP_SIZE) / vertex_count; uint vertex_start = min(gl_GlobalInvocationID.x * vertices_per_thread, uint(vertex_count)); uint vertex_end = min(vertex_start + vertices_per_thread, uint(vertex_count)); -- 2.30.2 From 83595636857115aff254b932b1c769fb7de3621e Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 23 Apr 2024 11:42:46 +0200 Subject: [PATCH 4/6] Fix incorrect division --- .../engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl index b8cabcc8a14..819e58e6080 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl @@ -2,9 +2,12 @@ * * SPDX-License-Identifier: GPL-2.0-or-later */ +#pragma BLENDER_REQUIRE(common_math_lib.glsl) + void main() { - uint vertices_per_thread = (gl_NumWorkGroups.x * VERTEX_COPY_GROUP_SIZE) / vertex_count; + uint vertices_per_thread = gl_NumWorkGroups.x / + divide_ceil(uint(vertex_count), uint(VERTEX_COPY_GROUP_SIZE)); uint vertex_start = min(gl_GlobalInvocationID.x * vertices_per_thread, uint(vertex_count)); uint vertex_end = min(vertex_start 
+ vertices_per_thread, uint(vertex_count)); -- 2.30.2 From 740c0b3d4c00063a492ff185709cf96b0b16c2a6 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Tue, 23 Apr 2024 12:19:28 +0200 Subject: [PATCH 5/6] Fix division. --- .../engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl index 819e58e6080..5da6be17e2a 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl @@ -6,8 +6,8 @@ void main() { - uint vertices_per_thread = gl_NumWorkGroups.x / - divide_ceil(uint(vertex_count), uint(VERTEX_COPY_GROUP_SIZE)); + uint vertices_per_thread = divide_ceil(uint(vertex_count), uint(VERTEX_COPY_GROUP_SIZE)) / + gl_NumWorkGroups.x; uint vertex_start = min(gl_GlobalInvocationID.x * vertices_per_thread, uint(vertex_count)); uint vertex_end = min(vertex_start + vertices_per_thread, uint(vertex_count)); -- 2.30.2 From 91c5533cb166c2eaed331277f8988e2251d9faeb Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 6 May 2024 08:27:34 +0200 Subject: [PATCH 6/6] Use gl_WorkGroupSize.x --- .../engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl | 4 ++-- .../engines/eevee_next/shaders/infos/eevee_velocity_info.hh | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl index 5da6be17e2a..9fc004733db 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_vertex_copy_comp.glsl @@ -2,11 +2,11 @@ * * SPDX-License-Identifier: GPL-2.0-or-later */ -#pragma BLENDER_REQUIRE(common_math_lib.glsl) +#pragma 
BLENDER_REQUIRE(gpu_shader_math_base_lib.glsl) void main() { - uint vertices_per_thread = divide_ceil(uint(vertex_count), uint(VERTEX_COPY_GROUP_SIZE)) / + uint vertices_per_thread = divide_ceil(uint(vertex_count), uint(gl_WorkGroupSize.x)) / gl_NumWorkGroups.x; uint vertex_start = min(gl_GlobalInvocationID.x * vertices_per_thread, uint(vertex_count)); uint vertex_end = min(vertex_start + vertices_per_thread, uint(vertex_count)); diff --git a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh index 0c267e41a5d..ae8937e301d 100644 --- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh +++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_velocity_info.hh @@ -46,7 +46,6 @@ GPU_SHADER_CREATE_INFO(eevee_vertex_copy) .push_constant(Type::INT, "start_offset") .push_constant(Type::INT, "vertex_stride") .push_constant(Type::INT, "vertex_count") - .additional_info("eevee_shared") .do_static_compilation(true); /** \} */ -- 2.30.2