DRW: Curves: Indexbuf optimization for large numbers of curves #116617

Merged
Clément Foucault merged 7 commits from Eugene-Kuznetsov/blender:ek_curves_2d_draw into main 2024-02-25 17:23:06 +01:00
10 changed files with 182 additions and 60 deletions

View File

@ -42,7 +42,6 @@
#include "draw_cache_impl.hh" /* own include */
#include "draw_cache_inline.hh"
#include "draw_curves_private.hh" /* own include */
#include "draw_shader.hh"
namespace blender::draw {
@ -417,57 +416,6 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c
cache.final[subdiv].strands_res * cache.strands_len);
}
/**
 * Append the indices of every curve in `curves` to the index buffer builder `elb`.
 *
 * \param prim_type: Primitive type the index buffer is built for. Only line/triangle strips,
 * independent lines and independent triangles are handled; anything else asserts.
 * \param curves: Only the number of curves is read from this.
 * \param res: Number of indices emitted per curve: the vertex count for strips (the restart
 * index is added on top), 2 per segment for lines, 6 per segment for triangles.
 * \param elb: Builder receiving the indices.
 */
static void curves_batch_cache_fill_segments_indices(GPUPrimType prim_type,
                                                     const bke::CurvesGeometry &curves,
                                                     const int res,
                                                     GPUIndexBufBuilder &elb)
{
  const IndexRange curve_range(curves.curves_num());
  /* Running vertex index, continuous across all curves. */
  uint vert = 0;

  if (prim_type == GPU_PRIM_LINE_STRIP || prim_type == GPU_PRIM_TRI_STRIP) {
    /* Restart-compatible strips: `res` consecutive vertices followed by a restart index. */
    for ([[maybe_unused]] const int curve_i : curve_range) {
      for (int written = 0; written < res; written++, vert++) {
        GPU_indexbuf_add_generic_vert(&elb, vert);
      }
      GPU_indexbuf_add_primitive_restart(&elb);
    }
  }
  else if (prim_type == GPU_PRIM_LINES) {
    /* Independent line segments: each segment joins a vertex with its successor. */
    const int segments_per_curve = res / 2;
    for ([[maybe_unused]] const int curve_i : curve_range) {
      for (int segment = 0; segment < segments_per_curve; segment++, vert++) {
        GPU_indexbuf_add_line_verts(&elb, vert, vert + 1);
      }
      /* The last vertex of a curve starts no segment, step over it. */
      vert++;
    }
  }
  else if (prim_type == GPU_PRIM_TRIS) {
    /* Independent quads (two triangles) between consecutive pairs of vertices. */
    const int segments_per_curve = res / 6;
    for ([[maybe_unused]] const int curve_i : curve_range) {
      for (int segment = 0; segment < segments_per_curve; segment++, vert += 2) {
        GPU_indexbuf_add_tri_verts(&elb, vert, vert + 1, vert + 2);
        GPU_indexbuf_add_tri_verts(&elb, vert + 1, vert + 3, vert + 2);
      }
      /* The last vertex pair of a curve starts no quad, step over it. */
      vert += 2;
    }
  }
  else {
    BLI_assert_unreachable();
  }
}
static void curves_batch_cache_ensure_procedural_indices(const bke::CurvesGeometry &curves,
CurvesEvalCache &cache,
const int thickness_res,
@ -483,13 +431,11 @@ static void curves_batch_cache_ensure_procedural_indices(const bke::CurvesGeomet
* NOTE: Metal backend uses non-restart prim types for optimal HW performance. */
bool use_strip_prims = (GPU_backend_get_type() != GPU_BACKEND_METAL);
int verts_per_curve;
int element_count;
GPUPrimType prim_type;
if (use_strip_prims) {
/* +1 for primitive restart */
verts_per_curve = cache.final[subdiv].strands_res * thickness_res;
element_count = (verts_per_curve + 1) * cache.strands_len;
prim_type = (thickness_res == 1) ? GPU_PRIM_LINE_STRIP : GPU_PRIM_TRI_STRIP;
}
else {
@ -497,7 +443,6 @@ static void curves_batch_cache_ensure_procedural_indices(const bke::CurvesGeomet
prim_type = (thickness_res == 1) ? GPU_PRIM_LINES : GPU_PRIM_TRIS;
int verts_per_segment = ((prim_type == GPU_PRIM_LINES) ? 2 : 6);
verts_per_curve = (cache.final[subdiv].strands_res - 1) * verts_per_segment;
element_count = verts_per_curve * cache.strands_len;
}
static GPUVertFormat format = {0};
@ -509,13 +454,11 @@ static void curves_batch_cache_ensure_procedural_indices(const bke::CurvesGeomet
GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
GPU_vertbuf_data_alloc(vbo, 1);
GPUIndexBufBuilder elb;
GPU_indexbuf_init_ex(&elb, prim_type, element_count, element_count);
curves_batch_cache_fill_segments_indices(prim_type, curves, verts_per_curve, elb);
GPUIndexBuf *ibo = GPU_indexbuf_build_curves_on_device(
prim_type, curves.curves_num(), verts_per_curve);
cache.final[subdiv].proc_hairs[thickness_res - 1] = GPU_batch_create_ex(
prim_type, vbo, GPU_indexbuf_build(&elb), GPU_BATCH_OWNS_VBO | GPU_BATCH_OWNS_INDEX);
prim_type, vbo, ibo, GPU_BATCH_OWNS_VBO | GPU_BATCH_OWNS_INDEX);
}
static bool curves_ensure_attributes(const Curves &curves,

View File

@ -590,6 +590,9 @@ set(GLSL_SRC
shaders/gpu_shader_cfg_world_clip_lib.glsl
shaders/gpu_shader_colorspace_lib.glsl
shaders/gpu_shader_index_2d_array_points.glsl
shaders/gpu_shader_index_2d_array_lines.glsl
shaders/gpu_shader_index_2d_array_tris.glsl
GPU_shader_shared_utils.h
)
@ -757,6 +760,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/gpu_shader_3D_uniform_color_info.hh
shaders/infos/gpu_shader_gpencil_stroke_info.hh
shaders/infos/gpu_shader_icon_info.hh
shaders/infos/gpu_shader_index_info.hh
shaders/infos/gpu_shader_instance_varying_color_varying_size_info.hh
shaders/infos/gpu_shader_keyframe_shape_info.hh
shaders/infos/gpu_shader_line_dashed_uniform_color_info.hh

View File

@ -72,6 +72,10 @@ void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *, GPUIndexBuf *);
void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding);
/**
 * Create an index buffer for drawing `curves_num` curves and fill it on the GPU with a
 * compute shader instead of building it on the CPU.
 *
 * \param prim_type: Primitive type the indices are generated for. Strip types
 * (#GPU_PRIM_LINE_STRIP, #GPU_PRIM_TRI_STRIP) get one extra primitive-restart index per curve.
 * \param curves_num: Number of curves (rows of the generated 2D index array).
 * \param verts_per_curve: Number of indices per curve, not counting the restart index.
 * \return The newly created index buffer.
 */
GPUIndexBuf *GPU_indexbuf_build_curves_on_device(GPUPrimType prim_type,
uint curves_num,
uint verts_per_curve);
/* Upload data to the GPU (if not built on the device) and bind the buffer to its default target.
*/
void GPU_indexbuf_use(GPUIndexBuf *elem);

View File

@ -79,6 +79,11 @@ typedef enum eGPUBuiltinShader {
/** Draw wide lines with uniform color. Has an additional clip plane parameter. */
GPU_SHADER_3D_POLYLINE_CLIPPED_UNIFORM_COLOR,
/** Compute shaders to generate 2d index buffers (mainly for curve drawing). */
GPU_SHADER_INDEXBUF_POINTS,
GPU_SHADER_INDEXBUF_LINES,
GPU_SHADER_INDEXBUF_TRIS,
/**
* ----------------------- Shaders exposed through pyGPU module -----------------------
*

View File

@ -17,6 +17,8 @@
#include "gpu_index_buffer_private.hh"
#include "GPU_capabilities.h"
#include "GPU_compute.h"
#include "GPU_platform.h"
#include <algorithm> /* For `min/max`. */
@ -241,6 +243,54 @@ void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem)
builder->uses_restart_indices = true;
}
/**
 * Create the index buffer used to draw `curves_num` curves and fill it with a compute shader.
 *
 * \param prim_type: Primitive type to generate indices for. #GPU_PRIM_TRIS and #GPU_PRIM_LINES
 * select their dedicated shaders, every other type uses the restart-based "points" shader.
 * \param curves_num: Number of curves.
 * \param verts_per_curve: Number of indices per curve, excluding the restart index that is
 * added for strip primitives.
 * \return The index buffer that the compute shader writes into.
 */
GPUIndexBuf *GPU_indexbuf_build_curves_on_device(GPUPrimType prim_type,
                                                 uint curves_num,
                                                 uint verts_per_curve)
{
  /* Strip primitives store one extra primitive-restart index at the end of every curve. */
  const bool is_strip = (prim_type == GPU_PRIM_LINE_STRIP || prim_type == GPU_PRIM_TRI_STRIP);
  const uint64_t indices_per_curve = uint64_t(verts_per_curve) + (is_strip ? 1 : 0);

  const uint64_t limit_x = GPU_max_work_group_count(0);
  const uint64_t limit_y = GPU_max_work_group_count(1);
  const uint64_t limit_z = GPU_max_work_group_count(2);

  /* One work group covers 16 indices along X and 16 curves along Y. Group counts are clamped
   * to the device limits.
   * NOTE(review): for lines/tris X is sized from the index count while the shader is given the
   * (smaller) element count, so more groups than strictly needed may be dispatched. */
  const uint64_t groups_x = min_uu(limit_x, (indices_per_curve + 15) / 16);
  uint64_t groups_y = (curves_num + 15) / 16;
  uint64_t groups_z = 1;
  if (groups_y > limit_y) {
    /* Too many curves for the Y dimension alone: spread them over a square Y/Z grid. */
    const uint64_t side = uint64_t(ceil(sqrt(double(groups_y))));
    groups_y = min_uu(side, limit_y);
    groups_z = min_uu(side, limit_z);
  }

  /* Pick the shader together with the number of indices it writes per element. */
  GPUShader *shader;
  int indices_per_element;
  if (prim_type == GPU_PRIM_TRIS) {
    shader = GPU_shader_get_builtin_shader(GPU_SHADER_INDEXBUF_TRIS);
    indices_per_element = 6;
  }
  else if (prim_type == GPU_PRIM_LINES) {
    shader = GPU_shader_get_builtin_shader(GPU_SHADER_INDEXBUF_LINES);
    indices_per_element = 2;
  }
  else {
    shader = GPU_shader_get_builtin_shader(GPU_SHADER_INDEXBUF_POINTS);
    indices_per_element = 1;
  }
  GPU_shader_bind(shader);

  GPUIndexBuf *ibo = GPU_indexbuf_build_on_device(curves_num * indices_per_curve);
  GPU_shader_uniform_1i(shader, "elements_per_curve", indices_per_curve / indices_per_element);
  GPU_shader_uniform_1i(shader, "ncurves", curves_num);
  GPU_indexbuf_bind_as_ssbo(ibo, GPU_shader_get_ssbo_binding(shader, "out_indices"));

  GPU_compute_dispatch(shader, groups_x, groups_y, groups_z);
  /* The barrier is issued before the buffer is handed back for use as an element array. */
  GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY);
  GPU_shader_unbind();
  return ibo;
}
/** \} */
/* -------------------------------------------------------------------- */

View File

@ -87,6 +87,12 @@ static const char *builtin_shader_create_info_name(eGPUBuiltinShader shader)
return "gpu_shader_2D_nodelink_inst";
case GPU_SHADER_GPENCIL_STROKE:
return "gpu_shader_gpencil_stroke";
case GPU_SHADER_INDEXBUF_POINTS:
return "gpu_shader_index_2d_array_points";
case GPU_SHADER_INDEXBUF_LINES:
return "gpu_shader_index_2d_array_lines";
case GPU_SHADER_INDEXBUF_TRIS:
return "gpu_shader_index_2d_array_tris";
default:
BLI_assert_unreachable();
return "";

View File

@ -0,0 +1,27 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Constructs a 2D array index buffer with 'ncurves' rows and 'elements_per_curve*2'
* columns. Each row contains 'elements_per_curve' pairs of indexes.
* e.g., for elements_per_curve=32, first two rows are
* 0 1 1 2 2 3 ... 31 32
* 33 34 34 35 35 36 .. 64 65
* The index buffer can then be used to draw 'ncurves' curves with 'elements_per_curve+1'
* vertexes each, using GL_LINES primitives. Intended to be used if GL_LINE_STRIP
* primitives can't be used for some reason.
*/
void main()
{
  ivec3 invocation = ivec3(gl_GlobalInvocationID);
  ivec3 total_threads = ivec3(gl_WorkGroupSize) * ivec3(gl_NumWorkGroups);

  /* The Y and Z dimensions are flattened into a single curve index. Every invocation then
   * strides over the curves and segments, so any dispatch size covers the whole buffer. */
  int first_curve = invocation.z * total_threads.y + invocation.y;
  int curve_stride = total_threads.y * total_threads.z;
  /* A curve made of `elements_per_curve` segments references one more vertex than that. */
  int verts_per_curve = elements_per_curve + 1;

  for (int curve = first_curve; curve < ncurves; curve += curve_stride) {
    for (int segment = invocation.x; segment < elements_per_curve; segment += total_threads.x) {
      uint first_vert = uint(curve * verts_per_curve + segment);
      /* Two indices are written per segment. */
      int dst = (curve * elements_per_curve + segment) * 2;
      out_indices[dst] = first_vert;
      out_indices[dst + 1] = first_vert + 1u;
    }
  }
}

View File

@ -0,0 +1,23 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Constructs a simple 2D array index buffer, with 'ncurves' rows and 'elements_per_curve'
* columns. Each row contains 'elements_per_curve-1' indexes and a restart index.
* The index buffer can then be used to draw either 'ncurves' lines with 'elements_per_curve-1'
* vertexes each, or 'ncurves' triangle strips with 'elements_per_curve-3' triangles each.
*/
void main()
{
  ivec3 invocation = ivec3(gl_GlobalInvocationID);
  ivec3 total_threads = ivec3(gl_WorkGroupSize) * ivec3(gl_NumWorkGroups);

  /* The Y and Z dimensions are flattened into a single row (curve) index. Every invocation
   * then strides over rows and columns, so any dispatch size covers the whole buffer. */
  int first_row = invocation.z * total_threads.y + invocation.y;
  int row_stride = total_threads.y * total_threads.z;
  /* The last slot of each row holds the restart index, so a row references one vertex less
   * than it has slots. */
  int verts_per_row = elements_per_curve - 1;

  for (int row = first_row; row < ncurves; row += row_stride) {
    for (int col = invocation.x; col < elements_per_curve; col += total_threads.x) {
      uint index = 0xFFFFFFFFu;
      if (col < verts_per_row) {
        index = uint(row * verts_per_row + col);
      }
      out_indices[row * elements_per_curve + col] = index;
    }
  }
}

View File

@ -0,0 +1,26 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Constructs a 2D array index buffer, with 'ncurves' rows and 'elements_per_curve*6' columns.
* The index buffer can be used to draw 'ncurves' triangle strips with 'elements_per_curve*2'
* triangles each, using GL_TRIANGLES primitives. Intended to be used if GL_TRIANGLE_STRIP
* primitives can't be used for some reason.
*/
void main()
{
  ivec3 gid = ivec3(gl_GlobalInvocationID);
  ivec3 nthreads = ivec3(gl_NumWorkGroups) * ivec3(gl_WorkGroupSize);
  /* The Y and Z dimensions are flattened into a single curve index. Every invocation strides
   * over curves and segments, so any dispatch size covers the whole buffer. */
  for (int y = gid.y + gid.z * nthreads.y; y < ncurves; y += nthreads.y * nthreads.z) {
    for (int x = gid.x; x < elements_per_curve; x += nthreads.x) {
      /* Six indices (two triangles) are written per segment. */
      int store_index = (x + y * elements_per_curve) * 6;
      /* Every curve point owns two consecutive vertices, so a curve with `elements_per_curve`
       * segments spans `(elements_per_curve + 1) * 2` vertices and segment `x` starts at vertex
       * `2 * x` within its curve. */
      uint t = uint(x + y * (elements_per_curve + 1)) * 2u;
      out_indices[store_index + 0] = t;
      out_indices[store_index + 1] = t + 1u;
      out_indices[store_index + 2] = t + 2u;
      out_indices[store_index + 3] = t + 1u;
      out_indices[store_index + 4] = t + 3u;
      out_indices[store_index + 5] = t + 2u;
    }
  }
}

View File

@ -0,0 +1,34 @@
/* SPDX-FileCopyrightText: 2022 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#include "gpu_interface_info.hh"
#include "gpu_shader_create_info.hh"
/* Compute shaders that fill an index buffer laid out as a 2D array (one row per curve).
 * All three variants share the same interface: 16x16x1 work groups, the push constants
 * `elements_per_curve` and `ncurves`, and a write-only `uint` storage buffer `out_indices`
 * bound at slot 0. They only differ in the compute source they use. */

/* Variant writing one index per element, with a restart index ending every row. */
GPU_SHADER_CREATE_INFO(gpu_shader_index_2d_array_points)
.local_group_size(16, 16, 1)
.push_constant(Type::INT, "elements_per_curve")
.push_constant(Type::INT, "ncurves")
.storage_buf(0, Qualifier::WRITE, "uint", "out_indices[]")
.compute_source("gpu_shader_index_2d_array_points.glsl")
.do_static_compilation(true);
/* Variant writing two indices (one independent line segment) per element. */
GPU_SHADER_CREATE_INFO(gpu_shader_index_2d_array_lines)
.local_group_size(16, 16, 1)
.push_constant(Type::INT, "elements_per_curve")
.push_constant(Type::INT, "ncurves")
.storage_buf(0, Qualifier::WRITE, "uint", "out_indices[]")
.compute_source("gpu_shader_index_2d_array_lines.glsl")
.do_static_compilation(true);
/* Variant writing six indices (two independent triangles) per element. */
GPU_SHADER_CREATE_INFO(gpu_shader_index_2d_array_tris)
.local_group_size(16, 16, 1)
.push_constant(Type::INT, "elements_per_curve")
.push_constant(Type::INT, "ncurves")
.storage_buf(0, Qualifier::WRITE, "uint", "out_indices[]")
.compute_source("gpu_shader_index_2d_array_tris.glsl")
.do_static_compilation(true);