DRW: Curves: Indexbuf optimization for large numbers of curves #116617
|
@ -42,7 +42,6 @@
|
|||
#include "draw_cache_impl.hh" /* own include */
|
||||
#include "draw_cache_inline.hh"
|
||||
#include "draw_curves_private.hh" /* own include */
|
||||
#include "draw_shader.hh"
|
||||
|
||||
namespace blender::draw {
|
||||
|
||||
|
@ -417,57 +416,6 @@ static void curves_batch_cache_ensure_procedural_final_points(CurvesEvalCache &c
|
|||
cache.final[subdiv].strands_res * cache.strands_len);
|
||||
}
|
||||
|
||||
/**
 * Append the indices of every curve to `elb`, one curve after the other.
 *
 * \param prim_type: Primitive type the index buffer is built for.
 * \param curves: Only used for the number of curves to emit.
 * \param res: Number of indices emitted per curve (the restart index of strip types excluded).
 * \param elb: Index buffer builder receiving the indices.
 */
static void curves_batch_cache_fill_segments_indices(GPUPrimType prim_type,
                                                     const bke::CurvesGeometry &curves,
                                                     const int res,
                                                     GPUIndexBufBuilder &elb)
{
  /* Running vertex index, shared by all the curves of the batch. */
  uint next_index = 0;

  if (prim_type == GPU_PRIM_LINE_STRIP || prim_type == GPU_PRIM_TRI_STRIP) {
    /* Restart-compatible types: `res` consecutive indices, then a restart index. */
    for ([[maybe_unused]] const int curve_i : IndexRange(curves.curves_num())) {
      for (int point_i = 0; point_i < res; point_i++, next_index++) {
        GPU_indexbuf_add_generic_vert(&elb, next_index);
      }
      GPU_indexbuf_add_primitive_restart(&elb);
    }
  }
  else if (prim_type == GPU_PRIM_LINES) {
    /* Independent line segments: two indices per segment. */
    const int segments_per_curve = res / 2;
    for ([[maybe_unused]] const int curve_i : IndexRange(curves.curves_num())) {
      for (int segment_i = 0; segment_i < segments_per_curve; segment_i++, next_index++) {
        GPU_indexbuf_add_line_verts(&elb, next_index, next_index + 1);
      }
      /* Step over the last vertex of this curve to reach the base index of the next one. */
      next_index++;
    }
  }
  else if (prim_type == GPU_PRIM_TRIS) {
    /* Independent two-triangle segments: six indices per segment. */
    const int quads_per_curve = res / 6;
    for ([[maybe_unused]] const int curve_i : IndexRange(curves.curves_num())) {
      for (int quad_i = 0; quad_i < quads_per_curve; quad_i++, next_index += 2) {
        const uint v0 = next_index;
        const uint v1 = next_index + 1;
        const uint v2 = next_index + 2;
        const uint v3 = next_index + 3;
        GPU_indexbuf_add_tri_verts(&elb, v0, v1, v2);
        GPU_indexbuf_add_tri_verts(&elb, v1, v3, v2);
      }
      /* Step over the last vertex pair of this curve to reach the base index of the next one. */
      next_index += 2;
    }
  }
  else {
    BLI_assert_unreachable();
  }
}
|
||||
|
||||
static void curves_batch_cache_ensure_procedural_indices(const bke::CurvesGeometry &curves,
|
||||
CurvesEvalCache &cache,
|
||||
const int thickness_res,
|
||||
|
@ -483,13 +431,11 @@ static void curves_batch_cache_ensure_procedural_indices(const bke::CurvesGeomet
|
|||
* NOTE: Metal backend uses non-restart prim types for optimal HW performance. */
|
||||
bool use_strip_prims = (GPU_backend_get_type() != GPU_BACKEND_METAL);
|
||||
int verts_per_curve;
|
||||
int element_count;
|
||||
GPUPrimType prim_type;
|
||||
|
||||
if (use_strip_prims) {
|
||||
/* +1 for primitive restart */
|
||||
verts_per_curve = cache.final[subdiv].strands_res * thickness_res;
|
||||
element_count = (verts_per_curve + 1) * cache.strands_len;
|
||||
prim_type = (thickness_res == 1) ? GPU_PRIM_LINE_STRIP : GPU_PRIM_TRI_STRIP;
|
||||
}
|
||||
else {
|
||||
|
@ -497,7 +443,6 @@ static void curves_batch_cache_ensure_procedural_indices(const bke::CurvesGeomet
|
|||
prim_type = (thickness_res == 1) ? GPU_PRIM_LINES : GPU_PRIM_TRIS;
|
||||
int verts_per_segment = ((prim_type == GPU_PRIM_LINES) ? 2 : 6);
|
||||
verts_per_curve = (cache.final[subdiv].strands_res - 1) * verts_per_segment;
|
||||
element_count = verts_per_curve * cache.strands_len;
|
||||
}
|
||||
|
||||
static GPUVertFormat format = {0};
|
||||
|
@ -509,13 +454,11 @@ static void curves_batch_cache_ensure_procedural_indices(const bke::CurvesGeomet
|
|||
GPUVertBuf *vbo = GPU_vertbuf_create_with_format(&format);
|
||||
GPU_vertbuf_data_alloc(vbo, 1);
|
||||
|
||||
GPUIndexBufBuilder elb;
|
||||
GPU_indexbuf_init_ex(&elb, prim_type, element_count, element_count);
|
||||
|
||||
curves_batch_cache_fill_segments_indices(prim_type, curves, verts_per_curve, elb);
|
||||
GPUIndexBuf *ibo = GPU_indexbuf_build_curves_on_device(
|
||||
prim_type, curves.curves_num(), verts_per_curve);
|
||||
|
||||
cache.final[subdiv].proc_hairs[thickness_res - 1] = GPU_batch_create_ex(
|
||||
prim_type, vbo, GPU_indexbuf_build(&elb), GPU_BATCH_OWNS_VBO | GPU_BATCH_OWNS_INDEX);
|
||||
prim_type, vbo, ibo, GPU_BATCH_OWNS_VBO | GPU_BATCH_OWNS_INDEX);
|
||||
}
|
||||
|
||||
static bool curves_ensure_attributes(const Curves &curves,
|
||||
|
|
|
@ -590,6 +590,9 @@ set(GLSL_SRC
|
|||
shaders/gpu_shader_cfg_world_clip_lib.glsl
|
||||
shaders/gpu_shader_colorspace_lib.glsl
|
||||
|
||||
shaders/gpu_shader_index_2d_array_points.glsl
|
||||
shaders/gpu_shader_index_2d_array_lines.glsl
|
||||
shaders/gpu_shader_index_2d_array_tris.glsl
|
||||
|
||||
GPU_shader_shared_utils.h
|
||||
)
|
||||
|
@ -757,6 +760,7 @@ set(SRC_SHADER_CREATE_INFOS
|
|||
shaders/infos/gpu_shader_3D_uniform_color_info.hh
|
||||
shaders/infos/gpu_shader_gpencil_stroke_info.hh
|
||||
shaders/infos/gpu_shader_icon_info.hh
|
||||
shaders/infos/gpu_shader_index_info.hh
|
||||
shaders/infos/gpu_shader_instance_varying_color_varying_size_info.hh
|
||||
shaders/infos/gpu_shader_keyframe_shape_info.hh
|
||||
shaders/infos/gpu_shader_line_dashed_uniform_color_info.hh
|
||||
|
|
|
@ -72,6 +72,10 @@ void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *, GPUIndexBuf *);
|
|||
|
||||
void GPU_indexbuf_bind_as_ssbo(GPUIndexBuf *elem, int binding);
|
||||
|
||||
/**
 * Create an index buffer for drawing `curves_num` curves and fill it on the GPU by dispatching
 * one of the `GPU_SHADER_INDEXBUF_*` compute shaders.
 *
 * \param prim_type: Primitive type the indices are generated for. `GPU_PRIM_TRIS` and
 * `GPU_PRIM_LINES` select the matching shader, any other type uses the points shader.
 * \param curves_num: Number of curves (rows of the generated index array).
 * \param verts_per_curve: Number of indices per curve. For `GPU_PRIM_LINE_STRIP` and
 * `GPU_PRIM_TRI_STRIP` one extra index per curve is added for the primitive restart.
 * \return The index buffer written by the compute shader.
 */
GPUIndexBuf *GPU_indexbuf_build_curves_on_device(GPUPrimType prim_type,
|
||||
uint curves_num,
|
||||
uint verts_per_curve);
|
||||
|
||||
/* Upload data to the GPU (if not built on the device) and bind the buffer to its default target.
|
||||
*/
|
||||
void GPU_indexbuf_use(GPUIndexBuf *elem);
|
||||
|
|
|
@ -79,6 +79,11 @@ typedef enum eGPUBuiltinShader {
|
|||
/** Draw wide lines with uniform color. Has an additional clip plane parameter. */
|
||||
GPU_SHADER_3D_POLYLINE_CLIPPED_UNIFORM_COLOR,
|
||||
|
||||
/** Compute shaders to generate 2d index buffers (mainly for curve drawing). */
|
||||
GPU_SHADER_INDEXBUF_POINTS,
|
||||
GPU_SHADER_INDEXBUF_LINES,
|
||||
GPU_SHADER_INDEXBUF_TRIS,
|
||||
|
||||
/**
|
||||
* ----------------------- Shaders exposed through pyGPU module -----------------------
|
||||
*
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
#include "gpu_index_buffer_private.hh"
|
||||
|
||||
#include "GPU_capabilities.h"
|
||||
#include "GPU_compute.h"
|
||||
#include "GPU_platform.h"
|
||||
|
||||
#include <algorithm> /* For `min/max`. */
|
||||
|
@ -241,6 +243,54 @@ void GPU_indexbuf_set_tri_restart(GPUIndexBufBuilder *builder, uint elem)
|
|||
builder->uses_restart_indices = true;
|
||||
}
|
||||
|
||||
/**
 * Build the index buffer of `curves_num` curves with a compute shader.
 *
 * The buffer is treated as a 2D array: one row per curve, `verts_per_curve` indices per row
 * (plus one restart index for strip primitive types). The dispatch size is clamped to the
 * work group count limits reported by the backend.
 */
GPUIndexBuf *GPU_indexbuf_build_curves_on_device(GPUPrimType prim_type,
                                                 uint curves_num,
                                                 uint verts_per_curve)
{
  /* Strip primitives store one extra index per curve: the primitive restart. */
  const bool is_strip = (prim_type == GPU_PRIM_LINE_STRIP || prim_type == GPU_PRIM_TRI_STRIP);
  uint64_t dispatch_x_dim = verts_per_curve;
  if (is_strip) {
    dispatch_x_dim += 1;
  }

  /* Work group counts, 16 threads per group along X and Y. */
  const uint64_t max_grid_x = GPU_max_work_group_count(0);
  const uint64_t max_grid_y = GPU_max_work_group_count(1);
  const uint64_t max_grid_z = GPU_max_work_group_count(2);

  const uint64_t grid_x = min_uu(max_grid_x, (dispatch_x_dim + 15) / 16);
  uint64_t grid_y = (curves_num + 15) / 16;
  uint64_t grid_z = 1;
  if (grid_y > max_grid_y) {
    /* Too many rows for the Y dimension alone: spread them over a Y/Z square instead. */
    const uint64_t grid_side = uint64_t(ceil(sqrt(double(grid_y))));
    grid_y = min_uu(grid_side, max_grid_y);
    grid_z = min_uu(grid_side, max_grid_z);
  }

  /* Pick the shader and the number of indices each shader element writes. */
  eGPUBuiltinShader shader_id;
  int indices_per_element;
  if (prim_type == GPU_PRIM_TRIS) {
    shader_id = GPU_SHADER_INDEXBUF_TRIS;
    indices_per_element = 6;
  }
  else if (prim_type == GPU_PRIM_LINES) {
    shader_id = GPU_SHADER_INDEXBUF_LINES;
    indices_per_element = 2;
  }
  else {
    shader_id = GPU_SHADER_INDEXBUF_POINTS;
    indices_per_element = 1;
  }

  GPUShader *shader = GPU_shader_get_builtin_shader(shader_id);
  GPU_shader_bind(shader);

  GPUIndexBuf *ibo = GPU_indexbuf_build_on_device(curves_num * dispatch_x_dim);

  GPU_shader_uniform_1i(shader, "elements_per_curve", dispatch_x_dim / indices_per_element);
  GPU_shader_uniform_1i(shader, "ncurves", curves_num);
  GPU_indexbuf_bind_as_ssbo(ibo, GPU_shader_get_ssbo_binding(shader, "out_indices"));
  GPU_compute_dispatch(shader, grid_x, grid_y, grid_z);

  /* The indices written by the shader are consumed as an element array afterwards. */
  GPU_memory_barrier(GPU_BARRIER_ELEMENT_ARRAY);
  GPU_shader_unbind();
  return ibo;
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
|
|
@ -87,6 +87,12 @@ static const char *builtin_shader_create_info_name(eGPUBuiltinShader shader)
|
|||
return "gpu_shader_2D_nodelink_inst";
|
||||
case GPU_SHADER_GPENCIL_STROKE:
|
||||
return "gpu_shader_gpencil_stroke";
|
||||
case GPU_SHADER_INDEXBUF_POINTS:
|
||||
return "gpu_shader_index_2d_array_points";
|
||||
case GPU_SHADER_INDEXBUF_LINES:
|
||||
return "gpu_shader_index_2d_array_lines";
|
||||
case GPU_SHADER_INDEXBUF_TRIS:
|
||||
return "gpu_shader_index_2d_array_tris";
|
||||
default:
|
||||
BLI_assert_unreachable();
|
||||
return "";
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/**
|
||||
* Constructs a 2D array index buffer with 'ncurves' rows and 'elements_per_curve*2'
|
||||
* columns. Each row contains 'elements_per_curve' pairs of indexes.
|
||||
* e.g., for elements_per_curve=32, first two rows are
|
||||
* 0 1 1 2 2 3 ... 31 32
|
||||
* 33 34 34 35 35 36 .. 64 65
|
||||
* The index buffer can then be used to draw 'ncurves' curves with 'elements_per_curve+1'
|
||||
* vertexes each, using GL_LINES primitives. Intended to be used if GL_LINE_STRIP
|
||||
* primitives can't be used for some reason.
|
||||
*/
|
||||
void main()
{
  ivec3 thread_id = ivec3(gl_GlobalInvocationID);
  ivec3 thread_count = ivec3(gl_NumWorkGroups) * ivec3(gl_WorkGroupSize);

  /* Rows (curves) are distributed over the Y and Z dimensions of the dispatch. */
  int first_row = thread_id.y + thread_id.z * thread_count.y;
  int row_step = thread_count.y * thread_count.z;
  /* A curve of `elements_per_curve` segments references one more vertex than it has segments. */
  int verts_per_curve = elements_per_curve + 1;

  for (int curve = first_row; curve < ncurves; curve += row_step) {
    for (int segment = thread_id.x; segment < elements_per_curve; segment += thread_count.x) {
      /* Two indices are written per segment. */
      int dst = (segment + curve * elements_per_curve) * 2;
      uint first_vert = uint(segment + curve * verts_per_curve);
      out_indices[dst] = first_vert;
      out_indices[dst + 1] = first_vert + 1u;
    }
  }
}
|
|
@ -0,0 +1,23 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/**
|
||||
* Constructs a simple 2D array index buffer, with 'ncurves' rows and 'elements_per_curve'
|
||||
* columns. Each row contains 'elements_per_curve-1' indexes and a restart index.
|
||||
* The index buffer can then be used to draw either 'ncurves' lines with 'elements_per_curve-1'
|
||||
* vertexes each, or 'ncurves' triangle strips with 'elements_per_curve-3' triangles each.
|
||||
*/
|
||||
void main()
{
  ivec3 thread_id = ivec3(gl_GlobalInvocationID);
  ivec3 thread_count = ivec3(gl_NumWorkGroups) * ivec3(gl_WorkGroupSize);

  /* Rows (curves) are distributed over the Y and Z dimensions of the dispatch. */
  int first_row = thread_id.y + thread_id.z * thread_count.y;
  int row_step = thread_count.y * thread_count.z;
  /* The last column of each row holds the restart index, all others hold a vertex index. */
  int restart_column = elements_per_curve - 1;

  for (int curve = first_row; curve < ncurves; curve += row_step) {
    for (int column = thread_id.x; column < elements_per_curve; column += thread_count.x) {
      uint value = 0xFFFFFFFFu;
      if (column < restart_column) {
        /* Each curve owns `elements_per_curve - 1` consecutive vertices. */
        value = uint(column + curve * (elements_per_curve - 1));
      }
      out_indices[column + curve * elements_per_curve] = value;
    }
  }
}
|
|
@ -0,0 +1,26 @@
|
|||
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/**
|
||||
* Constructs a 2D array index buffer, with 'ncurves' rows and 'elements_per_curve*6' columns.
|
||||
* The index buffer can be used to draw 'ncurves' triangle strips with 'elements_per_curve*2'
|
||||
* triangles each, using GL_TRIANGLES primitives. Intended to be used if GL_TRIANGLE_STRIP
|
||||
* primitives can't be used for some reason.
|
||||
*/
|
||||
void main()
{
  ivec3 gid = ivec3(gl_GlobalInvocationID);
  ivec3 nthreads = ivec3(gl_NumWorkGroups) * ivec3(gl_WorkGroupSize);

  /* Every element is a quad made of two triangles. It uses two vertex pairs and shares the
   * second pair with the next quad, so a curve owns `elements_per_curve * 2 + 2` vertices. */
  int verts_per_curve = elements_per_curve * 2 + 2;

  /* Rows (curves) are distributed over the Y and Z dimensions of the dispatch. */
  for (int y = gid.y + gid.z * nthreads.y; y < ncurves; y += nthreads.y * nthreads.z) {
    for (int x = gid.x; x < elements_per_curve; x += nthreads.x) {
      /* Six indices are written per quad. */
      int store_index = (x + y * elements_per_curve) * 6;
      /* Consecutive quads are two vertices apart, hence `x * 2`. Using `x` alone would make
       * neighboring quads overlap and leave the end of each curve unreferenced. */
      uint t = uint(x * 2 + y * verts_per_curve);
      out_indices[store_index + 0] = t;
      out_indices[store_index + 1] = t + 1u;
      out_indices[store_index + 2] = t + 2u;
      out_indices[store_index + 3] = t + 1u;
      out_indices[store_index + 4] = t + 3u;
      out_indices[store_index + 5] = t + 2u;
    }
  }
}
|
|
@ -0,0 +1,34 @@
|
|||
/* SPDX-FileCopyrightText: 2022 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/** \file
|
||||
* \ingroup gpu
|
||||
*/
|
||||
|
||||
#include "gpu_interface_info.hh"
|
||||
#include "gpu_shader_create_info.hh"
|
||||
|
||||
/* Compute shader create-info for `gpu_shader_index_2d_array_points.glsl`.
 * Local group size is 16x16x1. `elements_per_curve` and `ncurves` are int push constants.
 * The generated indices go to the `uint` storage buffer `out_indices[]` at slot 0, which is
 * declared with `Qualifier::WRITE`. */
GPU_SHADER_CREATE_INFO(gpu_shader_index_2d_array_points)
|
||||
.local_group_size(16, 16, 1)
|
||||
.push_constant(Type::INT, "elements_per_curve")
|
||||
.push_constant(Type::INT, "ncurves")
|
||||
.storage_buf(0, Qualifier::WRITE, "uint", "out_indices[]")
|
||||
.compute_source("gpu_shader_index_2d_array_points.glsl")
|
||||
.do_static_compilation(true);
|
||||
|
||||
/* Compute shader create-info for `gpu_shader_index_2d_array_lines.glsl`.
 * Local group size is 16x16x1. `elements_per_curve` and `ncurves` are int push constants.
 * The generated indices go to the `uint` storage buffer `out_indices[]` at slot 0, which is
 * declared with `Qualifier::WRITE`. */
GPU_SHADER_CREATE_INFO(gpu_shader_index_2d_array_lines)
|
||||
.local_group_size(16, 16, 1)
|
||||
.push_constant(Type::INT, "elements_per_curve")
|
||||
.push_constant(Type::INT, "ncurves")
|
||||
.storage_buf(0, Qualifier::WRITE, "uint", "out_indices[]")
|
||||
.compute_source("gpu_shader_index_2d_array_lines.glsl")
|
||||
.do_static_compilation(true);
|
||||
|
||||
/* Compute shader create-info for `gpu_shader_index_2d_array_tris.glsl`.
 * Local group size is 16x16x1. `elements_per_curve` and `ncurves` are int push constants.
 * The generated indices go to the `uint` storage buffer `out_indices[]` at slot 0, which is
 * declared with `Qualifier::WRITE`. */
GPU_SHADER_CREATE_INFO(gpu_shader_index_2d_array_tris)
|
||||
.local_group_size(16, 16, 1)
|
||||
.push_constant(Type::INT, "elements_per_curve")
|
||||
.push_constant(Type::INT, "ncurves")
|
||||
.storage_buf(0, Qualifier::WRITE, "uint", "out_indices[]")
|
||||
.compute_source("gpu_shader_index_2d_array_tris.glsl")
|
||||
.do_static_compilation(true);
|
Loading…
Reference in New Issue