From 4bdd1778b9bca07b3da4de07c467e2bfae11485e Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Wed, 15 Mar 2023 09:05:32 -0400 Subject: [PATCH 1/4] Mesh: Parallelize extraction of UV maps Since UVs are now stored as 2D vectors in meshes, they can be copied directly to the vertex buffers. Somewhat surprisingly, multithreading the copying into the vertex buffer provides a good speedup, at least on a CPU with many cores. Here is a test uploading two UV maps created in geometry nodes with a 1 million quad mesh, with a Ryzen 7950x: | | Before | After | Speedup | | Average | 24.3 ms | 7.5 ms | 3.2x | | Min | 17.6 ms | 7.0 ms | 2.5x | I added the copying utilities to the array utils header, since the need for them has come up in a few different places already, and the existing function with a selection argument didn't make sense here. --- source/blender/blenlib/BLI_array_utils.hh | 18 +++++++++++++ source/blender/blenlib/intern/array_utils.cc | 9 +++++++ .../mesh_extractors/extract_mesh_vbo_uv.cc | 25 +++++++++++-------- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/source/blender/blenlib/BLI_array_utils.hh b/source/blender/blenlib/BLI_array_utils.hh index 81ffa7eade8..21013340cea 100644 --- a/source/blender/blenlib/BLI_array_utils.hh +++ b/source/blender/blenlib/BLI_array_utils.hh @@ -10,6 +10,24 @@ namespace blender::array_utils { +/** + * Fill the destination span by copying all values from the `src` array. Threaded based on + * grain-size. + */ +void copy(const GVArray &src, GMutableSpan dst, int64_t grain_size = 4096); + +/** + * Fill the destination span by copying all values from the `src` array. Threaded based on + * grain-size. 
+ */ +template +inline void copy(const Span src, MutableSpan dst, const int64_t grain_size = 4096) +{ + BLI_assert(src.size() == dst.size()); + threading::parallel_for(src.index_range(), grain_size, [src, dst](const IndexRange range) { + dst.slice(range).copy_from(src.slice(range)); + }); +} /** * Fill the destination span by copying masked values from the `src` array. Threaded based on * grain-size. diff --git a/source/blender/blenlib/intern/array_utils.cc b/source/blender/blenlib/intern/array_utils.cc index 1e1ef354461..4abf9ce5e34 100644 --- a/source/blender/blenlib/intern/array_utils.cc +++ b/source/blender/blenlib/intern/array_utils.cc @@ -4,6 +4,15 @@ namespace blender::array_utils { +void copy(const GVArray &src, GMutableSpan dst, const int64_t grain_size) +{ + BLI_assert(src.type() == dst.type()); + BLI_assert(src.size() == dst.size()); + threading::parallel_for(src.index_range(), grain_size, [&](const IndexRange range) { + src.materialize_to_uninitialized(range, dst.data()); + }); +} + void copy(const GVArray &src, const IndexMask selection, GMutableSpan dst, diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc index b92d367ac0e..91febf9d2e0 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc @@ -5,8 +5,8 @@ * \ingroup draw */ +#include "BLI_array_utils.hh" #include "BLI_math_vector_types.hh" -#include "BLI_string.h" #include "draw_subdivision.h" #include "extract_mesh.hh" @@ -77,6 +77,7 @@ static void extract_uv_init(const MeshRenderData *mr, void *buf, void * /*tls_data*/) { + SCOPED_TIMER_AVERAGED(__func__); GPUVertBuf *vbo = static_cast(buf); GPUVertFormat format = {0}; @@ -91,8 +92,9 @@ static void extract_uv_init(const MeshRenderData *mr, GPU_vertbuf_init_with_format(vbo, &format); GPU_vertbuf_data_alloc(vbo, v_len); - float2 *uv_data = 
static_cast(GPU_vertbuf_get_data(vbo)); - for (int i = 0; i < MAX_MTFACE; i++) { + MutableSpan uv_data(static_cast(GPU_vertbuf_get_data(vbo)), v_len); + int vbo_index = 0; + for (const int i : IndexRange(MAX_MTFACE)) { if (uv_layers & (1 << i)) { if (mr->extract_type == MR_EXTRACT_BMESH) { int cd_ofs = CustomData_get_n_offset(cd_ldata, CD_PROP_FLOAT2, i); @@ -102,17 +104,20 @@ static void extract_uv_init(const MeshRenderData *mr, BMLoop *l_iter, *l_first; l_iter = l_first = BM_FACE_FIRST_LOOP(efa); do { - float *luv = BM_ELEM_CD_GET_FLOAT_P(l_iter, cd_ofs); - memcpy(uv_data, luv, sizeof(*uv_data)); - uv_data++; + uv_data[vbo_index] = BM_ELEM_CD_GET_FLOAT_P(l_iter, cd_ofs); + vbo_index++; } while ((l_iter = l_iter->next) != l_first); } } else { - const float2 *layer_data = static_cast( - CustomData_get_layer_n(cd_ldata, CD_PROP_FLOAT2, i)); - for (int ml_index = 0; ml_index < mr->loop_len; ml_index++, uv_data++, layer_data++) { - memcpy(uv_data, layer_data, sizeof(*uv_data)); + for (const int i : IndexRange(MAX_MTFACE)) { + if (uv_layers & (1 << i)) { + const Span uv_map( + static_cast(CustomData_get_layer_n(cd_ldata, CD_PROP_FLOAT2, i)), + mr->loop_len); + array_utils::copy(uv_map, uv_data.slice(vbo_index, mr->loop_len)); + vbo_index += mr->loop_len; + } } } } -- 2.30.2 From cd5cd004ae61d2895905023066aa868cad385eb1 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Fri, 17 Mar 2023 21:08:20 -0400 Subject: [PATCH 2/4] Capture spans by reference --- source/blender/blenlib/BLI_array_utils.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/blender/blenlib/BLI_array_utils.hh b/source/blender/blenlib/BLI_array_utils.hh index 21013340cea..46d104ab59c 100644 --- a/source/blender/blenlib/BLI_array_utils.hh +++ b/source/blender/blenlib/BLI_array_utils.hh @@ -24,7 +24,7 @@ template inline void copy(const Span src, MutableSpan dst, const int64_t grain_size = 4096) { BLI_assert(src.size() == dst.size()); - threading::parallel_for(src.index_range(), 
grain_size, [src, dst](const IndexRange range) { + threading::parallel_for(src.index_range(), grain_size, [&](const IndexRange range) { dst.slice(range).copy_from(src.slice(range)); }); } -- 2.30.2 From bc4f12903c99bcdaae30e3793b1be5f4774f371a Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Fri, 17 Mar 2023 21:09:24 -0400 Subject: [PATCH 3/4] Remove nested loop mistake --- .../intern/mesh_extractors/extract_mesh_vbo_uv.cc | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc index 91febf9d2e0..a6ae8ecc867 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc +++ b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc @@ -110,15 +110,11 @@ static void extract_uv_init(const MeshRenderData *mr, } } else { - for (const int i : IndexRange(MAX_MTFACE)) { - if (uv_layers & (1 << i)) { - const Span uv_map( - static_cast(CustomData_get_layer_n(cd_ldata, CD_PROP_FLOAT2, i)), - mr->loop_len); - array_utils::copy(uv_map, uv_data.slice(vbo_index, mr->loop_len)); - vbo_index += mr->loop_len; - } - } + const Span uv_map( + static_cast(CustomData_get_layer_n(cd_ldata, CD_PROP_FLOAT2, i)), + mr->loop_len); + array_utils::copy(uv_map, uv_data.slice(vbo_index, mr->loop_len)); + vbo_index += mr->loop_len; } } } -- 2.30.2 From 2b9822055fc6a10798f3d455d9e933b53fe521f9 Mon Sep 17 00:00:00 2001 From: Hans Goudey Date: Fri, 17 Mar 2023 21:09:29 -0400 Subject: [PATCH 4/4] Remove timer --- .../blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc index a6ae8ecc867..c6d5a5fc697 100644 --- a/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc +++ 
b/source/blender/draw/intern/mesh_extractors/extract_mesh_vbo_uv.cc @@ -77,7 +77,6 @@ static void extract_uv_init(const MeshRenderData *mr, void *buf, void * /*tls_data*/) { - SCOPED_TIMER_AVERAGED(__func__); GPUVertBuf *vbo = static_cast(buf); GPUVertFormat format = {0}; -- 2.30.2