Sculpt: Fix #107123: New normals impl. for PBVH_FACES #107456

Closed
Joseph Eagar wants to merge 5 commits from temp-pbvh-normals into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
3 changed files with 123 additions and 128 deletions

View File

@ -1660,7 +1660,7 @@ static void sculpt_update_persistent_base(Object *ob)
}
static void sculpt_update_object(
Depsgraph *depsgraph, Object *ob, Object *ob_eval, bool need_pmap, bool is_paint_tool)
Depsgraph *depsgraph, Object *ob, Object *ob_eval, bool /* need_pmap */, bool is_paint_tool)
{
Scene *scene = DEG_get_input_scene(depsgraph);
Sculpt *sd = scene->toolsettings->sculpt;
@ -1766,13 +1766,13 @@ static void sculpt_update_object(
sculpt_attribute_update_refs(ob);
sculpt_update_persistent_base(ob);
if (need_pmap && ob->type == OB_MESH && !ss->pmap) {
if (ob->type == OB_MESH && !ss->pmap) {
BKE_mesh_vert_poly_map_create(
&ss->pmap, &ss->pmap_mem, me->polys(), me->corner_verts().data(), me->totvert);
}
if (ss->pbvh) {
BKE_pbvh_pmap_set(ss->pbvh, ss->pmap);
}
if (ss->pbvh) {
BKE_pbvh_pmap_set(ss->pbvh, ss->pmap);
}
if (ss->deform_modifiers_active) {
@ -1942,14 +1942,17 @@ void BKE_sculpt_color_layer_create_if_needed(Object *object)
}
}
void BKE_sculpt_update_object_for_edit(
Depsgraph *depsgraph, Object *ob_orig, bool need_pmap, bool /*need_mask*/, bool is_paint_tool)
void BKE_sculpt_update_object_for_edit(Depsgraph *depsgraph,
Object *ob_orig,
bool /* need_pmap */,
bool /*need_mask*/,
bool is_paint_tool)
{
BLI_assert(ob_orig == DEG_get_original_object(ob_orig));
Object *ob_eval = DEG_get_evaluated_object(depsgraph, ob_orig);
sculpt_update_object(depsgraph, ob_orig, ob_eval, need_pmap, is_paint_tool);
sculpt_update_object(depsgraph, ob_orig, ob_eval, true, is_paint_tool);
}
int *BKE_sculpt_face_sets_ensure(Mesh *mesh)

View File

@ -13,8 +13,10 @@
#include "BLI_math.h"
#include "BLI_rand.h"
#include "BLI_task.h"
#include "BLI_task.hh"
#include "BLI_utildefines.h"
#include "BLI_vector.hh"
#include "BLI_array.hh"
#include "DNA_mesh_types.h"
#include "DNA_meshdata_types.h"
@ -38,9 +40,11 @@
#include "pbvh_intern.hh"
using blender::float3;
using blender::IndexRange;
using blender::MutableSpan;
using blender::Span;
using blender::Vector;
using blender::Array;
#define LEAF_LIMIT 10000
@ -858,7 +862,10 @@ void BKE_pbvh_update_mesh_pointers(PBVH *pbvh, Mesh *mesh)
/* Make sure cached normals start out calculated. */
mesh->vert_normals();
mesh->poly_normals();
pbvh->vert_normals = BKE_mesh_vert_normals_for_write(mesh);
pbvh->poly_normals = mesh->runtime->poly_normals;
pbvh->vdata = &mesh->vdata;
pbvh->ldata = &mesh->ldata;
@ -1373,131 +1380,113 @@ struct PBVHUpdateData {
PBVHUpdateData(PBVH *pbvh_, Span<PBVHNode *> nodes_) : pbvh(pbvh_), nodes(nodes_) {}
};

const Span<PBVHNode *> nodes

`const Span<PBVHNode *> nodes`

Shouldn't that be Span<const PBVHNode*>?

Shouldn't that be `Span<const PBVHNode*>`?

The container itself is constant, since you don't change it by value (for example, swap between 2 spans). Its nodes are not contants, so how can you get a Span<const Node> from a Vector<Node>? (and you change nodes in code as i can see, not sure why isn't mutable span now)

The container itself is constant, since you don't change it by value (for example, swap between 2 spans). Its nodes are not contants, so how can you get a `Span<const Node>` from a `Vector<Node>`? (and you change nodes in code as i can see, not sure why isn't mutable span now)
static void pbvh_update_normals_clear_task_cb(void *__restrict userdata,
const int n,
const TaskParallelTLS *__restrict /*tls*/)
{
PBVHUpdateData *data = static_cast<PBVHUpdateData *>(userdata);
PBVH *pbvh = data->pbvh;
PBVHNode *node = data->nodes[n];
float(*vert_normals)[3] = data->vert_normals;
if (node->flag & PBVH_UpdateNormals) {
const int *verts = node->vert_indices;
const int totvert = node->uniq_verts;
for (int i = 0; i < totvert; i++) {
const int v = verts[i];
if (pbvh->vert_bitmap[v]) {
zero_v3(vert_normals[v]);
}
}
}
}
static void pbvh_update_normals_accum_task_cb(void *__restrict userdata,
const int n,
const TaskParallelTLS *__restrict /*tls*/)
{
PBVHUpdateData *data = static_cast<PBVHUpdateData *>(userdata);
PBVH *pbvh = data->pbvh;
PBVHNode *node = data->nodes[n];
float(*vert_normals)[3] = data->vert_normals;
if (node->flag & PBVH_UpdateNormals) {
uint mpoly_prev = UINT_MAX;
blender::float3 fn;
const int *faces = node->prim_indices;
const int totface = node->totprim;
for (int i = 0; i < totface; i++) {
const MLoopTri *lt = &pbvh->looptri[faces[i]];
const int vtri[3] = {
pbvh->corner_verts[lt->tri[0]],
pbvh->corner_verts[lt->tri[1]],
pbvh->corner_verts[lt->tri[2]],
};
const int sides = 3;
/* Face normal and mask */
if (lt->poly != mpoly_prev) {
const blender::IndexRange poly = pbvh->polys[lt->poly];
fn = blender::bke::mesh::poly_normal_calc(
{reinterpret_cast<const blender::float3 *>(pbvh->vert_positions), pbvh->totvert},
{&pbvh->corner_verts[poly.start()], poly.size()});
mpoly_prev = lt->poly;
}
for (int j = sides; j--;) {
const int v = vtri[j];
if (pbvh->vert_bitmap[v]) {
/* NOTE: This avoids `lock, add_v3_v3, unlock`
* and is five to ten times quicker than a spin-lock.
* Not exact equivalent though, since atomicity is only ensured for one component
* of the vector at a time, but here it shall not make any sensible difference. */
for (int k = 3; k--;) {
atomic_add_and_fetch_fl(&vert_normals[v][k], fn[k]);
}
}
}
}
}
}
static void pbvh_update_normals_store_task_cb(void *__restrict userdata,
const int n,
const TaskParallelTLS *__restrict /*tls*/)
{
PBVHUpdateData *data = static_cast<PBVHUpdateData *>(userdata);
PBVH *pbvh = data->pbvh;
PBVHNode *node = data->nodes[n];
float(*vert_normals)[3] = data->vert_normals;
if (node->flag & PBVH_UpdateNormals) {
const int *verts = node->vert_indices;
const int totvert = node->uniq_verts;
for (int i = 0; i < totvert; i++) {
const int v = verts[i];
/* No atomics necessary because we are iterating over uniq_verts only,
* so we know only this thread will handle this vertex. */
if (pbvh->vert_bitmap[v]) {
normalize_v3(vert_normals[v]);
pbvh->vert_bitmap[v] = false;
}
}
node->flag &= ~PBVH_UpdateNormals;
}
}
static void pbvh_faces_update_normals(PBVH *pbvh, Span<PBVHNode *> nodes)
{
/* subtle assumptions:
* - We know that for all edited vertices, the nodes with faces
* adjacent to these vertices have been marked with PBVH_UpdateNormals.
* This is true because if the vertex is inside the brush radius, the
* bounding box of its adjacent faces will be as well.
* - However this is only true for the vertices that have actually been
* edited, not for all vertices in the nodes marked for update, so we
* can only update vertices marked in the `vert_bitmap`.
*/
using namespace blender::threading;
PBVHUpdateData data(pbvh, nodes);
data.pbvh = pbvh;
data.nodes = nodes;
data.vert_normals = pbvh->vert_normals;
const int *corner_verts = pbvh->corner_verts;
Array<Vector<int>> task_update_verts(nodes.size());
Array<Vector<int>> task_update_tris(nodes.size());
Vector<Vector<int>> task_update_verts;
Vector<Vector<int>> task_update_tris;

task_update_verts.resize(nodes.size());
task_update_tris.resize(nodes.size());

->

Array<Vector<int>> task_update_verts(nodes.size());
Array<Vector<int>> task_update_tris(nodes.size());

```Cpp Vector<Vector<int>> task_update_verts; Vector<Vector<int>> task_update_tris; task_update_verts.resize(nodes.size()); task_update_tris.resize(nodes.size()); ``` -> ```Cpp Array<Vector<int>> task_update_verts(nodes.size()); Array<Vector<int>> task_update_tris(nodes.size()); ```
TaskParallelSettings settings;
BKE_pbvh_parallel_range_settings(&settings, true, nodes.size());
parallel_for(nodes.index_range(), 1, [&](IndexRange range) {
for (int n : range) {
PBVHNode *node = nodes[n];
/* Zero normals before accumulation. */
BLI_task_parallel_range(0, nodes.size(), &data, pbvh_update_normals_clear_task_cb, &settings);
BLI_task_parallel_range(0, nodes.size(), &data, pbvh_update_normals_accum_task_cb, &settings);
BLI_task_parallel_range(0, nodes.size(), &data, pbvh_update_normals_store_task_cb, &settings);
for (int i : IndexRange(node->totprim)) {
const MLoopTri &tri = pbvh->looptri[node->prim_indices[i]];
int v1 = corner_verts[tri.tri[0]];
int v2 = corner_verts[tri.tri[1]];
int v3 = corner_verts[tri.tri[2]];
if (!pbvh->vert_bitmap[v1] && !pbvh->vert_bitmap[v2] && !pbvh->vert_bitmap[v3]) {
continue;
}
task_update_verts[n].append(v1);
task_update_verts[n].append(v2);
task_update_verts[n].append(v3);
task_update_tris[n].append(node->prim_indices[i]);
}
}
});
/* Don't bother de-duplicating. */
float(*vert_positions)[3] = pbvh->vert_positions;
Vector<int> verts, tris; /* Note: these two vectors can contain duplicates. */

If each vector in task_update_verts contain at least one element.

verts.ensure(task_update_verts.size());

Same for tris

If each vector in `task_update_verts` contain at least one element. ```Cpp verts.ensure(task_update_verts.size()); ``` Same for `tris`
Review

That assumption doesn't really hold, and I don't think using the nodes size as a heuristic really works. You'd have to multiply it by some factor (a percentage of the leaf limit most likely) which would have to be tested across a range of conditions.

That assumption doesn't really hold, and I don't think using the nodes size as a heuristic really works. You'd have to multiply it by some factor (a percentage of the leaf limit most likely) which would have to be tested across a range of conditions.

This heuristic provides some improvement, but yes, without a real calculation, you can't allocate all the memory. But if it's 100 tasks with 3 verts, you'll be allocating 201, not 300 times.
But yes, this is not some real implementation requirement, but just a weak suggestion, you can close it if you don't think it's really useful

This heuristic provides some improvement, but yes, without a real calculation, you can't allocate all the memory. But if it's 100 tasks with 3 verts, you'll be allocating 201, not 300 times. But yes, this is not some real implementation requirement, but just a weak suggestion, you can close it if you don't think it's really useful
for (const Span<int> update_verts : task_update_verts) {
for (int vert : update_verts) {
for (const Span<int> verts : task_update_verts)

Same below.

```Cpp for (const Span<int> verts : task_update_verts) ``` Same below.
verts.append(vert);
}
}
for (const Span<int> update_tris : task_update_tris) {
for (int tri : update_tris) {
tris.append(tri);
}
}
/* Zero poly normals */
parallel_for(tris.index_range(), 64, [&](IndexRange range) {
for (int tri_i : range) {
const MLoopTri &tri = pbvh->looptri[tris[tri_i]];
zero_v3(pbvh->poly_normals[tri.poly]);
}
});
parallel_for(tris.index_range(), 64, [&](IndexRange range) {
for (int tri_i : range) {
const MLoopTri &tri = pbvh->looptri[tris[tri_i]];
float *co1 = vert_positions[corner_verts[tri.tri[0]]];
float *co2 = vert_positions[corner_verts[tri.tri[1]]];
float *co3 = vert_positions[corner_verts[tri.tri[2]]];
float3 no;
normal_tri_v3(no, co1, co2, co3);
for (int i : IndexRange(3)) {
atomic_add_and_fetch_fl(&pbvh->poly_normals[tri.poly][i], no[i]);
}
}
});
for (int tri_i : tris) {
const MLoopTri &tri = pbvh->looptri[tri_i];
normalize_v3(pbvh->poly_normals[tri.poly]);
}
parallel_for(verts.index_range(), 64, [&](IndexRange range) {
for (int vert_i : range) {
int vert = verts[vert_i];
float no[3] = {0.0f, 0.0f, 0.0f};
const MeshElemMap &map = pbvh->pmap[vert];
for (int i : IndexRange(map.count)) {
add_v3_v3(no, pbvh->poly_normals[map.indices[i]]);
}
normalize_v3(no);
for (int i : IndexRange(3)) {
int32_t int_val;
*reinterpret_cast<float *>(&int_val) = no[i];
atomic_store_int32(reinterpret_cast<int32_t *>(&pbvh->vert_normals[vert][i]), int_val);
}
}
});
for (int vert : verts) {
pbvh->vert_bitmap[vert] = false;
}
for (PBVHNode *node : nodes) {
node->flag &= ~PBVH_UpdateNormals;
}
}
static void pbvh_update_mask_redraw_task_cb(void *__restrict userdata,

View File

@ -3,6 +3,8 @@
#pragma once
#include "BLI_vector.hh"
#include "BLI_math_vector_types.hh"
#include "BLI_span.hh"
/** \file
* \ingroup bke
@ -155,6 +157,7 @@ struct PBVH {
/* NOTE: Normals are not `const` because they can be updated for drawing by sculpt code. */
float (*vert_normals)[3];
blender::MutableSpan<blender::float3> poly_normals;
bool *hide_vert;
float (*vert_positions)[3];
blender::OffsetIndices<int> polys;