3 changed files with 33 additions and 103 deletions
--- a/source/blender/blenkernel/BKE_mesh.hh
+++ b/source/blender/blenkernel/BKE_mesh.hh
@@ -73,9 +73,10 @@ void normals_calc_polys(Span<float3> vert_positions,
 * \note Usually #Mesh::vert_normals() is the preferred way to access vertex normals,
 * since they may already be calculated and cached on the mesh.
 */
-void normals_calc_verts(Span<float3> vert_positions,
+void normals_calc_verts(Span<float3> positions,
                        OffsetIndices<int> polys,
                        Span<int> corner_verts,
+                        const VertToPolyMap &vert_to_poly, /* Poly indices per vertex. */
                        Span<float3> poly_normals,
                        MutableSpan<float3> vert_normals);

--- a/source/blender/blenkernel/intern/key.cc
+++ b/source/blender/blenkernel/intern/key.cc
@@ -2270,6 +2270,7 @@ void BKE_keyblock_mesh_calc_normals(const KeyBlock *kb,
        {reinterpret_cast<const blender::float3 *>(positions), mesh->totvert},
        polys,
        corner_verts,
+        mesh->vert_to_poly_map(),
        {reinterpret_cast<const blender::float3 *>(poly_normals), polys.size()},
        {reinterpret_cast<blender::float3 *>(vert_normals), mesh->totvert});
  }
--- a/source/blender/blenkernel/intern/mesh_normals.cc
+++ b/source/blender/blenkernel/intern/mesh_normals.cc
@@ -42,46 +42,6 @@
 #  include "BLI_timeit.hh"
 #endif

-/* -------------------------------------------------------------------- */
-/** \name Private Utility Functions
- * \{ */
-
-/**
- * A thread-safe version of #add_v3_v3 that uses a spin-lock.
- *
- * \note Avoid using this when the chance of contention is high.
- */
-static void add_v3_v3_atomic(float r[3], const float a[3])
-{
-#define FLT_EQ_NONAN(_fa, _fb) (*((const uint32_t *)&_fa) == *((const uint32_t *)&_fb))
-
-  float virtual_lock = r[0];
-  while (true) {
-    /* This loops until following conditions are met:
-     * - `r[0]` has same value as virtual_lock (i.e. it did not change since last try).
-     * - `r[0]` was not `FLT_MAX`, i.e. it was not locked by another thread. */
-    const float test_lock = atomic_cas_float(&r[0], virtual_lock, FLT_MAX);
-    if (_ATOMIC_LIKELY(FLT_EQ_NONAN(test_lock, virtual_lock) && (test_lock != FLT_MAX))) {
-      break;
-    }
-    virtual_lock = test_lock;
-  }
-  virtual_lock += a[0];
-  r[1] += a[1];
-  r[2] += a[2];
-
-  /* Second atomic operation to 'release'
-   * our lock on that vector and set its first scalar value. */
-  /* Note that we do not need to loop here, since we 'locked' `r[0]`,
-   * nobody should have changed it in the mean time. */
-  virtual_lock = atomic_cas_float(&r[0], FLT_MAX, virtual_lock);
-  BLI_assert(virtual_lock == FLT_MAX);
-
-#undef FLT_EQ_NONAN
-}
-
-/** \} */
-
 /* -------------------------------------------------------------------- */
 /** \name Public Utility Functions
 *
@@ -238,73 +198,33 @@ void normals_calc_polys(const Span<float3> positions,
 void normals_calc_verts(const Span<float3> positions,
                        const OffsetIndices<int> polys,
                        const Span<int> corner_verts,
+                        const VertToPolyMap &vert_to_poly,
                        const Span<float3> poly_normals,
                        MutableSpan<float3> vert_normals)
 {
-
-  /* Zero the vertex normal array for accumulation. */
-  {
-    memset(vert_normals.data(), 0, vert_normals.as_span().size_in_bytes());
-  }
-
-  /* Accumulate the normals of the faces surrounding each vertex. */
-  {
-    threading::parallel_for(polys.index_range(), 1024, [&](const IndexRange range) {
-      for (const int poly_i : range) {
-        const Span<int> poly_verts = corner_verts.slice(polys[poly_i]);
-
-        const float3 &pnor = poly_normals[poly_i];
-
-        const int i_end = poly_verts.size() - 1;
-
-        /* Accumulate angle weighted face normal into the vertex normal. */
-        /* Inline version of #accumulate_vertex_normals_poly_v3. */
-        {
-          float edvec_prev[3], edvec_next[3], edvec_end[3];
-          const float *v_curr = positions[poly_verts[i_end]];
-          sub_v3_v3v3(edvec_prev, positions[poly_verts[i_end - 1]], v_curr);
-          normalize_v3(edvec_prev);
-          copy_v3_v3(edvec_end, edvec_prev);
-
-          for (int i_next = 0, i_curr = i_end; i_next <= i_end; i_curr = i_next++) {
-            const float *v_next = positions[poly_verts[i_next]];
-
-            /* Skip an extra normalization by reusing the first calculated edge. */
-            if (i_next != i_end) {
-              sub_v3_v3v3(edvec_next, v_curr, v_next);
-              normalize_v3(edvec_next);
-            }
-            else {
-              copy_v3_v3(edvec_next, edvec_end);
-            }
-
-            /* Calculate angle between the two poly edges incident on this vertex. */
-            const float fac = saacos(-dot_v3v3(edvec_prev, edvec_next));
-            const float vnor_add[3] = {pnor[0] * fac, pnor[1] * fac, pnor[2] * fac};
-
-            float *vnor = vert_normals[poly_verts[i_curr]];
-            add_v3_v3_atomic(vnor, vnor_add);
-            v_curr = v_next;
-            copy_v3_v3(edvec_prev, edvec_next);
-          }
-        }
+  threading::parallel_for(positions.index_range(), 1024, [&](const IndexRange range) {
+    for (const int vert : range) {
+      const Span<int> polys_around_vert = vert_to_poly[vert];
+      if (polys_around_vert.is_empty()) { /* Loose vertex: position is used as normal. */
+        vert_normals[vert] = math::normalize(positions[vert]);
+        continue;
      }
-    });
-  }

-  /* Normalize and validate computed vertex normals. */
-  {
-    threading::parallel_for(positions.index_range(), 1024, [&](const IndexRange range) {
-      for (const int vert_i : range) {
-        float *no = vert_normals[vert_i];
+      /* Accumulate angle-weighted poly normals; the sum is normalized once after the loop. */
+      float3 vert_normal(0);
+      for (const int poly_index : polys_around_vert) {
+        const IndexRange poly = polys[poly_index];
+        const int2 adjacent_verts = poly_find_adjecent_verts(poly, corner_verts, vert);

-        if (UNLIKELY(normalize_v3(no) == 0.0f)) {
-          /* Following Mesh convention; we use vertex coordinate itself for normal in this case. */
-          normalize_v3_v3(no, positions[vert_i]);
-        }
+        const float3 dir_prev = math::normalize(positions[adjacent_verts[0]] - positions[vert]);
+        const float3 dir_next = math::normalize(positions[adjacent_verts[1]] - positions[vert]);
+        const float factor = saacos(math::dot(dir_prev, dir_next));
+
+        vert_normal += poly_normals[poly_index] * factor;
      }
-    });
-  }
+      vert_normals[vert] = math::normalize(vert_normal);
+    }
+  });
 }

 /** \} */
@@ -336,9 +256,17 @@ blender::Span<blender::float3> Mesh::vert_normals() const
    const Span<float3> positions = this->vert_positions();
    const OffsetIndices polys = this->polys();
    const Span<int> corner_verts = this->corner_verts();
-    const Span<float3> poly_normals = this->poly_normals();
+
+    bke::mesh::VertToPolyMap vert_to_poly;
+    Span<float3> poly_normals;
+    threading::parallel_invoke(
+        this->totvert > 1024,
+        [&]() { vert_to_poly = this->vert_to_poly_map(); },
+        [&]() { poly_normals = this->poly_normals(); });
+
    r_data.reinitialize(positions.size());
-    bke::mesh::normals_calc_verts(positions, polys, corner_verts, poly_normals, r_data);
+    bke::mesh::normals_calc_verts(
+        positions, polys, corner_verts, vert_to_poly, poly_normals, r_data);
  });
  return this->runtime->vert_normals_cache.data();
 }