BLI: refactor IndexMask for better performance and memory usage #104629

Merged
Jacques Lucke merged 254 commits from JacquesLucke/blender:index-mask-refactor into main 2023-05-24 18:11:47 +02:00
7 changed files with 80 additions and 89 deletions
Showing only changes of commit be9bbdb13b

View File

@@ -67,7 +67,7 @@ BLI_NOINLINE static void sample_corner_attribute(const Span<MLoopTri> looptris,
if constexpr (check_indices) {
if (looptri_indices[i] == -1) {
dst[i] = {};
continue;
return;
}
}
const MLoopTri &tri = looptris[looptri_indices[i]];
@@ -144,17 +144,17 @@ static void sample_barycentric_weights(const Span<float3> vert_positions,
const IndexMask &mask,
MutableSpan<float3> bary_coords)
{
for (const int i : mask) {
mask.foreach_index([&](const int i) {
if constexpr (check_indices) {
if (looptri_indices[i] == -1) {
bary_coords[i] = {};
continue;
return;
}
}
const MLoopTri &tri = looptris[looptri_indices[i]];
bary_coords[i] = compute_bary_coord_in_triangle(
vert_positions, corner_verts, tri, sample_positions[i]);
}
});
}
template<bool check_indices = false>
@@ -170,7 +170,7 @@ static void sample_nearest_weights(const Span<float3> vert_positions,
if constexpr (check_indices) {
if (looptri_indices[i] == -1) {
bary_coords[i] = {};
continue;
return;
}
}
const MLoopTri &tri = looptris[looptri_indices[i]];
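The change that repeats throughout this commit is mechanical: range-based `for` loops over an `IndexMask` become `foreach_index` calls taking a lambda, so every `continue` becomes a `return` and the loop's closing `}` becomes `});`. A minimal sketch of the migration, with `skip` and `process` as hypothetical stand-ins for the real loop bodies:

```cpp
// Before: range-based iteration; `continue` skips to the next index.
for (const int64_t i : mask) {
  if (skip(i)) {
    continue;
  }
  process(i);
}

// After: callback-based iteration; `return` exits only the lambda,
// taking over the role `continue` played in the loop body.
mask.foreach_index([&](const int64_t i) {
  if (skip(i)) {
    return;
  }
  process(i);
});
```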

View File

@@ -6,6 +6,7 @@
#include <optional>
#include <variant>
#include "BLI_array.hh"
#include "BLI_bit_vector.hh"
#include "BLI_function_ref.hh"
#include "BLI_index_range.hh"
@@ -201,6 +202,11 @@ class IndexMask {
GrainSize grain_size,
IndexMaskMemory &memory,
Fn &&predicate);
template<typename T, typename Fn>
static void from_groups(const IndexMask &universe,
IndexMaskMemory &memory,
Fn &&get_group_index,
MutableSpan<IndexMask> r_masks);
template<typename T> void to_indices(MutableSpan<T> r_indices) const;
void to_bits(MutableBitSpan r_bits, int64_t offset = 0) const;
@@ -696,7 +702,8 @@ template<typename Fn> inline void ChunkSlice::foreach_span(Fn &&fn) const
fn(indices);
}
for (int64_t segment_i = this->begin_it.segment_i + 1; segment_i < this->end_it.segment_i;
segment_i++) {
segment_i++)
{
const int64_t begin_i = 0;
const int64_t end_i = this->chunk->segment_size(segment_i);
const Span<int16_t> indices{this->chunk->indices_by_segment[segment_i] + begin_i,
@@ -773,8 +780,8 @@ template<typename Fn> inline void IndexMask::foreach_span_template(Fn &&fn) const
chunk.indices_by_segment[segment_i] + begin_it.index_in_segment, segment_size};
fn(chunk_id, indices);
}
for (int64_t segment_i = begin_it.segment_i + 1; segment_i < chunk.segments_num;
segment_i++) {
for (int64_t segment_i = begin_it.segment_i + 1; segment_i < chunk.segments_num; segment_i++)
{
const int64_t segment_size = chunk.cumulative_segment_sizes[segment_i + 1] -
chunk.cumulative_segment_sizes[segment_i];
const Span<int16_t> indices{chunk.indices_by_segment[segment_i], segment_size};
@@ -1016,6 +1023,22 @@ inline IndexMask IndexMask::from_predicate(const IndexMask &universe,
return IndexMask::from_indices<int64_t>(indices, memory);
}
template<typename T, typename Fn>
void IndexMask::from_groups(const IndexMask &universe,
IndexMaskMemory &memory,
Fn &&get_group_index,
MutableSpan<IndexMask> r_masks)
{
Array<Vector<T>> indices_by_group(r_masks.size());
universe.foreach_index([&](const int64_t i) {
const int group_index = get_group_index(i);
indices_by_group[group_index].append(T(i));
});
for (const int64_t i : r_masks.index_range()) {
r_masks[i] = IndexMask::from_indices<T>(indices_by_group[i], memory);
}
}
std::optional<IndexRange> inline IndexMask::to_range() const
{
if (data_.indices_num == 0) {
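Reading the definition above, `from_groups` is a bucketing helper: it walks the universe once, asks `get_group_index` which bucket each index belongs to, and materializes one `IndexMask` per bucket. A hedged usage sketch, with the even/odd split purely illustrative:

```cpp
IndexMaskMemory memory;
Array<IndexMask> masks(2);
/* Split indices 0..99 into two masks: even indices end up in masks[0],
 * odd indices in masks[1]. Every group index returned by the lambda must
 * be a valid index into the output span. */
IndexMask::from_groups<int>(
    IndexMask(100), memory, [](const int64_t i) { return int(i % 2); }, masks);
```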

View File

@@ -23,9 +23,7 @@ class AddPrefixFunction : public MultiFunction {
const VArray<std::string> &prefixes = params.readonly_single_input<std::string>(0, "Prefix");
MutableSpan<std::string> strings = params.single_mutable<std::string>(1, "Strings");
for (int64_t i : mask) {
strings[i] = prefixes[i] + strings[i];
}
mask.foreach_index([&](const int64_t i) { strings[i] = prefixes[i] + strings[i]; });
}
};
@@ -48,12 +46,12 @@ class CreateRangeFunction : public MultiFunction {
const VArray<int> &sizes = params.readonly_single_input<int>(0, "Size");
GVectorArray &ranges = params.vector_output(1, "Range");
for (int64_t i : mask) {
mask.foreach_index([&](const int64_t i) {
int size = sizes[i];
for (int j : IndexRange(size)) {
ranges.append(i, &j);
}
}
});
}
};
@@ -75,12 +73,12 @@ class GenericAppendFunction : public MultiFunction {
GVectorArray &vectors = params.vector_mutable(0, "Vector");
const GVArray &values = params.readonly_single_input(1, "Value");
for (int64_t i : mask) {
mask.foreach_index([&](const int64_t i) {
BUFFER_FOR_CPP_TYPE_VALUE(values.type(), buffer);
values.get(i, buffer);
vectors.append(i, buffer);
values.type().destruct(buffer);
}
});
}
};
@@ -125,9 +123,7 @@ class AppendFunction : public MultiFunction {
GVectorArray_TypedMutableRef<int> vectors = params.vector_mutable<int>(0);
const VArray<int> &values = params.readonly_single_input<int>(1);
for (int64_t i : mask) {
vectors.append(i, values[i]);
}
mask.foreach_index([&](const int64_t i) { vectors.append(i, values[i]); });
}
};
@@ -150,13 +146,13 @@ class SumVectorFunction : public MultiFunction {
const VVectorArray<int> &vectors = params.readonly_vector_input<int>(0);
MutableSpan<int> sums = params.uninitialized_single_output<int>(1);
for (int64_t i : mask) {
mask.foreach_index([&](const int64_t i) {
int sum = 0;
for (int j : IndexRange(vectors.get_vector_size(i))) {
sum += vectors.get_vector_element(i, j);
}
sums[i] = sum;
}
});
}
};
@@ -181,9 +177,8 @@ class OptionalOutputsFunction : public MultiFunction {
index_mask::masked_fill(values, 5, mask);
}
MutableSpan<std::string> values = params.uninitialized_single_output<std::string>(1, "Out 2");
for (const int i : mask) {
new (&values[i]) std::string("hello, this is a long string");
}
mask.foreach_index(
[&](const int i) { new (&values[i]) std::string("hello, this is a long string"); });
}
};

View File

@@ -398,7 +398,7 @@ class SampleCurveFunction : public mf::MultiFunction {
Vector<int> invalid_indices;
MultiValueMap<int, int> indices_per_curve;
devirtualize_varray(curve_indices, [&](const auto curve_indices) {
for (const int i : mask) {
mask.foreach_index([&](const int i) {
const int curve_i = curve_indices[i];
if (curves.curves_range().contains(curve_i)) {
indices_per_curve.add(curve_i, i);
@@ -406,7 +406,7 @@ class SampleCurveFunction : public mf::MultiFunction {
else {
invalid_indices.append(i);
}
}
});
});
Review

This `memory` will fill up while processing in the for loop, since `from_indices` doesn't clear the existing memory. Maybe better to declare it inside the loop? Or maybe not, hrmm...

Actually, maybe I'll just try to replace this with the same `from_groups` thing from elsewhere.
Review

Yeah, difficult, will also leave that for later for now. It's probably good to refactor this a bit more like you mentioned.
IndexMaskMemory memory;
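To make the concern concrete, a hedged sketch of the pattern being discussed, with `groups`, `indices_of_group`, and `process` as placeholders:

```cpp
IndexMaskMemory memory; /* Lives across all iterations and only grows. */
for (const int group : groups) {
  /* Each call allocates the new mask's storage in `memory`. Allocations
   * from earlier iterations stay alive even after their masks are no
   * longer used, so peak memory scales with the number of groups. */
  const IndexMask mask = IndexMask::from_indices<int>(indices_of_group(group), memory);
  process(mask);
}
```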

View File

@@ -21,9 +21,8 @@ static void node_declare(NodeDeclarationBuilder &b)
static KDTree_3d *build_kdtree(const Span<float3> positions, const IndexMask &mask)
{
KDTree_3d *tree = BLI_kdtree_3d_new(mask.size());
for (const int index : mask) {
BLI_kdtree_3d_insert(tree, index, positions[index]);
}
mask.foreach_index(
[&](const int index) { BLI_kdtree_3d_insert(tree, index, positions[index]); });
BLI_kdtree_3d_balance(tree);
return tree;
}
@@ -41,10 +40,8 @@ static void find_neighbors(const KDTree_3d &tree,
const IndexMask &mask,
MutableSpan<int> r_indices)
{
threading::parallel_for(mask.index_range(), 1024, [&](const IndexRange range) {
for (const int index : mask.slice(range)) {
r_indices[index] = find_nearest_non_self(tree, positions[index], index);
}
mask.foreach_index(GrainSize(1024), [&](const int index) {
r_indices[index] = find_nearest_non_self(tree, positions[index], index);
});
}
@@ -87,58 +84,38 @@ class IndexOfNearestFieldInput final : public bke::GeometryFieldInput {
const VArraySpan<int> group_ids_span(group_ids);
VectorSet<int> group_indexing;
for (const int index : mask) {
for (const int index : IndexRange(domain_size)) {
const int group_id = group_ids_span[index];
group_indexing.add(group_id);
}
const int groups_num = group_indexing.size();
/* Each group ID has two corresponding index masks. One that contains all the points
* in each group and one that contains all the points in the group that should be looked up
* (the intersection of the points in the group and `mask`). In many cases, both of these
* masks are the same or very similar, so there is not enough benefit for a separate mask
* for the lookups. */
const bool use_separate_lookup_indices = mask.size() < domain_size / 2;
IndexMaskMemory mask_memory;
Array<IndexMask> all_indices_by_group_id(groups_num);
Array<IndexMask> lookup_indices_by_group_id(groups_num);
Array<Vector<int64_t>> all_indices_by_group_id(group_indexing.size());
Array<Vector<int64_t>> lookup_indices_by_group_id;
if (use_separate_lookup_indices) {
result.reinitialize(mask.min_array_size());
lookup_indices_by_group_id.reinitialize(group_indexing.size());
}
else {
result.reinitialize(domain_size);
}
const auto build_group_masks = [&](const IndexMask &mask,
MutableSpan<Vector<int64_t>> r_groups) {
mask.foreach_index([&](const int index) {
const int group_id = group_ids_span[index];
const int index_of_group = group_indexing.index_of_try(group_id);
if (index_of_group != -1) {
r_groups[index_of_group].append(index);
}
});
const auto get_group_index = [&](const int i) {
const int group_id = group_ids_span[i];
return group_indexing.index_of(group_id);
};
threading::parallel_invoke(
domain_size > 1024 && use_separate_lookup_indices,
[&]() {
if (use_separate_lookup_indices) {
build_group_masks(mask, lookup_indices_by_group_id);
}
},
[&]() { build_group_masks(IndexMask(domain_size), all_indices_by_group_id); });
IndexMask::from_groups<int>(
IndexMask(domain_size), mask_memory, get_group_index, all_indices_by_group_id);
if (mask.size() == domain_size) {
lookup_indices_by_group_id = all_indices_by_group_id;
}
else {
IndexMask::from_groups<int>(mask, mask_memory, get_group_index, lookup_indices_by_group_id);
}
/* The grain size should be larger as each tree gets smaller. */
const int avg_tree_size = domain_size / group_indexing.size();
const int grain_size = std::max(8192 / avg_tree_size, 1);
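/* Illustrative arithmetic: 1M points in 4 groups gives an average tree of
 * 250k points and a grain size of max(8192 / 250000, 1) = 1, so each tree is
 * its own task; 1M points in 500k groups gives an average tree of 2 points
 * and a grain size of 4096, batching many tiny trees per task. */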
threading::parallel_for(group_indexing.index_range(), grain_size, [&](const IndexRange range) {
for (const int index : range) {
const IndexMask tree_mask = all_indices_by_group_id[index].as_span();
const IndexMask lookup_mask = use_separate_lookup_indices ?
IndexMask(lookup_indices_by_group_id[index]) :
tree_mask;
threading::parallel_for(IndexRange(groups_num), grain_size, [&](const IndexRange range) {
for (const int group_index : range) {
const IndexMask &tree_mask = all_indices_by_group_id[group_index];
const IndexMask &lookup_mask = lookup_indices_by_group_id[group_index];
KDTree_3d *tree = build_kdtree(positions, tree_mask);
find_neighbors(*tree, positions, lookup_mask, result);
BLI_kdtree_3d_free(tree);

View File

@@ -137,7 +137,7 @@ static void raycast_to_mesh(const IndexMask &mask,
/* We shouldn't be rebuilding the BVH tree when calling this function in parallel. */
BLI_assert(tree_data.cached);
for (const int i : mask) {
mask.foreach_index([&](const int i) {
const float ray_length = ray_lengths[i];
const float3 ray_origin = ray_origins[i];
const float3 ray_direction = ray_directions[i];
@@ -187,7 +187,7 @@ static void raycast_to_mesh(const IndexMask &mask,
r_hit_distances[i] = ray_length;
}
}
}
});
}
class RaycastFunction : public mf::MultiFunction {

View File

@@ -21,15 +21,13 @@ void copy_with_checked_indices(const VArray<T> &src,
{
const IndexRange src_range = src.index_range();
devirtualize_varray2(src, indices, [&](const auto src, const auto indices) {
threading::parallel_for(mask.index_range(), 4096, [&](IndexRange range) {
for (const int i : mask.slice(range)) {
const int index = indices[i];
if (src_range.contains(index)) {
dst[i] = src[index];
}
else {
dst[i] = {};
}
mask.foreach_index(GrainSize(4096), [&](const int i) {
const int index = indices[i];
if (src_range.contains(index)) {
dst[i] = src[index];
}
else {
dst[i] = {};
}
});
});
@@ -177,11 +175,9 @@ void copy_with_clamped_indices(const VArray<T> &src,
{
const int last_index = src.index_range().last();
devirtualize_varray2(src, indices, [&](const auto src, const auto indices) {
threading::parallel_for(mask.index_range(), 4096, [&](IndexRange range) {
for (const int i : mask.slice(range)) {
const int index = indices[i];
dst[i] = src[std::clamp(index, 0, last_index)];
}
mask.foreach_index(GrainSize(4096), [&](const int i) {
const int index = indices[i];
dst[i] = src[std::clamp(index, 0, last_index)];
});
});
}