BLI: refactor IndexMask for better performance and memory usage #104629

Merged
Jacques Lucke merged 254 commits from JacquesLucke/blender:index-mask-refactor into main 2023-05-24 18:11:47 +02:00
19 changed files with 75 additions and 101 deletions
Showing only changes of commit c5a95db125 - Show all commits

View File

@ -113,8 +113,9 @@ IndexMask indices_for_type(const VArray<int8_t> &types,
return types.get_internal_single() == type ? IndexMask(types.size()) : IndexMask(0);
}
Span<int8_t> types_span = types.get_internal_span();
return IndexMask::from_predicate(
selection, 4096, memory, [&](const int index) { return types_span[index] == type; });
return IndexMask::from_predicate(selection, GrainSize(4096), memory, [&](const int index) {
return types_span[index] == type;
});
}
void foreach_curve_by_type(const VArray<int8_t> &types,

View File

@ -203,12 +203,12 @@ class IndexMask {
static IndexMask from_expr(const Expr &expr, IndexRange universe, IndexMaskMemory &memory);
template<typename Fn>
static IndexMask from_predicate(IndexRange universe,
int64_t grain_size,
GrainSize grain_size,
IndexMaskMemory &memory,
Fn &&predicate);
template<typename Fn>
static IndexMask from_predicate(const IndexMask &universe,
int64_t grain_size,
GrainSize grain_size,
IndexMaskMemory &memory,
Fn &&predicate);
@ -831,14 +831,14 @@ inline void IndexMask::foreach_index_optimized(const GrainSize grain_size, Fn &&
}
template<typename Fn>
inline IndexMask IndexMask::from_predicate(IndexRange universe,
int64_t grain_size,
inline IndexMask IndexMask::from_predicate(const IndexRange universe,
const GrainSize grain_size,
IndexMaskMemory &memory,
Fn &&predicate)
{
BitVector bits(universe.size());
threading::parallel_for_aligned(
bits.index_range(), grain_size, bits::BitsPerInt, [&](const IndexRange range) {
bits.index_range(), grain_size.value, bits::BitsPerInt, [&](const IndexRange range) {
for (const int64_t i : range) {
const int64_t index = universe[i];
const bool result = predicate(index);
@ -850,7 +850,7 @@ inline IndexMask IndexMask::from_predicate(IndexRange universe,
template<typename Fn>
inline IndexMask IndexMask::from_predicate(const IndexMask &universe,
int64_t grain_size,
const GrainSize grain_size,
IndexMaskMemory &memory,
Fn &&predicate)
{

View File

@ -457,7 +457,7 @@ IndexMask IndexMask::from_bools(const IndexMask &universe,
IndexMaskMemory &memory)
{
return IndexMask::from_predicate(
universe, 1024, memory, [&](const int64_t index) { return bools[index]; });
universe, GrainSize(1024), memory, [&](const int64_t index) { return bools[index]; });
}
IndexMask IndexMask::from_bools(const IndexMask &universe,
@ -465,7 +465,7 @@ IndexMask IndexMask::from_bools(const IndexMask &universe,
IndexMaskMemory &memory)
{
return IndexMask::from_predicate(
universe, 512, memory, [&](const int64_t index) { return bools[index]; });
universe, GrainSize(512), memory, [&](const int64_t index) { return bools[index]; });
}
static Set<int64_t> eval_expr(const Expr &base_expr, const IndexRange universe)
@ -883,6 +883,7 @@ void IndexMask::to_bits(MutableBitSpan r_bits, int64_t offset) const
/**
 * Write this mask into a boolean array: every entry of \a r_bools is first set to false, then
 * the entry at `index - offset` is set to true for each index contained in the mask.
 *
 * \param r_bools: Destination array; asserted to hold at least `min_array_size() - offset`
 * elements.
 * \param offset: Value subtracted from every mask index before it is used to address \a r_bools.
 */
void IndexMask::to_bools(MutableSpan<bool> r_bools, int64_t offset) const
{
  BLI_assert(r_bools.size() >= this->min_array_size() - offset);
  /* Start from an all-false array so that only the masked positions end up true. */
  r_bools.fill(false);
  this->foreach_index_optimized([&](const int64_t index) {
    const int64_t shifted_index = index - offset;
    r_bools[shifted_index] = true;
  });
}

View File

@ -39,13 +39,14 @@ static IndexMask retrieve_selected_curves(const bke::CurvesGeometry &curves,
return selection.get_internal_single() ? IndexMask(curves_range) : IndexMask();
}
const OffsetIndices points_by_curve = curves.points_by_curve();
return IndexMask::from_predicate(curves_range, 512, memory, [&](const int64_t curve_i) {
const IndexRange points = points_by_curve[curve_i];
/* The curve is selected if any of its points are selected. */
Array<bool, 32> point_selection(points.size());
selection.materialize_compressed(points, point_selection);
return point_selection.as_span().contains(true);
});
return IndexMask::from_predicate(
curves_range, GrainSize(512), memory, [&](const int64_t curve_i) {
const IndexRange points = points_by_curve[curve_i];
/* The curve is selected if any of its points are selected. */
Array<bool, 32> point_selection(points.size());
selection.materialize_compressed(points, point_selection);
return point_selection.as_span().contains(true);
});
}
const VArray<bool> selection = attributes.lookup_or_default<bool>(
".selection", ATTR_DOMAIN_CURVE, true);

View File

@ -31,7 +31,7 @@ static IndexMask apply_filter_operation(const VArray<T> &data,
IndexMaskMemory &memory)
{
return IndexMask::from_predicate(
mask, 1024, memory, [&](const int64_t i) { return check_fn(data[i]); });
mask, GrainSize(1024), memory, [&](const int64_t i) { return check_fn(data[i]); });
}
static IndexMask apply_row_filter(const SpreadsheetRowFilter &row_filter,

View File

@ -70,9 +70,10 @@ static IndexMask selected_indices_from_vertex_group(Span<MDeformVert> vertex_gro
const bool invert,
IndexMaskMemory &memory)
{
return IndexMask::from_predicate(vertex_group.index_range(), 512, memory, [&](const int i) {
return (BKE_defvert_find_weight(&vertex_group[i], index) > 0.0f) != invert;
});
return IndexMask::from_predicate(
vertex_group.index_range(), blender::GrainSize(512), memory, [&](const int i) {
return (BKE_defvert_find_weight(&vertex_group[i], index) > 0.0f) != invert;
});
}
static Array<bool> selection_array_from_vertex_group(Span<MDeformVert> vertex_group,

View File

@ -67,12 +67,12 @@ class PointsOfCurveInput final : public bke::CurvesFieldInput {
const bool use_sorting = !all_sort_weights.is_single();
Array<int> point_of_curve(mask.min_array_size());
threading::parallel_for(mask.index_range(), 256, [&](const IndexRange range) {
mask.foreach_span(GrainSize(256), [&](const auto sliced_mask) {
/* Reuse arrays to avoid allocation. */
Array<float> sort_weights;
Array<int> sort_indices;
for (const int selection_i : mask.slice(range)) {
for (const int selection_i : sliced_mask) {
const int curve_i = curve_indices[selection_i];
const int index_in_sort = indices_in_sort[selection_i];
if (!curves.curves_range().contains(curve_i)) {

View File

@ -27,13 +27,13 @@ static Curves *edge_paths_to_curves_convert(
Vector<int> vert_indices;
Vector<int> curve_offsets;
Array<bool> visited(mesh.totvert, false);
for (const int first_vert : start_verts_mask) {
start_verts_mask.foreach_index([&](const int first_vert) {
const int second_vert = next_indices[first_vert];
if (first_vert == second_vert) {
continue;
return;
}
if (second_vert < 0 || second_vert >= mesh.totvert) {
continue;
return;
}
curve_offsets.append(vert_indices.size());
@ -55,7 +55,7 @@ static Curves *edge_paths_to_curves_convert(
for (const int vert_in_curve : vert_indices.as_span().take_back(points_in_curve_num)) {
visited[vert_in_curve] = false;
}
}
});
if (vert_indices.is_empty()) {
return nullptr;

View File

@ -27,13 +27,10 @@ static void edge_paths_to_selection(const Mesh &src_mesh,
{
const Span<MEdge> edges = src_mesh.edges();
Array<bool> selection(src_mesh.totvert, false);
Array<bool> selection(src_mesh.totvert);
start_selection.to_bools(selection);
for (const int start_vert : start_selection) {
selection[start_vert] = true;
}
for (const int start_i : start_selection) {
start_selection.foreach_index([&](const int start_i) {
int iter = start_i;
while (iter != next_indices[iter] && !selection[next_indices[iter]]) {
if (next_indices[iter] < 0 || next_indices[iter] >= src_mesh.totvert) {
@ -42,7 +39,7 @@ static void edge_paths_to_selection(const Mesh &src_mesh,
selection[next_indices[iter]] = true;
iter = next_indices[iter];
}
}
});
for (const int i : edges.index_range()) {
const MEdge &edge = edges[i];

View File

@ -59,9 +59,9 @@ class FaceSetFromBoundariesInput final : public bke::MeshFieldInput {
polys, loops, mesh.totedge);
AtomicDisjointSet islands(polys.size());
for (const int edge : non_boundary_edges) {
non_boundary_edges.foreach_index([&](const int edge) {
join_indices(islands, edge_to_face_map[edge]);
}
});
Array<int> output(polys.size());
islands.calc_reduced_ids(output);

View File

@ -67,12 +67,12 @@ class CornersOfFaceInput final : public bke::MeshFieldInput {
const bool use_sorting = !all_sort_weights.is_single();
Array<int> corner_of_face(mask.min_array_size());
threading::parallel_for(mask.index_range(), 1024, [&](const IndexRange range) {
mask.foreach_span(GrainSize(1024), [&](const auto sliced_mask) {
/* Reuse arrays to avoid allocation. */
Array<float> sort_weights;
Array<int> sort_indices;
for (const int selection_i : mask.slice(range)) {
for (const int selection_i : sliced_mask) {
const int poly_i = face_indices[selection_i];
const int index_in_sort = indices_in_sort[selection_i];
if (!polys.index_range().contains(poly_i)) {

View File

@ -33,13 +33,6 @@ static void node_declare(NodeDeclarationBuilder &b)
.description(N_("The number of faces or corners connected to each vertex"));
}
/**
 * Copy every element of \a src into the same position of \a dst, widening each value from
 * `int` to `int64_t`. Only the first `src.size()` entries of \a dst are written.
 */
static void convert_span(const Span<int> src, MutableSpan<int64_t> dst)
{
  for (const int index : src.index_range()) {
    const int value = src[index];
    dst[index] = value;
  }
}
class CornersOfVertInput final : public bke::MeshFieldInput {
const Field<int> vert_index_;
const Field<int> sort_index_;
@ -80,13 +73,12 @@ class CornersOfVertInput final : public bke::MeshFieldInput {
const bool use_sorting = !all_sort_weights.is_single();
Array<int> corner_of_vertex(mask.min_array_size());
threading::parallel_for(mask.index_range(), 1024, [&](const IndexRange range) {
mask.foreach_span(GrainSize(1024), [&](const auto sliced_mask) {
/* Reuse arrays to avoid allocation. */
Array<int64_t> corner_indices;
Array<float> sort_weights;
Array<int> sort_indices;
for (const int selection_i : mask.slice(range)) {
for (const int selection_i : sliced_mask) {
const int vert_i = vert_indices[selection_i];
const int index_in_sort = indices_in_sort[selection_i];
if (!vert_range.contains(vert_i)) {
@ -102,13 +94,10 @@ class CornersOfVertInput final : public bke::MeshFieldInput {
const int index_in_sort_wrapped = mod_i(index_in_sort, corners.size());
if (use_sorting) {
/* Retrieve the connected corner indices as 64 bit integers for #materialize_compressed. */
corner_indices.reinitialize(corners.size());
convert_span(corners, corner_indices);
/* Retrieve a compressed array of weights for each corner. */
sort_weights.reinitialize(corners.size());
all_sort_weights.materialize_compressed(IndexMask(corner_indices),
IndexMaskMemory memory;
all_sort_weights.materialize_compressed(IndexMask::from_indices<int>(corners, memory),
sort_weights.as_mutable_span());
/* Sort a separate array of compressed indices corresponding to the compressed weights.
@ -120,7 +109,7 @@ class CornersOfVertInput final : public bke::MeshFieldInput {
std::stable_sort(sort_indices.begin(), sort_indices.end(), [&](int a, int b) {
return sort_weights[a] < sort_weights[b];
});
corner_of_vertex[selection_i] = corner_indices[sort_indices[index_in_sort_wrapped]];
corner_of_vertex[selection_i] = corners[sort_indices[index_in_sort_wrapped]];
}
else {
corner_of_vertex[selection_i] = corners[index_in_sort_wrapped];

View File

@ -33,13 +33,6 @@ static void node_declare(NodeDeclarationBuilder &b)
.description(N_("The number of edges connected to each vertex"));
}
/**
 * Element-wise widening copy: each `int` in \a src is stored as an `int64_t` at the same index
 * in \a dst. Entries of \a dst past `src.size()` are left untouched.
 */
static void convert_span(const Span<int> src, MutableSpan<int64_t> dst)
{
  for (const int index : src.index_range()) {
    const int value = src[index];
    dst[index] = value;
  }
}
class EdgesOfVertInput final : public bke::MeshFieldInput {
const Field<int> vert_index_;
const Field<int> sort_index_;

View File

@ -162,7 +162,7 @@ class ProximityFunction : public mf::MultiFunction {
* comparison per vertex, so it's likely not worth it. */
MutableSpan<float> distances = params.uninitialized_single_output<float>(2, "Distance");
distances.fill_indices(mask, FLT_MAX);
index_mask::masked_fill(distances, FLT_MAX, mask);
bool success = false;
if (target_.has_mesh()) {
@ -177,10 +177,10 @@ class ProximityFunction : public mf::MultiFunction {
if (!success) {
if (!positions.is_empty()) {
positions.fill_indices(mask, float3(0));
index_mask::masked_fill(positions, float3(0), mask);
}
if (!distances.is_empty()) {
distances.fill_indices(mask, 0.0f);
index_mask::masked_fill(distances, 0.0f, mask);
}
return;
}

View File

@ -149,7 +149,7 @@ static void raycast_to_mesh(IndexMask mask,
/* We shouldn't be rebuilding the BVH tree when calling this function in parallel. */
BLI_assert(tree_data.cached);
for (const int i : mask) {
mask.foreach_index([&](const int i) {
const float ray_length = ray_lengths[i];
const float3 ray_origin = ray_origins[i];
const float3 ray_direction = math::normalize(ray_directions[i]);
@ -199,7 +199,7 @@ static void raycast_to_mesh(IndexMask mask,
r_hit_distances[i] = ray_length;
}
}
}
});
}
class RaycastFunction : public mf::MultiFunction {
@ -271,20 +271,15 @@ class RaycastFunction : public mf::MultiFunction {
hit_count);
if (target_data_) {
IndexMaskMemory memory;
IndexMask hit_mask;
Vector<int64_t> hit_mask_indices;
if (hit_count < mask.size()) {
/* Not all rays hit the target. Create a corrected mask to avoid transferring attribute
* data to invalid indices. An alternative would be handling -1 indices in a separate case
* in #MeshAttributeInterpolator, but since it already has an IndexMask in its constructor,
* it's simpler to use that. */
hit_mask_indices.reserve(hit_count);
for (const int64_t i : mask) {
if (hit_indices[i] != -1) {
hit_mask_indices.append(i);
}
hit_mask = IndexMask(hit_mask_indices);
}
hit_mask = IndexMask::from_predicate(
mask, GrainSize(1024), memory, [&](const int i) { return hit_indices[i] != -1; });
}
else {
hit_mask = mask;

View File

@ -140,15 +140,13 @@ void copy_with_indices(const VArray<T> &src,
{
const IndexRange src_range = src.index_range();
devirtualize_varray2(src, indices, [&](const auto src, const auto indices) {
threading::parallel_for(mask.index_range(), 4096, [&](IndexRange range) {
for (const int i : mask.slice(range)) {
const int index = indices[i];
if (src_range.contains(index)) {
dst[i] = src[index];
}
else {
dst[i] = {};
}
mask.foreach_index(GrainSize(4096), [&](const int i) {
const int index = indices[i];
if (src_range.contains(index)) {
dst[i] = src[index];
}
else {
dst[i] = {};
}
});
});
@ -162,11 +160,9 @@ void copy_with_clamped_indices(const VArray<T> &src,
{
const int last_index = src.index_range().last();
devirtualize_varray2(src, indices, [&](const auto src, const auto indices) {
threading::parallel_for(mask.index_range(), 4096, [&](IndexRange range) {
for (const int i : mask.slice(range)) {
const int index = indices[i];
dst[i] = src[std::clamp(index, 0, last_index)];
}
mask.foreach_index(GrainSize(4096), [&](const int i) {
const int index = indices[i];
dst[i] = src[std::clamp(index, 0, last_index)];
});
});
}

View File

@ -24,7 +24,7 @@ void get_closest_in_bvhtree(BVHTreeFromMesh &tree_data,
BLI_assert(positions.size() >= r_distances_sq.size());
BLI_assert(positions.size() >= r_positions.size());
for (const int i : mask) {
mask.foreach_index([&](const int i) {
BVHTreeNearest nearest;
nearest.dist_sq = FLT_MAX;
const float3 position = positions[i];
@ -39,7 +39,7 @@ void get_closest_in_bvhtree(BVHTreeFromMesh &tree_data,
if (!r_positions.is_empty()) {
r_positions[i] = nearest.co;
}
}
});
}
} // namespace blender::nodes
@ -77,7 +77,7 @@ static void get_closest_pointcloud_points(const PointCloud &pointcloud,
BVHTreeFromPointCloud tree_data;
BKE_bvhtree_from_pointcloud_get(&tree_data, &pointcloud, 2);
for (const int i : mask) {
mask.foreach_index([&](const int i) {
BVHTreeNearest nearest;
nearest.dist_sq = FLT_MAX;
const float3 position = positions[i];
@ -87,7 +87,7 @@ static void get_closest_pointcloud_points(const PointCloud &pointcloud,
if (!r_distances_sq.is_empty()) {
r_distances_sq[i] = nearest.dist_sq;
}
}
});
free_bvhtree_from_pointcloud(&tree_data);
}
@ -149,10 +149,10 @@ static void get_closest_mesh_polys(const Mesh &mesh,
const Span<MLoopTri> looptris = mesh.looptris();
for (const int i : mask) {
mask.foreach_index([&](const int i) {
const MLoopTri &looptri = looptris[looptri_indices[i]];
r_poly_indices[i] = looptri.poly;
}
});
}
/* The closest corner is defined to be the closest corner on the closest face. */
@ -171,7 +171,7 @@ static void get_closest_mesh_corners(const Mesh &mesh,
Array<int> poly_indices(positions.size());
get_closest_mesh_polys(mesh, positions, mask, poly_indices, {}, {});
for (const int i : mask) {
mask.foreach_index([&](const int i) {
const float3 position = positions[i];
const int poly_index = poly_indices[i];
const MPoly &poly = polys[poly_index];
@ -199,7 +199,7 @@ static void get_closest_mesh_corners(const Mesh &mesh,
if (!r_distances_sq.is_empty()) {
r_distances_sq[i] = min_distance_sq;
}
}
});
}
static bool component_is_available(const GeometrySet &geometry,
@ -257,7 +257,7 @@ class SampleNearestFunction : public mf::MultiFunction {
const VArray<float3> &positions = params.readonly_single_input<float3>(0, "Position");
MutableSpan<int> indices = params.uninitialized_single_output<int>(1, "Index");
if (!src_component_) {
indices.fill_indices(mask, 0);
index_mask::masked_fill(indices, 0, mask);
return;
}

View File

@ -52,9 +52,9 @@ static Mesh *triangulate_mesh_selection(const Mesh &mesh,
/* Tag faces to be triangulated from the selection mask. */
BM_mesh_elem_table_ensure(bm, BM_FACE);
for (int i_face : selection) {
selection.foreach_index([&](const int i_face) {
BM_elem_flag_set(BM_face_at_index(bm, i_face), BM_ELEM_TAG, true);
}
});
BM_mesh_triangulate(bm, quad_method, ngon_method, min_vertices, true, nullptr, nullptr, nullptr);
Mesh *result = BKE_mesh_from_bmesh_for_eval_nomain(bm, &cd_mask_extra, &mesh);

View File

@ -57,7 +57,7 @@ static VArray<float3> construct_uv_gvarray(const Mesh &mesh,
evaluator.evaluate();
geometry::ParamHandle *handle = geometry::uv_parametrizer_construct_begin();
for (const int poly_index : selection) {
selection.foreach_index([&](const int poly_index) {
const MPoly &poly = polys[poly_index];
Array<geometry::ParamKey, 16> mp_vkeys(poly.totloop);
Array<bool, 16> mp_pin(poly.totloop);
@ -80,7 +80,7 @@ static VArray<float3> construct_uv_gvarray(const Mesh &mesh,
mp_uv.data(),
mp_pin.data(),
mp_select.data());
}
});
geometry::uv_parametrizer_construct_end(handle, true, true, nullptr);
geometry::uv_parametrizer_pack(handle, margin, rotate, true);