BLI: refactor IndexMask for better performance and memory usage #104629

Merged
Jacques Lucke merged 254 commits from JacquesLucke/blender:index-mask-refactor into main 2023-05-24 18:11:47 +02:00
15 changed files with 78 additions and 88 deletions
Showing only changes of commit 36f2f73449 - Show all commits

View File

@ -370,7 +370,7 @@ class CurvesGeometry : public ::CurvesGeometry {
* Change the direction of selected curves (switch the start and end) without changing their
* shape.
*/
void reverse_curves(cosnt IndexMask &curves_to_reverse);
void reverse_curves(const IndexMask &curves_to_reverse);
/**
* Remove any attributes that are unused based on the types in the curves.

View File

@ -588,9 +588,9 @@ void CurvesGeometry::ensure_nurbs_basis_cache() const
const VArray<int8_t> orders = this->nurbs_orders();
const VArray<int8_t> knots_modes = this->nurbs_knots_modes();
nurbs_mask.foreach_span(GrainSize(64), [&](const auto sliced_mask) {
nurbs_mask.foreach_span(GrainSize(64), [&](const auto mask_segment) {
Vector<float, 32> knots;
for (const int curve_index : sliced_mask) {
for (const int curve_index : mask_segment) {
const IndexRange points = points_by_curve[curve_index];
const IndexRange evaluated_points = evaluated_points_by_curve[curve_index];
@ -1347,11 +1347,8 @@ static void reverse_curve_point_data(const CurvesGeometry &curves,
MutableSpan<T> data)
{
const OffsetIndices points_by_curve = curves.points_by_curve();
threading::parallel_for(curve_selection.index_range(), 256, [&](IndexRange range) {
for (const int curve_i : curve_selection.slice(range)) {
data.slice(points_by_curve[curve_i]).reverse();
}
});
curve_selection.foreach_index(
GrainSize(256), [&](const int curve_i) { data.slice(points_by_curve[curve_i]).reverse(); });
}
template<typename T>
@ -1361,20 +1358,18 @@ static void reverse_swap_curve_point_data(const CurvesGeometry &curves,
MutableSpan<T> data_b)
{
const OffsetIndices points_by_curve = curves.points_by_curve();
threading::parallel_for(curve_selection.index_range(), 256, [&](IndexRange range) {
for (const int curve_i : curve_selection.slice(range)) {
const IndexRange points = points_by_curve[curve_i];
MutableSpan<T> a = data_a.slice(points);
MutableSpan<T> b = data_b.slice(points);
for (const int i : IndexRange(points.size() / 2)) {
const int end_index = points.size() - 1 - i;
std::swap(a[end_index], b[i]);
std::swap(b[end_index], a[i]);
}
if (points.size() % 2) {
const int64_t middle_index = points.size() / 2;
std::swap(a[middle_index], b[middle_index]);
}
curve_selection.foreach_index(GrainSize(256), [&](const int curve_i) {
const IndexRange points = points_by_curve[curve_i];
MutableSpan<T> a = data_a.slice(points);
MutableSpan<T> b = data_b.slice(points);
for (const int i : IndexRange(points.size() / 2)) {
const int end_index = points.size() - 1 - i;
std::swap(a[end_index], b[i]);
std::swap(b[end_index], a[i]);
}
if (points.size() % 2) {
const int64_t middle_index = points.size() / 2;
std::swap(a[middle_index], b[middle_index]);
}
});
}

View File

@ -23,13 +23,13 @@ BLI_NOINLINE static void sample_point_attribute(const Span<int> corner_verts,
const IndexMask &mask,
const MutableSpan<T> dst)
{
for (const int i : mask) {
mask.foreach_index([&](const int i) {
const MLoopTri &tri = looptris[looptri_indices[i]];
dst[i] = attribute_math::mix3(bary_coords[i],
src[corner_verts[tri.tri[0]]],
src[corner_verts[tri.tri[1]]],
src[corner_verts[tri.tri[2]]]);
}
});
}
void sample_point_attribute(const Span<int> corner_verts,
@ -63,7 +63,7 @@ BLI_NOINLINE static void sample_corner_attribute(const Span<MLoopTri> looptris,
const IndexMask &mask,
const MutableSpan<T> dst)
{
for (const int i : mask) {
mask.foreach_index([&](const int i) {
if constexpr (check_indices) {
if (looptri_indices[i] == -1) {
dst[i] = {};
@ -72,7 +72,7 @@ BLI_NOINLINE static void sample_corner_attribute(const Span<MLoopTri> looptris,
}
const MLoopTri &tri = looptris[looptri_indices[i]];
dst[i] = sample_corner_attribute_with_bary_coords(bary_coords[i], tri, src);
}
});
}
void sample_corner_normals(const Span<MLoopTri> looptris,
@ -82,11 +82,11 @@ void sample_corner_normals(const Span<MLoopTri> looptris,
const IndexMask &mask,
const MutableSpan<float3> dst)
{
for (const int i : mask) {
mask.foreach_index([&](const int i) {
const MLoopTri &tri = looptris[looptri_indices[i]];
const float3 value = sample_corner_attribute_with_bary_coords(bary_coords[i], tri, src);
dst[i] = math::normalize(value);
}
});
}
void sample_corner_attribute(const Span<MLoopTri> looptris,
@ -113,11 +113,11 @@ void sample_face_attribute(const Span<int> looptri_polys,
const IndexMask &mask,
const MutableSpan<T> dst)
{
for (const int i : mask) {
mask.foreach_index([&](const int i) {
const int looptri_index = looptri_indices[i];
const int poly_index = looptri_polys[looptri_index];
dst[i] = src[poly_index];
}
});
}
void sample_face_attribute(const Span<int> looptri_polys,
@ -166,7 +166,7 @@ static void sample_nearest_weights(const Span<float3> vert_positions,
const IndexMask &mask,
MutableSpan<float3> bary_coords)
{
for (const int i : mask) {
mask.foreach_index([&](const int i) {
if constexpr (check_indices) {
if (looptri_indices[i] == -1) {
bary_coords[i] = {};
@ -181,7 +181,7 @@ static void sample_nearest_weights(const Span<float3> vert_positions,
float3(1, 0, 0),
float3(0, 1, 0),
float3(0, 0, 1));
}
});
}
int sample_surface_points_spherical(RandomNumberGenerator &rng,

View File

@ -673,8 +673,8 @@ struct DensitySubtractOperationExecutor {
});
/* Detect curves that are too close to other existing curves. */
curve_selection_.foreach_span([&](const auto sliced_selection) {
for (const int curve_i : sliced_selection) {
curve_selection_.foreach_span([&](const auto mask_segment) {
for (const int curve_i : mask_segment) {
if (curves_to_delete[curve_i]) {
continue;
}
@ -762,8 +762,8 @@ struct DensitySubtractOperationExecutor {
});
/* Detect curves that are too close to other existing curves. */
curve_selection_.foreach_span([&](const auto sliced_selection) {
for (const int curve_i : sliced_selection) {
curve_selection_.foreach_span([&](const auto mask_segment) {
for (const int curve_i : mask_segment) {
if (curves_to_delete[curve_i]) {
continue;
}

View File

@ -269,8 +269,8 @@ struct SlideOperationExecutor {
const float brush_radius_sq_cu = pow2f(brush_radius_cu);
const Span<int> offsets = curves_orig_->offsets();
curve_selection_.foreach_span([&](const auto sliced_selection) {
for (const int curve_i : sliced_selection) {
curve_selection_.foreach_span([&](const auto mask_segment) {
for (const int curve_i : mask_segment) {
const int first_point_i = offsets[curve_i];
const float3 old_pos_cu = self_->initial_deformed_positions_cu_[first_point_i];
const float dist_to_brush_sq_cu = math::distance_squared(old_pos_cu, brush_pos_cu);

View File

@ -224,9 +224,9 @@ struct SmoothOperationExecutor {
const OffsetIndices points_by_curve = curves_->points_by_curve();
MutableSpan<float3> positions = curves_->positions_for_write();
curve_selection_.foreach_span(GrainSize(256), [&](const auto sliced_selection) {
curve_selection_.foreach_span(GrainSize(256), [&](const auto mask_segment) {
Vector<float3> old_positions;
for (const int curve_i : sliced_selection) {
for (const int curve_i : mask_segment) {
const IndexRange points = points_by_curve[curve_i];
old_positions.clear();
old_positions.extend(positions.slice(points));

View File

@ -18,7 +18,7 @@ class AddPrefixFunction : public MultiFunction {
this->set_signature(&signature);
}
void call(IndexMask mask, Params params, Context /*context*/) const override
void call(const IndexMask &mask, Params params, Context /*context*/) const override
{
const VArray<std::string> &prefixes = params.readonly_single_input<std::string>(0, "Prefix");
MutableSpan<std::string> strings = params.single_mutable<std::string>(1, "Strings");
@ -43,7 +43,7 @@ class CreateRangeFunction : public MultiFunction {
this->set_signature(&signature);
}
void call(IndexMask mask, Params params, Context /*context*/) const override
void call(const IndexMask &mask, Params params, Context /*context*/) const override
{
const VArray<int> &sizes = params.readonly_single_input<int>(0, "Size");
GVectorArray &ranges = params.vector_output(1, "Range");
@ -70,7 +70,7 @@ class GenericAppendFunction : public MultiFunction {
this->set_signature(&signature_);
}
void call(IndexMask mask, Params params, Context /*context*/) const override
void call(const IndexMask &mask, Params params, Context /*context*/) const override
{
GVectorArray &vectors = params.vector_mutable(0, "Vector");
const GVArray &values = params.readonly_single_input(1, "Value");
@ -98,7 +98,7 @@ class ConcatVectorsFunction : public MultiFunction {
this->set_signature(&signature);
}
void call(IndexMask mask, Params params, Context /*context*/) const override
void call(const IndexMask &mask, Params params, Context /*context*/) const override
{
GVectorArray &a = params.vector_mutable(0);
const GVVectorArray &b = params.readonly_vector_input(1);
@ -120,7 +120,7 @@ class AppendFunction : public MultiFunction {
this->set_signature(&signature);
}
void call(IndexMask mask, Params params, Context /*context*/) const override
void call(const IndexMask &mask, Params params, Context /*context*/) const override
{
GVectorArray_TypedMutableRef<int> vectors = params.vector_mutable<int>(0);
const VArray<int> &values = params.readonly_single_input<int>(1);
@ -145,7 +145,7 @@ class SumVectorFunction : public MultiFunction {
this->set_signature(&signature);
}
void call(IndexMask mask, Params params, Context /*context*/) const override
void call(const IndexMask &mask, Params params, Context /*context*/) const override
{
const VVectorArray<int> &vectors = params.readonly_vector_input<int>(0);
MutableSpan<int> sums = params.uninitialized_single_output<int>(1);
@ -174,7 +174,7 @@ class OptionalOutputsFunction : public MultiFunction {
this->set_signature(&signature);
}
void call(IndexMask mask, Params params, Context /*context*/) const override
void call(const IndexMask &mask, Params params, Context /*context*/) const override
{
if (params.single_output_is_required(0, "Out 1")) {
MutableSpan<int> values = params.uninitialized_single_output<int>(0, "Out 1");

View File

@ -446,13 +446,13 @@ static bke::CurvesGeometry fillet_curves(
dst_handles_r = dst_curves.handle_positions_right_for_write();
}
curve_selection.foreach_span(GrainSize(512), [&](const auto sliced_selection) {
curve_selection.foreach_span(GrainSize(512), [&](const auto mask_segment) {
Array<float3> directions;
Array<float> angles;
Array<float> radii;
Array<float> input_radii_buffer;
for (const int curve_i : sliced_selection) {
for (const int curve_i : mask_segment) {
const IndexRange src_points = src_points_by_curve[curve_i];
const IndexRange offsets_range = bke::curves::per_curve_point_offsets_range(src_points,
curve_i);

View File

@ -511,12 +511,12 @@ static bke::CurvesGeometry convert_curves_to_nurbs(
MutableSpan<int8_t> nurbs_order = dst_curves.nurbs_orders_for_write();
MutableSpan<int8_t> nurbs_knots_modes = dst_curves.nurbs_knots_modes_for_write();
fill_weights_if_necessary(selection);
selection.foreach_span(GrainSize(512), [&](const auto sliced_selection) {
for (const int i : sliced_selection) {
selection.foreach_span(GrainSize(512), [&](const auto mask_segment) {
for (const int i : mask_segment) {
nurbs_order[i] = 4;

Any particular reason not to just use masked_fill here, rather than writing a for loop? Seems like it would be simpler that way.

Any particular reason not to just use `masked_fill` here, rather than writing a for loop? Seems like it would be simpler that way.
nurbs_knots_modes[i] = NURBS_KNOT_MODE_BEZIER;
}
for (const int i : sliced_selection) {
for (const int i : mask_segment) {
const IndexRange src_points = src_points_by_curve[i];
const IndexRange dst_points = dst_points_by_curve[i];
catmull_rom_to_nurbs_positions(

View File

@ -595,17 +595,14 @@ static void trim_attribute_linear(const bke::CurvesGeometry &src_curves,
bke::attribute_math::convert_to_static_type(attribute.meta_data.data_type, [&](auto dummy) {
using T = decltype(dummy);
threading::parallel_for(selection.index_range(), 512, [&](const IndexRange range) {
for (const int64_t curve_i : selection.slice(range)) {
const IndexRange src_points = src_points_by_curve[curve_i];
sample_interval_linear<T>(attribute.src.template typed<T>().slice(src_points),
attribute.dst.span.typed<T>(),
src_ranges[curve_i],
dst_points_by_curve[curve_i],
start_points[curve_i],
end_points[curve_i]);
}
selection.foreach_index(GrainSize(512), [&](const int curve_i) {
const IndexRange src_points = src_points_by_curve[curve_i];
sample_interval_linear<T>(attribute.src.template typed<T>().slice(src_points),
attribute.dst.span.typed<T>(),
src_ranges[curve_i],
dst_points_by_curve[curve_i],
start_points[curve_i],
end_points[curve_i]);
});
});
}
@ -679,19 +676,17 @@ static void trim_catmull_rom_curves(const bke::CurvesGeometry &src_curves,
bke::attribute_math::convert_to_static_type(attribute.meta_data.data_type, [&](auto dummy) {
using T = decltype(dummy);
threading::parallel_for(selection.index_range(), 512, [&](const IndexRange range) {
for (const int64_t curve_i : selection.slice(range)) {
const IndexRange src_points = src_points_by_curve[curve_i];
const IndexRange dst_points = dst_points_by_curve[curve_i];
selection.foreach_index(GrainSize(512), [&](const int curve_i) {
const IndexRange src_points = src_points_by_curve[curve_i];
const IndexRange dst_points = dst_points_by_curve[curve_i];
sample_interval_catmull_rom<T>(attribute.src.template typed<T>().slice(src_points),
attribute.dst.span.typed<T>(),
src_ranges[curve_i],
dst_points,
start_points[curve_i],
end_points[curve_i],
src_cyclic[curve_i]);
}
sample_interval_catmull_rom<T>(attribute.src.template typed<T>().slice(src_points),
attribute.dst.span.typed<T>(),
src_ranges[curve_i],
dst_points,
start_points[curve_i],
end_points[curve_i],
src_cyclic[curve_i]);
});
});
}
@ -779,9 +774,9 @@ static void trim_evaluated_curves(const bke::CurvesGeometry &src_curves,
bke::attribute_math::convert_to_static_type(attribute.meta_data.data_type, [&](auto dummy) {
using T = decltype(dummy);
threading::parallel_for(selection.index_range(), 512, [&](const IndexRange range) {
selection.foreach_span(GrainSize(512), [&](const auto mask_segment) {
Vector<std::byte> evaluated_buffer;
for (const int64_t curve_i : selection.slice(range)) {
for (const int64_t curve_i : mask_segment) {
const IndexRange src_points = src_points_by_curve[curve_i];
/* Interpolate onto the evaluated point domain and sample the evaluated domain. */

View File

@ -67,12 +67,12 @@ class PointsOfCurveInput final : public bke::CurvesFieldInput {
const bool use_sorting = !all_sort_weights.is_single();
Array<int> point_of_curve(mask.min_array_size());
mask.foreach_span(GrainSize(256), [&](const auto sliced_mask) {
mask.foreach_span(GrainSize(256), [&](const auto mask_segment) {
/* Reuse arrays to avoid allocation. */
Array<float> sort_weights;
Array<int> sort_indices;
for (const int selection_i : sliced_mask) {
for (const int selection_i : mask_segment) {
const int curve_i = curve_indices[selection_i];
const int index_in_sort = indices_in_sort[selection_i];
if (!curves.curves_range().contains(curve_i)) {

View File

@ -67,12 +67,12 @@ class CornersOfFaceInput final : public bke::MeshFieldInput {
const bool use_sorting = !all_sort_weights.is_single();
Array<int> corner_of_face(mask.min_array_size());
mask.foreach_span(GrainSize(1024), [&](const auto sliced_mask) {
mask.foreach_span(GrainSize(1024), [&](const auto mask_segment) {
/* Reuse arrays to avoid allocation. */
Array<float> sort_weights;
Array<int> sort_indices;
for (const int selection_i : sliced_mask) {
for (const int selection_i : mask_segment) {
const int poly_i = face_indices[selection_i];
const int index_in_sort = indices_in_sort[selection_i];
if (!polys.index_range().contains(poly_i)) {

View File

@ -73,12 +73,12 @@ class CornersOfVertInput final : public bke::MeshFieldInput {
const bool use_sorting = !all_sort_weights.is_single();
Array<int> corner_of_vertex(mask.min_array_size());
mask.foreach_span(GrainSize(1024), [&](const auto sliced_mask) {
mask.foreach_span(GrainSize(1024), [&](const auto mask_segment) {
/* Reuse arrays to avoid allocation. */
Array<float> sort_weights;
Array<int> sort_indices;
for (const int selection_i : sliced_mask) {
for (const int selection_i : mask_segment) {
const int vert_i = vert_indices[selection_i];
const int index_in_sort = indices_in_sort[selection_i];
if (!vert_range.contains(vert_i)) {

View File

@ -73,12 +73,12 @@ class EdgesOfVertInput final : public bke::MeshFieldInput {
const bool use_sorting = !all_sort_weights.is_single();
Array<int> edge_of_vertex(mask.min_array_size());
mask.foreach_span(GrainSize(1024), [&](const auto sliced_mask) {
mask.foreach_span(GrainSize(1024), [&](const auto mask_segment) {
/* Reuse arrays to avoid allocation. */
Array<float> sort_weights;
Array<int> sort_indices;
for (const int selection_i : sliced_mask) {
for (const int selection_i : mask_segment) {
const int vert_i = vert_indices[selection_i];
const int index_in_sort = indices_in_sort[selection_i];
if (!vert_range.contains(vert_i)) {

View File

@ -109,11 +109,11 @@ static void set_position_in_component(bke::CurvesGeometry &curves,
curves.handle_positions_right_for_write() :
curves.handle_positions_left_for_write();
selection.foreach_span(GrainSize(2048), [&](const auto sliced_selection) {
for (const int i : sliced_selection) {
selection.foreach_span(GrainSize(2048), [&](const auto mask_segment) {
for (const int i : mask_segment) {
update_handle_types_for_movement(handle_types[i], handle_types_other[i]);
}
for (const int i : sliced_selection) {
for (const int i : mask_segment) {
bke::curves::bezier::set_handle_position(positions[i],
HandleType(handle_types[i]),
HandleType(handle_types_other[i]),