BLI: refactor IndexMask for better performance and memory usage #104629

Merged
Jacques Lucke merged 254 commits from JacquesLucke/blender:index-mask-refactor into main 2023-05-24 18:11:47 +02:00
11 changed files with 155 additions and 161 deletions
Showing only changes of commit 2e0364b005 - Show all commits

View File

@ -175,6 +175,7 @@ class IndexMask {
IndexMask slice(int64_t start, int64_t size) const;
IndexMask slice_and_offset(IndexRange range, IndexMaskMemory &memory) const;
IndexMask slice_and_offset(int64_t start, int64_t size, IndexMaskMemory &memory) const;
IndexMask complement(const IndexRange universe, IndexMaskMemory &memory) const;
int64_t operator[](const int64_t i) const;

View File

@ -404,6 +404,14 @@ IndexMask IndexMask::slice_and_offset(const int64_t start,
return IndexMask::from_bits(bits, memory);
}
/**
 * Compute the complement of this mask with respect to \a universe.
 *
 * The mask is wrapped in an #AtomicExpr, negated with a #ComplementExpr, and the resulting
 * expression is evaluated by #IndexMask::from_expr over \a universe.
 *
 * \param universe: Range passed to #IndexMask::from_expr as the domain of the complement.
 * \param memory: Caller-provided memory that is forwarded to #IndexMask::from_expr.
 *   NOTE(review): presumably this backs the returned mask and must outlive it -- confirm against
 *   #IndexMask::from_expr.
 * \return The mask produced by evaluating the complement expression.
 */
IndexMask IndexMask::complement(const IndexRange universe, IndexMaskMemory &memory) const
{
  /* Use the caller's `memory` directly. Declaring a local with the same name here would
   * redeclare the parameter (ill-formed) and would keep the result out of the caller's memory. */
  const AtomicExpr atomic_expr{*this};
  const ComplementExpr complement_expr{atomic_expr};
  return IndexMask::from_expr(complement_expr, universe, memory);
}
static IndexMask bits_to_index_mask(const BitSpan bits,
const int64_t start,
IndexMaskMemory &memory)
@ -897,10 +905,7 @@ Vector<IndexRange> IndexMask::to_ranges() const
Vector<IndexRange> IndexMask::to_ranges_invert(const IndexRange universe) const
{
IndexMaskMemory memory;
const AtomicExpr atomic_expr{*this};
const ComplementExpr complement_expr{atomic_expr};
const IndexMask inverted_mask = IndexMask::from_expr(complement_expr, universe, memory);
return inverted_mask.to_ranges();
return this->complement(universe, memory).to_ranges();
}
template IndexMask IndexMask::from_indices(Span<int32_t>, IndexMaskMemory &);

View File

@ -14,17 +14,17 @@ namespace blender::ed::curves {
void transverts_from_curves_positions_create(bke::CurvesGeometry &curves, TransVertStore *tvs)
{
Vector<int64_t> selected_indices;
IndexMask selection = retrieve_selected_points(curves, selected_indices);
IndexMaskMemory memory;
IndexMask selection = retrieve_selected_points(curves, memory);
MutableSpan<float3> positions = curves.positions_for_write();
tvs->transverts = static_cast<TransVert *>(
MEM_calloc_arrayN(selection.size(), sizeof(TransVert), __func__));
tvs->transverts_tot = selection.size();
selection.foreach_span(GrainSize(1024), [&](const auto mask_segment, const IndexRange range) {
selection.foreach_span(GrainSize(1024), [&](const auto mask_segment, const int64_t start) {
for (const int i : mask_segment.index_range()) {
TransVert &tv = tvs->transverts[range[i]];
TransVert &tv = tvs->transverts[start + i];
tv.loc = positions[mask_segment[i]];
tv.flag = SELECT;
copy_v3_v3(tv.oldloc, tv.loc);

View File

@ -64,10 +64,10 @@ IndexMask retrieve_selected_points(const bke::CurvesGeometry &curves, IndexMaskM
curves.attributes().lookup_or_default<bool>(".selection", ATTR_DOMAIN_POINT, true), memory);
}
IndexMask retrieve_selected_points(const Curves &curves_id, Vector<int64_t> &r_indices)
IndexMask retrieve_selected_points(const Curves &curves_id, IndexMaskMemory &memory)
{
const bke::CurvesGeometry &curves = curves_id.geometry.wrap();
return retrieve_selected_points(curves, r_indices);
return retrieve_selected_points(curves, memory);
}
bke::GSpanAttributeWriter ensure_selection_attribute(bke::CurvesGeometry &curves,

View File

@ -118,8 +118,8 @@ IndexMask retrieve_selected_curves(const Curves &curves_id, Vector<int64_t> &r_i
* Find points that are selected (a selection factor greater than zero),
* or points in curves with a selection factor greater than zero.
*/
IndexMask retrieve_selected_points(const bke::CurvesGeometry &curves, Vector<int64_t> &r_indices);
IndexMask retrieve_selected_points(const Curves &curves_id, Vector<int64_t> &r_indices);
IndexMask retrieve_selected_points(const bke::CurvesGeometry &curves, IndexMaskMemory &memory);
IndexMask retrieve_selected_points(const Curves &curves_id, IndexMaskMemory &memory);
/**
* If the ".selection" attribute doesn't exist, create it with the requested type (bool or float).

View File

@ -20,6 +20,7 @@
#include "DEG_depsgraph.h"
#include "DEG_depsgraph_query.h"
#include "BLI_enumerable_thread_specific.hh"
#include "BLI_kdtree.h"
#include "BLI_rand.hh"
#include "BLI_task.hh"
@ -588,10 +589,10 @@ struct DensitySubtractOperationExecutor {
root_points_kdtree_ = BLI_kdtree_3d_new(curve_selection_.size());
BLI_SCOPED_DEFER([&]() { BLI_kdtree_3d_free(root_points_kdtree_); });
for (const int curve_i : curve_selection_) {
curve_selection_.foreach_index([&](const int curve_i) {
const float3 &pos_cu = self_->deformed_root_positions_[curve_i];
BLI_kdtree_3d_insert(root_points_kdtree_, curve_i, pos_cu);
}
});
BLI_kdtree_3d_balance(root_points_kdtree_);
/* Find all curves that should be deleted. */
@ -676,35 +677,37 @@ struct DensitySubtractOperationExecutor {
});
/* Detect curves that are too close to other existing curves. */
for (const int curve_i : curve_selection_) {
if (curves_to_delete[curve_i]) {
continue;
}
if (!allow_remove_curve[curve_i]) {
continue;
}
const float3 orig_pos_cu = self_->deformed_root_positions_[curve_i];
const float3 pos_cu = math::transform_point(brush_transform, orig_pos_cu);
float2 pos_re;
ED_view3d_project_float_v2_m4(ctx_.region, pos_cu, pos_re, projection.ptr());
const float dist_to_brush_sq_re = math::distance_squared(brush_pos_re_, pos_re);
if (dist_to_brush_sq_re > brush_radius_sq_re) {
continue;
}
BLI_kdtree_3d_range_search_cb_cpp(
root_points_kdtree_,
orig_pos_cu,
minimum_distance_,
[&](const int other_curve_i, const float * /*co*/, float /*dist_sq*/) {
if (other_curve_i == curve_i) {
curve_selection_.foreach_span([&](const auto sliced_selection) {
for (const int curve_i : sliced_selection) {
if (curves_to_delete[curve_i]) {
continue;
}
if (!allow_remove_curve[curve_i]) {
continue;
}
const float3 orig_pos_cu = self_->deformed_root_positions_[curve_i];
const float3 pos_cu = math::transform_point(brush_transform, orig_pos_cu);
float2 pos_re;
ED_view3d_project_float_v2_m4(ctx_.region, pos_cu, pos_re, projection.ptr());
const float dist_to_brush_sq_re = math::distance_squared(brush_pos_re_, pos_re);
if (dist_to_brush_sq_re > brush_radius_sq_re) {
continue;
}
BLI_kdtree_3d_range_search_cb_cpp(
root_points_kdtree_,
orig_pos_cu,
minimum_distance_,
[&](const int other_curve_i, const float * /*co*/, float /*dist_sq*/) {
if (other_curve_i == curve_i) {
return true;
}
if (allow_remove_curve[other_curve_i]) {
curves_to_delete[other_curve_i] = true;
}
return true;
}
if (allow_remove_curve[other_curve_i]) {
curves_to_delete[other_curve_i] = true;
}
return true;
});
}
});
}
});
}
void reduce_density_spherical_with_symmetry(MutableSpan<bool> curves_to_delete)
@ -763,33 +766,35 @@ struct DensitySubtractOperationExecutor {
});
/* Detect curves that are too close to other existing curves. */
for (const int curve_i : curve_selection_) {
if (curves_to_delete[curve_i]) {
continue;
}
if (!allow_remove_curve[curve_i]) {
continue;
}
const float3 &pos_cu = self_->deformed_root_positions_[curve_i];
const float dist_to_brush_sq_cu = math::distance_squared(pos_cu, brush_pos_cu);
if (dist_to_brush_sq_cu > brush_radius_sq_cu) {
continue;
}
curve_selection_.foreach_span([&](const auto sliced_selection) {
for (const int curve_i : curve_selection_) {
if (curves_to_delete[curve_i]) {
continue;
}
if (!allow_remove_curve[curve_i]) {
continue;
}
const float3 &pos_cu = self_->deformed_root_positions_[curve_i];
const float dist_to_brush_sq_cu = math::distance_squared(pos_cu, brush_pos_cu);
if (dist_to_brush_sq_cu > brush_radius_sq_cu) {
continue;
}
BLI_kdtree_3d_range_search_cb_cpp(
root_points_kdtree_,
pos_cu,
minimum_distance_,
[&](const int other_curve_i, const float * /*co*/, float /*dist_sq*/) {
if (other_curve_i == curve_i) {
BLI_kdtree_3d_range_search_cb_cpp(
root_points_kdtree_,
pos_cu,
minimum_distance_,
[&](const int other_curve_i, const float * /*co*/, float /*dist_sq*/) {
if (other_curve_i == curve_i) {
return true;
}
if (allow_remove_curve[other_curve_i]) {
curves_to_delete[other_curve_i] = true;
}
return true;
}
if (allow_remove_curve[other_curve_i]) {
curves_to_delete[other_curve_i] = true;
}
return true;
});
}
});
}
});
}
};

View File

@ -527,8 +527,8 @@ namespace select_grow {
struct GrowOperatorDataPerCurve : NonCopyable, NonMovable {
Curves *curves_id;
Vector<int64_t> selected_point_indices;
Vector<int64_t> unselected_point_indices;
IndexMaskMemory selected_points_memory;
IndexMaskMemory unselected_points_memory;
IndexMask selected_points;
IndexMask unselected_points;
Array<float> distances_to_selected;
@ -548,36 +548,22 @@ static void update_points_selection(const GrowOperatorDataPerCurve &data,
MutableSpan<float> points_selection)
{
if (distance > 0.0f) {
threading::parallel_for(
data.unselected_points.index_range(), 256, [&](const IndexRange range) {
for (const int i : range) {
const int point_i = data.unselected_points[i];
const float distance_to_selected = data.distances_to_selected[i];
const float selection = distance_to_selected <= distance ? 1.0f : 0.0f;
points_selection[point_i] = selection;
}
});
threading::parallel_for(data.selected_points.index_range(), 512, [&](const IndexRange range) {
for (const int point_i : data.selected_points.slice(range)) {
points_selection[point_i] = 1.0f;
}
data.unselected_points.foreach_index(GrainSize(256), [&](const int point_i, const int i) {
const float distance_to_selected = data.distances_to_selected[i];
const float selection = distance_to_selected <= distance ? 1.0f : 0.0f;
points_selection[point_i] = selection;
});
data.selected_points.foreach_index(
GrainSize(512), [&](const int point_i) { points_selection[point_i] = 1.0f; });
}
else {
threading::parallel_for(data.selected_points.index_range(), 256, [&](const IndexRange range) {
for (const int i : range) {
const int point_i = data.selected_points[i];
const float distance_to_unselected = data.distances_to_unselected[i];
const float selection = distance_to_unselected <= -distance ? 0.0f : 1.0f;
points_selection[point_i] = selection;
}
data.selected_points.foreach_index(GrainSize(256), [&](const int point_i, const int i) {
const float distance_to_unselected = data.distances_to_unselected[i];
const float selection = distance_to_unselected <= -distance ? 0.0f : 1.0f;
points_selection[point_i] = selection;
});
threading::parallel_for(
data.unselected_points.index_range(), 512, [&](const IndexRange range) {
for (const int point_i : data.unselected_points.slice(range)) {
points_selection[point_i] = 0.0f;
}
});
data.unselected_points.foreach_index(
GrainSize(512), [&](const int point_i) { points_selection[point_i] = 0.0f; });
}
}
@ -646,9 +632,9 @@ static void select_grow_invoke_per_curve(const Curves &curves_id,
/* Find indices of selected and unselected points. */
curve_op_data.selected_points = curves::retrieve_selected_points(
curves_id, curve_op_data.selected_point_indices);
curve_op_data.unselected_points = curve_op_data.selected_points.invert(
curves.points_range(), curve_op_data.unselected_point_indices);
curves_id, curve_op_data.selected_points_memory);
curve_op_data.unselected_points = curve_op_data.selected_points.complement(
curves.points_range(), curve_op_data.unselected_points_memory);
threading::parallel_invoke(
1024 < curve_op_data.selected_points.size() + curve_op_data.unselected_points.size(),
@ -656,10 +642,10 @@ static void select_grow_invoke_per_curve(const Curves &curves_id,
/* Build KD-tree for the selected points. */
KDTree_3d *kdtree = BLI_kdtree_3d_new(curve_op_data.selected_points.size());
BLI_SCOPED_DEFER([&]() { BLI_kdtree_3d_free(kdtree); });
for (const int point_i : curve_op_data.selected_points) {
curve_op_data.selected_points.foreach_index([&](const int point_i) {
const float3 &position = positions[point_i];
BLI_kdtree_3d_insert(kdtree, point_i, position);
}
});
BLI_kdtree_3d_balance(kdtree);
/* For each unselected point, compute the distance to the closest selected point. */
@ -679,10 +665,10 @@ static void select_grow_invoke_per_curve(const Curves &curves_id,
/* Build KD-tree for the unselected points. */
KDTree_3d *kdtree = BLI_kdtree_3d_new(curve_op_data.unselected_points.size());
BLI_SCOPED_DEFER([&]() { BLI_kdtree_3d_free(kdtree); });
for (const int point_i : curve_op_data.unselected_points) {
curve_op_data.unselected_points.foreach_index([&](const int point_i) {
const float3 &position = positions[point_i];
BLI_kdtree_3d_insert(kdtree, point_i, position);
}
});
BLI_kdtree_3d_balance(kdtree);
/* For each selected point, compute the distance to the closest unselected point. */

View File

@ -252,7 +252,7 @@ struct PinchOperationExecutor {
const OffsetIndices points_by_curve = curves_->points_by_curve();
curve_selection_.foreach_span(GrainSize(256), [&](const auto mask_segment) {
for (const int curve_i : curve_selection_.slice(range)) {
for (const int curve_i : mask_segment) {
const IndexRange points = points_by_curve[curve_i];
for (const int point_i : points.drop_front(1)) {
const float3 old_pos_cu = deformation.positions[point_i];

View File

@ -165,9 +165,11 @@ struct PuffOperationExecutor {
changed_curves_indices.append(curve_selection_[select_i]);
}
}
Review

It looks like this whole changed_curves_indices thing could be replaced with from_predicate?

It looks like this whole `changed_curves_indices` thing could be replaced with `from_predicate`?
Review

Might not be entirely trivial right now, but generally I agree. Will leave that for later.

Might not be entirely trivial right now, but generally I agree. Will leave that for later.
IndexMaskMemory memory;
const IndexMask changed_curves_mask = IndexMask::from_indices<int64_t>(changed_curves_indices,
memory);
self_->constraint_solver_.solve_step(
*curves_, IndexMask(changed_curves_indices), surface_, transforms_);
self_->constraint_solver_.solve_step(*curves_, changed_curves_mask, surface_, transforms_);
curves_->tag_positions_changed();
DEG_id_tag_update(&curves_id_->id, ID_RECALC_GEOMETRY);

View File

@ -141,29 +141,27 @@ struct SmoothOperationExecutor {
bke::crazyspace::get_evaluated_curves_deformation(*ctx_.depsgraph, *object_);
const OffsetIndices points_by_curve = curves_->points_by_curve();
threading::parallel_for(curve_selection_.index_range(), 256, [&](const IndexRange range) {
for (const int curve_i : curve_selection_.slice(range)) {
const IndexRange points = points_by_curve[curve_i];
for (const int point_i : points) {
const float3 &pos_cu = math::transform_point(brush_transform_inv,
deformation.positions[point_i]);
float2 pos_re;
ED_view3d_project_float_v2_m4(ctx_.region, pos_cu, pos_re, projection.ptr());
const float dist_to_brush_sq_re = math::distance_squared(pos_re, brush_pos_re_);
if (dist_to_brush_sq_re > brush_radius_sq_re) {
continue;
}
const float dist_to_brush_re = std::sqrt(dist_to_brush_sq_re);
const float radius_falloff = BKE_brush_curve_strength(
brush_, dist_to_brush_re, brush_radius_re);
/* Used to make the brush easier to use. Otherwise a strength of 1 would be way too
* large. */
const float weight_factor = 0.1f;
const float weight = weight_factor * brush_strength_ * radius_falloff *
point_factors_[point_i];
math::max_inplace(r_point_smooth_factors[point_i], weight);
curve_selection_.foreach_index(GrainSize(256), [&](const int curve_i) {
const IndexRange points = points_by_curve[curve_i];
for (const int point_i : points) {
const float3 &pos_cu = math::transform_point(brush_transform_inv,
deformation.positions[point_i]);
float2 pos_re;
ED_view3d_project_float_v2_m4(ctx_.region, pos_cu, pos_re, projection.ptr());
const float dist_to_brush_sq_re = math::distance_squared(pos_re, brush_pos_re_);
if (dist_to_brush_sq_re > brush_radius_sq_re) {
continue;
}
const float dist_to_brush_re = std::sqrt(dist_to_brush_sq_re);
const float radius_falloff = BKE_brush_curve_strength(
brush_, dist_to_brush_re, brush_radius_re);
/* Used to make the brush easier to use. Otherwise a strength of 1 would be way too
* large. */
const float weight_factor = 0.1f;
const float weight = weight_factor * brush_strength_ * radius_falloff *
point_factors_[point_i];
math::max_inplace(r_point_smooth_factors[point_i], weight);
}
});
}
@ -201,26 +199,24 @@ struct SmoothOperationExecutor {
bke::crazyspace::get_evaluated_curves_deformation(*ctx_.depsgraph, *object_);
const OffsetIndices points_by_curve = curves_->points_by_curve();
threading::parallel_for(curve_selection_.index_range(), 256, [&](const IndexRange range) {
for (const int curve_i : curve_selection_.slice(range)) {
const IndexRange points = points_by_curve[curve_i];
for (const int point_i : points) {
const float3 &pos_cu = deformation.positions[point_i];
const float dist_to_brush_sq_cu = math::distance_squared(pos_cu, brush_pos_cu);
if (dist_to_brush_sq_cu > brush_radius_sq_cu) {
continue;
}
const float dist_to_brush_cu = std::sqrt(dist_to_brush_sq_cu);
const float radius_falloff = BKE_brush_curve_strength(
brush_, dist_to_brush_cu, brush_radius_cu);
/* Used to make the brush easier to use. Otherwise a strength of 1 would be way too
* large. */
const float weight_factor = 0.1f;
const float weight = weight_factor * brush_strength_ * radius_falloff *
point_factors_[point_i];
math::max_inplace(r_point_smooth_factors[point_i], weight);
curve_selection_.foreach_index(GrainSize(256), [&](const int curve_i) {
const IndexRange points = points_by_curve[curve_i];
for (const int point_i : points) {
const float3 &pos_cu = deformation.positions[point_i];
const float dist_to_brush_sq_cu = math::distance_squared(pos_cu, brush_pos_cu);
if (dist_to_brush_sq_cu > brush_radius_sq_cu) {
continue;
}
const float dist_to_brush_cu = std::sqrt(dist_to_brush_sq_cu);
const float radius_falloff = BKE_brush_curve_strength(
brush_, dist_to_brush_cu, brush_radius_cu);
/* Used to make the brush easier to use. Otherwise a strength of 1 would be way too
* large. */
const float weight_factor = 0.1f;
const float weight = weight_factor * brush_strength_ * radius_falloff *
point_factors_[point_i];
math::max_inplace(r_point_smooth_factors[point_i], weight);
}
});
}
@ -229,9 +225,10 @@ struct SmoothOperationExecutor {
{
const OffsetIndices points_by_curve = curves_->points_by_curve();
MutableSpan<float3> positions = curves_->positions_for_write();
threading::parallel_for(curve_selection_.index_range(), 256, [&](const IndexRange range) {
curve_selection_.foreach_span(GrainSize(256), [&](const auto sliced_selection) {
Vector<float3> old_positions;
for (const int curve_i : curve_selection_.slice(range)) {
for (const int curve_i : sliced_selection) {
const IndexRange points = points_by_curve[curve_i];
old_positions.clear();
old_positions.extend(positions.slice(points));

View File

@ -264,19 +264,17 @@ static bool transform_active_color(Mesh &mesh, const TransformFn &transform_fn)
attribute_math::convert_to_static_type(color_attribute.varray.type(), [&](auto dummy) {
using T = decltype(dummy);
threading::parallel_for(selection.index_range(), 1024, [&](IndexRange range) {
for ([[maybe_unused]] const int i : selection.slice(range)) {
if constexpr (std::is_same_v<T, ColorGeometry4f>) {
ColorGeometry4f color = color_attribute.varray.get<ColorGeometry4f>(i);
transform_fn(color);
color_attribute.varray.set_by_copy(i, &color);
}
else if constexpr (std::is_same_v<T, ColorGeometry4b>) {
ColorGeometry4f color = color_attribute.varray.get<ColorGeometry4b>(i).decode();
transform_fn(color);
ColorGeometry4b color_encoded = color.encode();
color_attribute.varray.set_by_copy(i, &color_encoded);
}
selection.foreach_index(GrainSize(1024), [&](const int i) {
if constexpr (std::is_same_v<T, ColorGeometry4f>) {
ColorGeometry4f color = color_attribute.varray.get<ColorGeometry4f>(i);
transform_fn(color);
color_attribute.varray.set_by_copy(i, &color);
}
else if constexpr (std::is_same_v<T, ColorGeometry4b>) {
ColorGeometry4f color = color_attribute.varray.get<ColorGeometry4b>(i).decode();
transform_fn(color);
ColorGeometry4b color_encoded = color.encode();
color_attribute.varray.set_by_copy(i, &color_encoded);
}
});
});