Geometry Node: Index of Nearest #104619
|
@ -1,7 +1,5 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include "BKE_attribute_math.hh"
|
||||
|
||||
#include "BLI_kdtree.h"
|
||||
#include "BLI_multi_value_map.hh"
|
||||
#include "BLI_task.hh"
|
||||
|
@ -19,6 +17,55 @@ static void node_declare(NodeDeclarationBuilder &b)
|
|||
b.add_output<decl::Bool>(N_("Has Neighbor")).field_source();
|
||||
}
|
||||
|
||||
static KDTree_3d *build_kdtree(const Span<float3> &positions, const IndexMask mask)
|
||||
{
|
||||
KDTree_3d *tree = BLI_kdtree_3d_new(mask.size());
|
||||
for (const int i : mask) {
|
||||
BLI_kdtree_3d_insert(tree, i, positions[i]);
|
||||
mod_moder marked this conversation as resolved
Outdated
|
||||
}
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
We settled on We settled on `Self Group ID` and `Nearest Group ID` in the module meeting.
|
||||
BLI_kdtree_3d_balance(tree);
|
||||
return tree;
|
||||
}
|
||||
|
||||
static int find_nearest_non_self(const KDTree_3d &tree, const float3 &position, const int index)
|
||||
{
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
Pass spans by value Pass spans by value
|
||||
return BLI_kdtree_3d_find_nearest_cb_cpp(
|
||||
&tree, position, 0, [index](const int other, const float * /*co*/, const float /*dist_sq*/) {
|
||||
return index == other ? 0 : 1;
|
||||
});
|
||||
}
|
||||
|
||||
static void find_neighbors(const KDTree_3d &tree,
|
||||
const Span<float3> positions,
|
||||
const IndexMask mask,
|
||||
MutableSpan<int> indices)
|
||||
{
|
||||
threading::parallel_for(mask.index_range(), 1024, [&](const IndexRange range) {
|
||||
for (const int i : mask.slice(range)) {
|
||||
indices[i] = find_nearest_non_self(tree, positions[i], i);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static Vector<IndexMask> masks_from_group_ids(const Span<int> group_ids,
|
||||
const IndexMask mask,
|
||||
Hans Goudey
commented
I don't think there's a benefit to specifying the capture in a case like this (instead of I don't think there's a benefit to specifying the capture in a case like this (instead of `[&]`)
|
||||
MultiValueMap<int, int64_t> &storage)
|
||||
{
|
||||
mask.foreach_index([&](const int i) { storage.add(group_ids[i], i); });
|
||||
Vector<IndexMask> masks;
|
||||
masks.reserve(storage.size());
|
||||
for (const Span<int64_t> indices : storage.values()) {
|
||||
masks.append(indices);
|
||||
}
|
||||
return masks;
|
||||
}
|
||||
|
||||
static Vector<IndexMask> masks_from_group_ids(const Span<int> group_ids,
|
||||
MultiValueMap<int, int64_t> &storage)
|
||||
{
|
||||
return masks_from_group_ids(group_ids, group_ids.index_range(), storage);
|
||||
Jacques Lucke
commented
Can just use a normal for loop here. The performance benefit of using Can just use a normal for loop here. The performance benefit of using `foreach_index` is negligible, because most time is spend in the kdtree traversal.
|
||||
}
|
||||
|
||||
class IndexOfNearestFieldInput final : public bke::GeometryFieldInput {
|
||||
private:
|
||||
const Field<float3> positions_field_;
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
I'd change these names to I'd change these names to `use_group` and `use_search_group` for consistency with your other variable names and because `use` at front sounds more natural.
|
||||
|
@ -26,70 +73,70 @@ class IndexOfNearestFieldInput final : public bke::GeometryFieldInput {
|
|||
|
||||
public:
|
||||
IndexOfNearestFieldInput(Field<float3> positions_field, Field<int> group_field)
|
||||
: bke::GeometryFieldInput(CPPType::get<int>(), "Nearest to"),
|
||||
: bke::GeometryFieldInput(CPPType::get<int>(), "Index of Nearest"),
|
||||
positions_field_(std::move(positions_field)),
|
||||
group_field_(std::move(group_field))
|
||||
Hans Goudey
commented
This comment is a bit vague, but I'm finding it hard to keep track of the separate This comment is a bit vague, but I'm finding it hard to keep track of the separate `group_use` and `group_to_find_use` conditions. I wonder if the first condition could be handled with by splitting logic into a separate function, like `SampleCurveFunction` does with its `sample_curve` lambda.
|
||||
{
|
||||
}
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
No strong opinion, but using regular No strong opinion, but using regular `=` assignment seems more consistent here.
|
||||
|
||||
GVArray get_varray_for_context(const bke::GeometryFieldContext &context,
|
||||
IndexMask mask) const final
|
||||
const IndexMask mask) const final
|
||||
{
|
||||
fn::FieldEvaluator evaluator{context, &mask};
|
||||
if (!context.attributes()) {
|
||||
return {};
|
||||
}
|
||||
const int domain_size = context.attributes()->domain_size(context.domain());
|
||||
fn::FieldEvaluator evaluator{context, domain_size};
|
||||
Hans Goudey
commented
It seems like creating a KD tree for every group ID will mean that the KD tree for the same set of search points will potentially be built multiple times. There should probably be some way to avoid building a tree multiple times for the same key. Overall this area looks much cleaner than the last time I checked though! It seems like creating a KD tree for every group ID will mean that the KD tree for the same set of search points will potentially be built multiple times. There should probably be some way to avoid building a tree multiple times for the same key.
Overall this area looks much cleaner than the last time I checked though!
Iliya Katushenock
commented
After you point it out... it seems I made some kind of logical error in the implementation of switching groups when iterating over them. Or not .. i need to redo this part of the code After you point it out... it seems I made some kind of logical error in the implementation of switching groups when iterating over them. Or not .. i need to redo this part of the code
Hans Goudey
commented
Remove empty line between Remove empty line between `group_indexing` declaration and loop that creates it
|
||||
evaluator.add(positions_field_);
|
||||
evaluator.add(group_field_);
|
||||
evaluator.evaluate();
|
||||
const VArraySpan<float3> positions = evaluator.get_evaluated<float3>(0);
|
||||
const VArray<int> group = evaluator.get_evaluated<int>(1);
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
We can't selectively ignore the mask actually, it may be incorrect to write the output to non-selected indices. So this check really has to be We can't selectively ignore the mask actually, it may be incorrect to write the output to non-selected indices. So this check really has to be `mask.size() == domain_size`
Iliya Katushenock
commented
I see. I see.
Just if domain size is 10000, mask is 9999, is no much sense to compute the cheap mask. I think, is better to just allocate all elements if cheap mask is used.
|
||||
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
Replace this with Replace this with `const bool mask_is_full = mask.size() == domain_size;`
|
||||
const VArray<float3> &positions = evaluator.get_evaluated<float3>(0);
|
||||
const VArray<int> &group = evaluator.get_evaluated<int>(1);
|
||||
Array<int> result(mask.min_array_size());
|
||||
|
||||
MultiValueMap<int, int64_t> group_masks;
|
||||
mask.foreach_index([&](const int index) { group_masks.add(group[index], index); });
|
||||
|
||||
Array<int> indices(mask.min_array_size());
|
||||
|
||||
const auto nearest_for = [this, &positions](const IndexMask mask, MutableSpan<int> r_indices) {
|
||||
devirtualize_varray(positions, [mask, r_indices, this](const auto positions) {
|
||||
KDTree_3d *tree = BLI_kdtree_3d_new(mask.size());
|
||||
mask.foreach_index([tree, positions](const int index) {
|
||||
BLI_kdtree_3d_insert(tree, index, positions[index]);
|
||||
});
|
||||
|
||||
BLI_kdtree_3d_balance(tree);
|
||||
|
||||
threading::parallel_for(mask.index_range(), 512, [&](const IndexRange range) {
|
||||
mask.slice(range).foreach_index([&](const auto index) {
|
||||
r_indices[index] = this->kdtree_find_neighboard(tree, positions[index], index);
|
||||
});
|
||||
});
|
||||
|
||||
BLI_kdtree_3d_free(tree);
|
||||
});
|
||||
};
|
||||
|
||||
for (const Span<int64_t> mask_span : group_masks.values()) {
|
||||
if (mask_span.size() == 1) {
|
||||
indices[mask_span.first()] = -1;
|
||||
}
|
||||
nearest_for(mask_span, indices);
|
||||
if (group.is_single()) {
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
`int key` -> `const int key`
|
||||
const IndexMask full_mask = positions.index_range();
|
||||
KDTree_3d *tree = build_kdtree(positions, full_mask);
|
||||
find_neighbors(*tree, positions, mask, result);
|
||||
BLI_kdtree_3d_free(tree);
|
||||
}
|
||||
else {
|
||||
/* The goal is to build each tree and use it immediately, rather than building all trees and
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
The result array should never need to be larger than The result array should never need to be larger than `mask.min_array_size()`
|
||||
* sampling them later. That should help to keep the tree in caches before balancing and when
|
||||
* sampling many points. */
|
||||
const VArraySpan<int> group_ids(group);
|
||||
MultiValueMap<int, int64_t> group_mask_storage;
|
||||
const Vector<IndexMask> tree_masks = masks_from_group_ids(group_ids, group_mask_storage);
|
||||
|
||||
mod_moder marked this conversation as resolved
Outdated
Jacques Lucke
commented
While forest is used as a technical term for a graph containing multiple trees, I don't think the term should be used for a collection of multiple independent kd trees. Just use While forest is used as a technical term for a graph containing multiple trees, I don't think the term should be used for a collection of multiple independent kd trees. Just use `kdtrees`.
Iliya Katushenock
commented
If i do merge If i do merge `parallel_for`s below, i can delete this vector.
|
||||
return VArray<int>::ForContainer(std::move(indices));
|
||||
}
|
||||
MultiValueMap<int, int64_t> evaluate_masks_storage;
|
||||
Vector<IndexMask> evaluate_masks;
|
||||
if (mask.size() < domain_size) {
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
I think this should be I think this should be `!mask_is_cheap`
Iliya Katushenock
commented
I was isn't invented the best name, I was isn't invented the best name, `mask_is_cheap` == true if computing of indices make sense and cheap mask is used.
|
||||
/* Separate masks for evaluation are only necessary if the mask mask
|
||||
Hans Goudey
commented
TBH I think it's better to just use TBH I think it's better to just use `int64_t` here to avoid duplicating the functions above. Eventually when `IndexMask` is refactored, these could benefit from using that, and the `int64_t` will make that more clear too.
|
||||
* for field input evaluation doesn't have every element selected. */
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
`Vector` -> `Array` here
|
||||
evaluate_masks = masks_from_group_ids(group_ids, mask, evaluate_masks_storage);
|
||||
}
|
||||
|
||||
protected:
|
||||
static int kdtree_find_neighboard(KDTree_3d *tree, const float3 &position, const int &index)
|
||||
{
|
||||
return BLI_kdtree_3d_find_nearest_cb_cpp(
|
||||
tree,
|
||||
position,
|
||||
0,
|
||||
[index](const int other_new_i, const float * /*co*/, const float /*dist_sq*/) {
|
||||
if (index == other_new_i) {
|
||||
return 0;
|
||||
/* The grain size should be larger as each tree gets smaller. */
|
||||
const int avg_tree_size = group_ids.size() / group_mask_storage.size();
|
||||
Hans Goudey
commented
`domain_size > 1024 && use_cheap_mask` -> `domain_size > 1024 && !mask_is_full`
|
||||
const int grain_size = std::max(8192 / avg_tree_size, 1);
|
||||
Iliya Katushenock
commented
Are Are `grain_size` is a power of 2?
Hans Goudey
commented
They don't need to be powers of two, though I think it can be slightly beneficial. Since these numbers are a bit arbitrary anyway I started using powers of two. They don't need to be powers of two, though I think it can be slightly beneficial. Since these numbers are a bit arbitrary anyway I started using powers of two.
Iliya Katushenock
commented
I remember fixing a bug, only related to the fact that there was no power of 2x. I remember fixing a bug, only related to the fact that there was no power of 2x.
For the blur node, I made the search function for the top bit part in the threading namespace.
|
||||
threading::parallel_for(tree_masks.index_range(), grain_size, [&](const IndexRange range) {
|
||||
for (const int i : range) {
|
||||
const IndexMask tree_mask = tree_masks[i];
|
||||
Iliya Katushenock
commented
Why It seems that trying to use a mask to multiply all user groups by groups from the mask leads to much more overhead in most cases. Why `tree_masks[i]` and `evaluate_masks[i]` is groups with the same id? this isn't sorted somether or protected, that mask isn't avoid all elements of some grouop and create offset for all other groups even all of that added in same order.
It seems that trying to use a mask to multiply all user groups by groups from the mask leads to much more overhead in most cases.
If you don't do some kind of analogue of a tree, then it's like doing a boolean through mask1 * mask2. When there is something like bvh. so I think it's easier to just calculate everything. I'm not sure there are many cases now where the mask can actually be a small part.
Hans Goudey
commented
Oh, great point! I think it's still worth having a separate evaluation mask. Even just the set position node with a small selection would benefit from it, I think kd tree lookups are fairly expensive. I guess both masks have to be created at the same time. Oh, great point! I think it's still worth having a separate evaluation mask. Even just the set position node with a small selection would benefit from it, I think kd tree lookups are fairly expensive. I guess both masks have to be created at the same time.
|
||||
const IndexMask evaluate_mask = evaluate_masks.is_empty() ? tree_mask :
|
||||
evaluate_masks[i];
|
||||
if (tree_masks[i].size() < 2) {
|
||||
result.as_mutable_span().fill_indices(evaluate_mask.indices(), 0);
|
||||
}
|
||||
Hans Goudey
commented
I do think one of these indices could be skipped if the selection is complete I do think one of these indices could be skipped if the selection is complete
Hans Goudey
commented
Use Use `IndexMask` instead of `Span<int64_t>` for these `mask_of_tree` and `mask` variables
|
||||
return 1;
|
||||
});
|
||||
else {
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
References typically indicate a lack of ownership, better to stick with a pointer here References typically indicate a lack of ownership, better to stick with a pointer here
|
||||
KDTree_3d *tree = build_kdtree(positions, tree_mask);
|
||||
find_neighbors(*tree, positions, evaluate_mask, result);
|
||||
Jacques Lucke
commented
Feels like this Feels like this `parallel_for` loop and the one below can be combined into one. This could potentially improve multi-threaded performance.
|
||||
BLI_kdtree_3d_free(tree);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
return VArray<int>::ForContainer(std::move(result));
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -99,53 +146,105 @@ class IndexOfNearestFieldInput final : public bke::GeometryFieldInput {
|
|||
group_field_.node().for_each_field_input_recursive(fn);
|
||||
}
|
||||
|
||||
uint64_t hash() const override
|
||||
uint64_t hash() const final
|
||||
{
|
||||
return get_default_hash_2(positions_field_, group_field_);
|
||||
}
|
||||
|
||||
bool is_equal_to(const fn::FieldNode &other) const override
|
||||
bool is_equal_to(const fn::FieldNode &other) const final
|
||||
{
|
||||
if (const IndexOfNearestFieldInput *other_field =
|
||||
dynamic_cast<const IndexOfNearestFieldInput *>(&other)) {
|
||||
if (const auto *other_field = dynamic_cast<const IndexOfNearestFieldInput *>(&other)) {
|
||||
return positions_field_ == other_field->positions_field_ &&
|
||||
group_field_ == other_field->group_field_;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::optional<eAttrDomain> preferred_domain(const GeometryComponent &component) const override
|
||||
std::optional<eAttrDomain> preferred_domain(const GeometryComponent &component) const final
|
||||
{
|
||||
return bke::try_detect_field_domain(component, positions_field_);
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
If using I think it would be fine to retrieve them directly in If using `std::move` with these variables, don't declare them const.
I think it would be fine to retrieve them directly in `make_shared` though, rather than having temporary variables.
|
||||
}
|
||||
};
|
||||
|
||||
class HasNeighborFieldInput final : public bke::GeometryFieldInput {
|
||||
private:
|
||||
const Field<int> group_field_;
|
||||
|
||||
public:
|
||||
HasNeighborFieldInput(Field<int> group_field)
|
||||
: bke::GeometryFieldInput(CPPType::get<bool>(), "Has Neighbor"),
|
||||
group_field_(std::move(group_field))
|
||||
{
|
||||
}
|
||||
|
||||
GVArray get_varray_for_context(const bke::GeometryFieldContext &context,
|
||||
const IndexMask mask) const final
|
||||
{
|
||||
if (!context.attributes()) {
|
||||
return {};
|
||||
}
|
||||
const int domain_size = context.attributes()->domain_size(context.domain());
|
||||
fn::FieldEvaluator evaluator{context, domain_size};
|
||||
evaluator.add(group_field_);
|
||||
evaluator.evaluate();
|
||||
const VArray<int> group = evaluator.get_evaluated<int>(0);
|
||||
|
||||
if (group.is_single()) {
|
||||
return VArray<bool>::ForSingle(true, mask.min_array_size());
|
||||
}
|
||||
|
||||
/* When a group ID is contained in the set, it means there is only one element with that ID. */
|
||||
Map<int, int> counts;
|
||||
const VArraySpan<int> group_span(group);
|
||||
mask.foreach_index([&](const int i) {
|
||||
counts.add_or_modify(
|
||||
group_span[i], [](int *count) { *count = 0; }, [](int *count) { (*count)++; });
|
||||
});
|
||||
Array<bool> result(mask.min_array_size());
|
||||
mask.foreach_index([&](const int i) { result[i] = counts.lookup(group_span[i]) > 1; });
|
||||
mod_moder marked this conversation as resolved
Outdated
Hans Goudey
commented
Oops, I forgot to remove this comment. Oops, I forgot to remove this comment.
|
||||
return VArray<bool>::ForContainer(std::move(result));
|
||||
}
|
||||
|
||||
public:
|
||||
void for_each_field_input_recursive(FunctionRef<void(const FieldInput &)> fn) const
|
||||
{
|
||||
group_field_.node().for_each_field_input_recursive(fn);
|
||||
}
|
||||
|
||||
uint64_t hash() const final
|
||||
{
|
||||
return get_default_hash_2(3984756934876, group_field_);
|
||||
}
|
||||
|
||||
bool is_equal_to(const fn::FieldNode &other) const final
|
||||
{
|
||||
if (const auto *other_field = dynamic_cast<const HasNeighborFieldInput *>(&other)) {
|
||||
return group_field_ == other_field->group_field_;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::optional<eAttrDomain> preferred_domain(const GeometryComponent &component) const final
|
||||
{
|
||||
return bke::try_detect_field_domain(component, group_field_);
|
||||
}
|
||||
};
|
||||
|
||||
static void node_geo_exec(GeoNodeExecParams params)
|
||||
{
|
||||
Field<float3> position_field = params.extract_input<Field<float3>>("Position");
|
||||
Field<int> group_field = params.extract_input<Field<int>>("Group ID");
|
||||
|
||||
Field<int> index_of_nearest_field(std::make_shared<IndexOfNearestFieldInput>(
|
||||
std::move(position_field), std::move(group_field)));
|
||||
|
||||
if (params.output_is_required("Index")) {
|
||||
static auto clamp_fn = mf::build::SI1_SO<int, int>(
|
||||
"Index Clamping",
|
||||
[](const int index) { return math::max(0, index); },
|
||||
mf::build::exec_presets::Materialized());
|
||||
auto clamp_op = std::make_shared<FieldOperation>(
|
||||
FieldOperation(std::move(clamp_fn), {index_of_nearest_field}));
|
||||
params.set_output("Index", Field<int>(clamp_op, 0));
|
||||
params.set_output("Index",
|
||||
Field<int>(std::make_shared<IndexOfNearestFieldInput>(
|
||||
std::move(position_field), group_field)));
|
||||
}
|
||||
|
||||
if (params.output_is_required("Has Neighbor")) {
|
||||
static auto valid_fn = mf::build::SI1_SO<int, bool>(
|
||||
"Index Validating",
|
||||
[](const int index) { return index != -1; },
|
||||
mf::build::exec_presets::Materialized());
|
||||
auto valid_op = std::make_shared<FieldOperation>(
|
||||
FieldOperation(std::move(valid_fn), {std::move(index_of_nearest_field)}));
|
||||
params.set_output("Has Neighbor", Field<bool>(valid_op, 0));
|
||||
params.set_output(
|
||||
"Has Neighbor",
|
||||
Field<bool>(std::make_shared<HasNeighborFieldInput>(std::move(group_field))));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
These identifiers/names are missing the translation macro
N_