Geometry Nodes: speed up Separate XYZ node
This speeds up the node by ~20% in common cases, e.g. when only the X axis is used. The main optimization comes from not writing to memory that is not used afterwards. The "optimal code" for just extracting the X axis in a separate loop was not faster for me. That indicates that the node is bottlenecked by memory bandwidth, which seems reasonable.
This commit is contained in:
@@ -48,16 +48,36 @@ class MF_SeparateXYZ : public fn::MultiFunction {
|
|||||||
void call(IndexMask mask, fn::MFParams params, fn::MFContext UNUSED(context)) const override
|
void call(IndexMask mask, fn::MFParams params, fn::MFContext UNUSED(context)) const override
|
||||||
{
|
{
|
||||||
const VArray<float3> &vectors = params.readonly_single_input<float3>(0, "XYZ");
|
const VArray<float3> &vectors = params.readonly_single_input<float3>(0, "XYZ");
|
||||||
MutableSpan<float> xs = params.uninitialized_single_output<float>(1, "X");
|
MutableSpan<float> xs = params.uninitialized_single_output_if_required<float>(1, "X");
|
||||||
MutableSpan<float> ys = params.uninitialized_single_output<float>(2, "Y");
|
MutableSpan<float> ys = params.uninitialized_single_output_if_required<float>(2, "Y");
|
||||||
MutableSpan<float> zs = params.uninitialized_single_output<float>(3, "Z");
|
MutableSpan<float> zs = params.uninitialized_single_output_if_required<float>(3, "Z");
|
||||||
|
|
||||||
for (int64_t i : mask) {
|
std::array<MutableSpan<float>, 3> outputs = {xs, ys, zs};
|
||||||
float3 xyz = vectors[i];
|
Vector<int> used_outputs;
|
||||||
xs[i] = xyz.x;
|
if (!xs.is_empty()) {
|
||||||
ys[i] = xyz.y;
|
used_outputs.append(0);
|
||||||
zs[i] = xyz.z;
|
|
||||||
}
|
}
|
||||||
|
if (!ys.is_empty()) {
|
||||||
|
used_outputs.append(1);
|
||||||
|
}
|
||||||
|
if (!zs.is_empty()) {
|
||||||
|
used_outputs.append(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
devirtualize_varray(vectors, [&](auto vectors) {
|
||||||
|
mask.to_best_mask_type([&](auto mask) {
|
||||||
|
const int used_outputs_num = used_outputs.size();
|
||||||
|
const int *used_outputs_data = used_outputs.data();
|
||||||
|
|
||||||
|
for (const int64_t i : mask) {
|
||||||
|
const float3 &vector = vectors[i];
|
||||||
|
for (const int out_i : IndexRange(used_outputs_num)) {
|
||||||
|
const int coordinate = used_outputs_data[out_i];
|
||||||
|
outputs[coordinate][i] = vector[coordinate];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user