This repository has been archived on 2023-10-09. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
blender-archive/source/blender/nodes/shader/nodes/node_shader_sepcomb_xyz.cc
Jacques Lucke b8bd304bd4 Geometry Nodes: speedup Separate XYZ node
This speeds up the node by ~20% in common cases, e.g. when only the
X axis is used. The main optimization comes from not writing to memory
that is not used afterwards.

The "optimal code" for just extracting the x axis in a separate loop was
not faster for me. That indicates that the node is bottlenecked by
memory bandwidth, which seems reasonable.
2022-06-18 13:41:08 +02:00

150 lines
4.6 KiB
C++

/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2014 Blender Foundation. All rights reserved. */
/** \file
* \ingroup shdnodes
*/
#include "node_shader_util.hh"
namespace blender::nodes::node_shader_sepcomb_xyz_cc {
/**
 * Socket declaration for the "Separate XYZ" node: a single vector input and one float output
 * per component.
 */
static void sh_node_sepxyz_declare(NodeDeclarationBuilder &b)
{
  /* The vector input is declared with a symmetric min/max of this magnitude. */
  constexpr float value_limit = 10000.0f;

  b.is_function_node();

  b.add_input<decl::Vector>(N_("Vector")).min(-value_limit).max(value_limit);

  b.add_output<decl::Float>(N_("X"));
  b.add_output<decl::Float>(N_("Y"));
  b.add_output<decl::Float>(N_("Z"));
}
/**
 * GPU callback of the "Separate XYZ" node.
 *
 * Forwards the material, node and both socket stacks to #GPU_stack_link together with the
 * name of the GPU function to link, and returns whatever that call returns.
 */
static int gpu_shader_sepxyz(GPUMaterial *mat,
                             bNode *node,
                             bNodeExecData *UNUSED(execdata),
                             GPUNodeStack *in,
                             GPUNodeStack *out)
{
  const char *gpu_function_name = "separate_xyz";
  return GPU_stack_link(mat, node, gpu_function_name, in, out);
}
/**
 * Multi-function that splits a #float3 input into up to three float outputs.
 *
 * Parameter layout (see #create_signature): index 0 is the "XYZ" input, indices 1 to 3 are the
 * "X", "Y" and "Z" outputs. Only outputs whose span is non-empty are written.
 */
class MF_SeparateXYZ : public fn::MultiFunction {
 public:
  MF_SeparateXYZ()
  {
    /* Function-local static: the signature is built on first construction only and the same
     * object is handed to every instance. */
    static fn::MFSignature signature = create_signature();
    this->set_signature(&signature);
  }

  /** Build the signature: one #float3 input followed by three float outputs. */
  static fn::MFSignature create_signature()
  {
    fn::MFSignatureBuilder builder{"Separate XYZ"};
    builder.single_input<float3>("XYZ");
    builder.single_output<float>("X");
    builder.single_output<float>("Y");
    builder.single_output<float>("Z");
    return builder.build();
  }

  /**
   * Copy the components of every masked input vector into the outputs that are in use.
   *
   * \param mask: Indices to process.
   * \param params: Provides the input at index 0 and the outputs at indices 1 to 3.
   */
  void call(IndexMask mask, fn::MFParams params, fn::MFContext UNUSED(context)) const override
  {
    const VArray<float3> &vectors = params.readonly_single_input<float3>(0, "XYZ");

    /* Output spans indexed by coordinate (0 = X, 1 = Y, 2 = Z). The initializers of a braced
     * list are evaluated in order, so the parameters are still requested as 1, 2, 3. */
    std::array<MutableSpan<float>, 3> outputs = {
        params.uninitialized_single_output_if_required<float>(1, "X"),
        params.uninitialized_single_output_if_required<float>(2, "Y"),
        params.uninitialized_single_output_if_required<float>(3, "Z"),
    };

    /* Collect the coordinates that have a non-empty span, in ascending order. An output with
     * an empty span is treated as unused and is never written. */
    Vector<int> used_outputs;
    for (const int coordinate : IndexRange(3)) {
      if (!outputs[coordinate].is_empty()) {
        used_outputs.append(coordinate);
      }
    }

    devirtualize_varray(vectors, [&](auto typed_vectors) {
      mask.to_best_mask_type([&](auto typed_mask) {
        /* Read the list of used coordinates through a raw pointer and count that are fetched
         * once, outside of the per-element loop. */
        const int *const used_begin = used_outputs.data();
        const int used_num = used_outputs.size();

        for (const int64_t i : typed_mask) {
          const float3 &vector = typed_vectors[i];
          for (int k = 0; k < used_num; k++) {
            const int coordinate = used_begin[k];
            outputs[coordinate][i] = vector[coordinate];
          }
        }
      });
    });
  }
};
/**
 * Multi-function callback of the "Separate XYZ" node: hands a function-local static
 * #MF_SeparateXYZ instance to the builder.
 */
static void sh_node_sepxyz_build_multi_function(NodeMultiFunctionBuilder &builder)
{
  /* Constructed on the first call only; later calls pass the same object. */
  static MF_SeparateXYZ shared_separate_fn;
  builder.set_matching_fn(shared_separate_fn);
}
} // namespace blender::nodes::node_shader_sepcomb_xyz_cc
/**
 * Set up and register the node type for the "Separate XYZ" shader node.
 */
void register_node_type_sh_sepxyz()
{
  namespace file_ns = blender::nodes::node_shader_sepcomb_xyz_cc;

  /* Static storage; its address is passed to the registration call at the end. */
  static bNodeType ntype;
  sh_fn_node_type_base(&ntype, SH_NODE_SEPXYZ, "Separate XYZ", NODE_CLASS_CONVERTER);

  /* Callbacks stored directly on the type. */
  ntype.declare = file_ns::sh_node_sepxyz_declare;
  ntype.build_multi_function = file_ns::sh_node_sepxyz_build_multi_function;

  /* GPU callback, installed through its helper. */
  node_type_gpu(&ntype, file_ns::gpu_shader_sepxyz);

  nodeRegisterType(&ntype);
}
namespace blender::nodes::node_shader_sepcomb_xyz_cc {
/**
 * Socket declaration for the "Combine XYZ" node: one float input per component and a single
 * vector output.
 */
static void sh_node_combxyz_declare(NodeDeclarationBuilder &b)
{
  /* All three inputs are declared with the same symmetric min/max of this magnitude. */
  constexpr float value_limit = 10000.0f;

  b.is_function_node();

  b.add_input<decl::Float>(N_("X")).min(-value_limit).max(value_limit);
  b.add_input<decl::Float>(N_("Y")).min(-value_limit).max(value_limit);
  b.add_input<decl::Float>(N_("Z")).min(-value_limit).max(value_limit);

  b.add_output<decl::Vector>(N_("Vector"));
}
/**
 * GPU callback of the "Combine XYZ" node.
 *
 * Forwards the material, node and both socket stacks to #GPU_stack_link together with the
 * name of the GPU function to link, and returns whatever that call returns.
 */
static int gpu_shader_combxyz(GPUMaterial *mat,
                              bNode *node,
                              bNodeExecData *UNUSED(execdata),
                              GPUNodeStack *in,
                              GPUNodeStack *out)
{
  const char *gpu_function_name = "combine_xyz";
  return GPU_stack_link(mat, node, gpu_function_name, in, out);
}
/**
 * Multi-function callback of the "Combine XYZ" node: hands a function-local static function,
 * which builds a #float3 from three floats, to the builder.
 */
static void sh_node_combxyz_build_multi_function(NodeMultiFunctionBuilder &builder)
{
  /* Constructed on the first call only; later calls pass the same object. */
  static fn::CustomMF_SI_SI_SI_SO<float, float, float, float3> combine_fn{
      "Combine Vector",
      [](const float x, const float y, const float z) -> float3 { return float3(x, y, z); },
      fn::CustomMF_presets::AllSpanOrSingle()};

  builder.set_matching_fn(combine_fn);
}
} // namespace blender::nodes::node_shader_sepcomb_xyz_cc
/**
 * Set up and register the node type for the "Combine XYZ" shader node.
 */
void register_node_type_sh_combxyz()
{
  namespace file_ns = blender::nodes::node_shader_sepcomb_xyz_cc;

  /* Static storage; its address is passed to the registration call at the end. */
  static bNodeType ntype;
  sh_fn_node_type_base(&ntype, SH_NODE_COMBXYZ, "Combine XYZ", NODE_CLASS_CONVERTER);

  /* Callbacks stored directly on the type. */
  ntype.declare = file_ns::sh_node_combxyz_declare;
  ntype.build_multi_function = file_ns::sh_node_combxyz_build_multi_function;

  /* GPU callback, installed through its helper. */
  node_type_gpu(&ntype, file_ns::gpu_shader_combxyz);

  nodeRegisterType(&ntype);
}