Geometry Nodes: use lazy evaluation in repeat zone #112421

Merged
Jacques Lucke merged 26 commits from JacquesLucke/blender:lazy-repeat-zone into main 2023-09-22 08:58:26 +02:00
3 changed files with 365 additions and 171 deletions

View File

@ -491,6 +491,9 @@ static void find_side_effect_nodes_for_viewer_path(
return;
}
local_side_effect_nodes.nodes_by_context.add(compute_context_builder.hash(), lf_zone_node);
local_side_effect_nodes.iterations_by_repeat_zone.add(
{compute_context_builder.hash(), typed_elem.repeat_output_node_id},
typed_elem.iteration);
compute_context_builder.push<bke::RepeatZoneComputeContext>(*next_zone->output_node,
typed_elem.iteration);
zone = next_zone;
@ -556,6 +559,9 @@ static void find_side_effect_nodes_for_viewer_path(
for (const auto item : local_side_effect_nodes.nodes_by_context.items()) {
r_side_effect_nodes.nodes_by_context.add_multiple(item.key, item.value);
}
for (const auto item : local_side_effect_nodes.iterations_by_repeat_zone.items()) {
r_side_effect_nodes.iterations_by_repeat_zone.add_multiple(item.key, item.value);
}
}
static void find_side_effect_nodes(const NodesModifierData &nmd,

View File

@ -130,6 +130,11 @@ class GeoNodesSimulationParams {
struct GeoNodesSideEffectNodes {
MultiValueMap<ComputeContextHash, const lf::FunctionNode *> nodes_by_context;
/**
* The repeat zone is identified by the compute context of the parent and the identifier of the
JacquesLucke marked this conversation as resolved Outdated

I guess it's the compute context of the parent of the repeat zone? Might be worth clarifying

I guess it's the compute context of the _parent_ of the repeat zone? Might be worth clarifying
* repeat output node.
*/
MultiValueMap<std::pair<ComputeContextHash, int32_t>, int> iterations_by_repeat_zone;
};
/**

View File

@ -1392,6 +1392,159 @@ struct ZoneBodyFunction {
ZoneFunctionIndices indices;
};
/**
 * Wraps the execution of a repeat loop body. The purpose is to set up the correct #ComputeContext
 * inside of the loop body. This is necessary to support correct logging inside of a repeat zone.
 * An alternative would be to use a separate `LazyFunction` for every iteration, but that would
 * have higher overhead.
 */
class RepeatBodyNodeExecuteWrapper : public lf::GraphExecutorNodeExecuteWrapper {
 public:
  /** Output node of the repeat zone; identifies the zone in the per-iteration compute context. */
  const bNode *repeat_output_bnode_ = nullptr;
  /** All loop-body nodes; the index of a node in this set is its iteration number. */
  VectorSet<lf::FunctionNode *> *lf_body_nodes_ = nullptr;

  /**
   * Called for every node in the graph. Non-body nodes are executed unchanged; body nodes are
   * executed with a per-iteration #ComputeContext so that socket logging is attributed to the
   * right iteration.
   */
  void execute_node(const lf::FunctionNode &node,
                    lf::Params &params,
                    const lf::Context &context) const override
  {
    GeoNodesLFUserData &user_data = *static_cast<GeoNodesLFUserData *>(context.user_data);
    const int iteration = lf_body_nodes_->index_of_try(const_cast<lf::FunctionNode *>(&node));
    const LazyFunction &fn = node.function();
    if (iteration == -1) {
      /* The node is not a loop body node, just execute it normally. */
      fn.execute(params, context);
      return;
    }
    /* Set up context for the loop body evaluation. */
    bke::RepeatZoneComputeContext body_compute_context{
        user_data.compute_context, *repeat_output_bnode_, iteration};
    GeoNodesLFUserData body_user_data = user_data;
    body_user_data.compute_context = &body_compute_context;
    if (user_data.modifier_data && user_data.modifier_data->socket_log_contexts) {
      /* Only log socket values for iterations whose context is of interest. */
      body_user_data.log_socket_values = user_data.modifier_data->socket_log_contexts->contains(
          body_compute_context.hash());
    }
    GeoNodesLFLocalUserData body_local_user_data{body_user_data};
    lf::Context body_context{context.storage, &body_user_data, &body_local_user_data};
    /* Actually execute the loop body. */
    fn.execute(params, body_context);
  }
};
/**
 * Knows which iterations of the loop evaluation have side effects.
 */
class RepeatZoneSideEffectProvider : public lf::GraphExecutorSideEffectProvider {
 public:
  /** Output node of the repeat zone the iterations belong to. */
  const bNode *repeat_output_bnode_ = nullptr;
  /** Loop-body nodes indexed by iteration. */
  Span<lf::FunctionNode *> lf_body_nodes_;

  Vector<const lf::FunctionNode *> get_nodes_with_side_effects(
      const lf::Context &context) const override
  {
    GeoNodesLFUserData &user_data = *static_cast<GeoNodesLFUserData *>(context.user_data);
    /* Without modifier data and its side-effect bookkeeping, nothing has to be forced. */
    if (!user_data.modifier_data || !user_data.modifier_data->side_effect_nodes) {
      return {};
    }
    const ComputeContextHash &context_hash = user_data.compute_context->hash();
    const Span<int> side_effect_iterations =
        user_data.modifier_data->side_effect_nodes->iterations_by_repeat_zone.lookup(
            {context_hash, repeat_output_bnode_->identifier});
    Vector<const lf::FunctionNode *> nodes_with_side_effects;
    for (const int iteration : side_effect_iterations) {
      /* Skip iteration indices that do not correspond to an existing body node. */
      if (iteration < 0 || iteration >= lf_body_nodes_.size()) {
        continue;
      }
      nodes_with_side_effects.append(lf_body_nodes_[iteration]);
    }
    return nodes_with_side_effects;
  }
};
/** Evaluation state of one repeat zone; created lazily on first evaluation. */
struct RepeatEvalStorage {
  /* Declared first so it is destructed last: objects below are allocated from it. */
  LinearAllocator<> allocator;
  /* One function node per loop iteration; a node's index in this set is its iteration number. */
  VectorSet<lf::FunctionNode *> lf_body_nodes;
  /* Graph containing the unrolled loop body (one body node per iteration). */
  lf::Graph graph;
  /* Combines per-iteration border-link usages ("used by any iteration"). */
  std::optional<LazyFunctionForLogicalOr> or_function;
  std::optional<RepeatZoneSideEffectProvider> side_effect_provider;
  std::optional<RepeatBodyNodeExecuteWrapper> body_execute_wrapper;
  /* Executor for #graph; only constructed on the first evaluation. */
  std::optional<lf::GraphExecutor> graph_executor;
  void *graph_executor_storage = nullptr;
  bool multi_threading_enabled = false;
  /* Map graph-executor parameter indices to zone parameter indices; needed because the
   * iterations input is handled outside of the graph. */
  IndexRange input_index_map;
  Vector<int> output_index_map;
};
class ParamsForRepeatZoneGraph : public lf::Params {
private:
lf::Params &zone_params_;
RepeatEvalStorage &eval_storage_;
public:
ParamsForRepeatZoneGraph(RepeatEvalStorage &eval_storage, lf::Params &zone_params)
: lf::Params(*eval_storage.graph_executor, eval_storage.multi_threading_enabled),
zone_params_{zone_params},
eval_storage_(eval_storage)
{
}
int map_input_index(const int index) const
{
return eval_storage_.input_index_map[index];
}
int map_output_index(const int index) const
{
return eval_storage_.output_index_map[index];
}
void *try_get_input_data_ptr_impl(const int index) const
{
return zone_params_.try_get_input_data_ptr(this->map_input_index(index));
}
void *try_get_input_data_ptr_or_request_impl(const int index)
{
return zone_params_.try_get_input_data_ptr_or_request(this->map_input_index(index));
}
void *get_output_data_ptr_impl(const int index)
{
return zone_params_.get_output_data_ptr(this->map_output_index(index));
}
void output_set_impl(const int index)
{
return zone_params_.output_set(this->map_output_index(index));
}
bool output_was_set_impl(const int index) const
{
return zone_params_.output_was_set(this->map_output_index(index));
}
lf::ValueUsage get_output_usage_impl(const int index) const
{
return zone_params_.get_output_usage(this->map_output_index(index));
}
void set_input_unused_impl(const int index)
{
return zone_params_.set_input_unused(this->map_input_index(index));
}
bool try_enable_multi_threading_impl()
{
if (eval_storage_.multi_threading_enabled) {
return true;
}
if (zone_params_.try_enable_multi_threading()) {
eval_storage_.multi_threading_enabled = true;
return true;
}
return false;
}
};
class LazyFunctionForRepeatZone : public LazyFunction {
private:
const bNodeTreeZone &zone_;
@ -1410,19 +1563,23 @@ class LazyFunctionForRepeatZone : public LazyFunction {
{
debug_name_ = "Repeat Zone";
for (const bNodeSocket *socket : zone.input_node->input_sockets().drop_back(1)) {
inputs_.append_as(socket->name, *socket->typeinfo->geometry_nodes_cpp_type);
inputs_.append_as("Iterations", CPPType::get<ValueOrField<int>>(), lf::ValueUsage::Used);
for (const bNodeSocket *socket : zone.input_node->input_sockets().drop_front(1).drop_back(1)) {
inputs_.append_as(
socket->name, *socket->typeinfo->geometry_nodes_cpp_type, lf::ValueUsage::Maybe);
}
zone_info.indices.inputs.main = inputs_.index_range();
for (const bNodeLink *link : zone.border_links) {
inputs_.append_as(link->fromsock->name, *link->tosock->typeinfo->geometry_nodes_cpp_type);
inputs_.append_as(link->fromsock->name,
*link->tosock->typeinfo->geometry_nodes_cpp_type,
lf::ValueUsage::Maybe);
}
zone_info.indices.inputs.border_links = inputs_.index_range().take_back(
zone.border_links.size());
for (const bNodeSocket *socket : zone.output_node->output_sockets().drop_back(1)) {
mod_moder marked this conversation as resolved Outdated

Can it be IndexRange instead of Vector, filled by iota?

Can it be `IndexRange` instead of `Vector`, filled by `iota`?

Note that the output_index_map is not an IndexRange, the input map could be one in theory though.

Note that the `output_index_map` is not an `IndexRange`, the input map could be one in theory though.

Main reason I didn't do this before is that I was contemplating to make this index-mapped-params a more general thing, but that doesn't seem necessary currently.

Main reason I didn't do this before is that I was contemplating to make this index-mapped-params a more general thing, but that doesn't seem necessary currently.
inputs_.append_as("Usage", CPPType::get<bool>());
inputs_.append_as("Usage", CPPType::get<bool>(), lf::ValueUsage::Maybe);
outputs_.append_as(socket->name, *socket->typeinfo->geometry_nodes_cpp_type);
}
zone_info.indices.inputs.output_usages = inputs_.index_range().take_back(
@ -1443,203 +1600,229 @@ class LazyFunctionForRepeatZone : public LazyFunction {
for (const auto item : body_fn_.indices.inputs.attributes_by_field_source_index.items()) {
const int index = inputs_.append_and_get_index_as(
"Attribute Set", CPPType::get<bke::AnonymousAttributeSet>());
"Attribute Set", CPPType::get<bke::AnonymousAttributeSet>(), lf::ValueUsage::Maybe);
zone_info.indices.inputs.attributes_by_field_source_index.add_new(item.key, index);
}
for (const auto item : body_fn_.indices.inputs.attributes_by_caller_propagation_index.items())
{
const int index = inputs_.append_and_get_index_as(
"Attribute Set", CPPType::get<bke::AnonymousAttributeSet>());
"Attribute Set", CPPType::get<bke::AnonymousAttributeSet>(), lf::ValueUsage::Maybe);
zone_info.indices.inputs.attributes_by_caller_propagation_index.add_new(item.key, index);
}
}
void *init_storage(LinearAllocator<> &allocator) const override
{
return allocator.construct<RepeatEvalStorage>().release();
}
void destruct_storage(void *storage) const override
{
RepeatEvalStorage *s = static_cast<RepeatEvalStorage *>(storage);
if (s->graph_executor_storage) {
s->graph_executor->destruct_storage(s->graph_executor_storage);
}
std::destroy_at(s);
}
void execute_impl(lf::Params &params, const lf::Context &context) const override
{
GeoNodesLFUserData &user_data = *static_cast<GeoNodesLFUserData *>(context.user_data);
const NodeGeometryRepeatOutput &node_storage = *static_cast<const NodeGeometryRepeatOutput *>(
repeat_output_bnode_.storage);
RepeatEvalStorage &eval_storage = *static_cast<RepeatEvalStorage *>(context.storage);
const int iterations_usage_index = zone_info_.indices.outputs.input_usages[0];
if (params.output_was_set(iterations_usage_index)) {
/* The iterations input is always used. */
params.set_output(iterations_usage_index, true);
}
if (!eval_storage.graph_executor) {
/* Create the execution graph in the first evaluation. */
this->initialize_execution_graph(params, eval_storage, node_storage);
}
/* Execute the graph for the repeat zone. */
ParamsForRepeatZoneGraph eval_graph_params{eval_storage, params};
lf::Context eval_graph_context{
eval_storage.graph_executor_storage, context.user_data, context.local_user_data};
eval_storage.graph_executor->execute(eval_graph_params, eval_graph_context);
}
/**
* Generate a lazy-function graph that contains the loop body (`body_fn_`) as many times
* as there are iterations. Since this graph depends on the number of iterations, it can't be
* reused in general. We could consider caching a version of this graph per number of iterations,
* but right now that doesn't seem worth it. In practice, it takes much less time to create the
* graph than to execute it (for intended use cases of this generic implementation, more special
* case repeat loop evaluations could be implemented separately).
*/
void initialize_execution_graph(lf::Params &params,
RepeatEvalStorage &eval_storage,
const NodeGeometryRepeatOutput &node_storage) const
{
const int num_repeat_items = node_storage.items_num;
const int num_border_links = body_fn_.indices.inputs.border_links.size();
/* Number of iterations to evaluate. */
const int iterations = std::max<int>(
0, params.get_input<ValueOrField<int>>(zone_info_.indices.inputs.main[0]).as_value());
/* Take iterations input into account. */
const int main_inputs_offset = 1;
const int repeat_items_num = node_storage.items_num;
/* Gather data types of the repeat items. */
Array<const CPPType *> repeat_item_types(repeat_items_num);
for (const int i : body_fn_.indices.inputs.main.index_range()) {
const int input_i = body_fn_.indices.inputs.main[i];
const CPPType &type = *body_fn_.function->inputs()[input_i].type;
repeat_item_types[i] = &type;
lf::Graph &lf_graph = eval_storage.graph;
Vector<lf::GraphInputSocket *> lf_inputs;
Vector<lf::GraphOutputSocket *> lf_outputs;
for (const int i : inputs_.index_range()) {
const lf::Input &input = inputs_[i];
lf_inputs.append(&lf_graph.add_input(*input.type, input.debug_name));
}
for (const int i : outputs_.index_range()) {
const lf::Output &output = outputs_[i];
lf_outputs.append(&lf_graph.add_output(*output.type, output.debug_name));
}
LinearAllocator<> allocator;
Array<void *, 64> repeat_item_values((iterations + 1) * repeat_items_num, nullptr);
/* Allocate memory for the looped values. */
for (const int iteration : IndexRange(iterations)) {
MutableSpan<void *> item_values = repeat_item_values.as_mutable_span().slice(
(iteration + 1) * repeat_items_num, repeat_items_num);
for (const int item_i : IndexRange(repeat_items_num)) {
const CPPType &type = *repeat_item_types[item_i];
void *buffer = allocator.allocate(type.size(), type.alignment());
item_values[item_i] = buffer;
/* Create body nodes. */
VectorSet<lf::FunctionNode *> &lf_body_nodes = eval_storage.lf_body_nodes;
for ([[maybe_unused]] const int i : IndexRange(iterations)) {
lf::FunctionNode &lf_node = lf_graph.add_function(*body_fn_.function);
lf_body_nodes.add_new(&lf_node);
}
/* Create nodes for combining border link usages. A border link is used when any of the loop
* bodies uses the border link, so an "or" node is necessary. */
Array<lf::FunctionNode *> lf_border_link_usage_or_nodes(num_border_links);
eval_storage.or_function.emplace(iterations);
for (const int i : IndexRange(num_border_links)) {
lf::FunctionNode &lf_node = lf_graph.add_function(*eval_storage.or_function);
lf_border_link_usage_or_nodes[i] = &lf_node;
}
/* Handle body nodes one by one. */
for (const int iter_i : lf_body_nodes.index_range()) {
lf::FunctionNode &lf_node = *lf_body_nodes[iter_i];
for (const int i : IndexRange(num_border_links)) {
lf_graph.add_link(*lf_inputs[zone_info_.indices.inputs.border_links[i]],
lf_node.input(body_fn_.indices.inputs.border_links[i]));
lf_graph.add_link(lf_node.output(body_fn_.indices.outputs.border_link_usages[i]),
lf_border_link_usage_or_nodes[i]->input(iter_i));
}
}
/* Load the inputs of the first repeat iteration. */
MutableSpan<void *> first_item_values = repeat_item_values.as_mutable_span().take_front(
repeat_items_num);
for (const int i : IndexRange(repeat_items_num)) {
/* +1 because of the iterations input. */
const int input_index = zone_info_.indices.inputs.main[i + 1];
void *value = params.try_get_input_data_ptr(input_index);
BLI_assert(value != nullptr);
first_item_values[i] = value;
}
/* Load border link values. */
const int border_links_num = zone_info_.indices.inputs.border_links.size();
Array<void *> border_link_input_values(border_links_num, nullptr);
for (const int i : IndexRange(border_links_num)) {
const int input_index = zone_info_.indices.inputs.border_links[i];
void *value = params.try_get_input_data_ptr(input_index);
BLI_assert(value != nullptr);
border_link_input_values[i] = value;
}
/* Load attribute sets that are needed to propagate attributes correctly in the zone. */
Map<int, bke::AnonymousAttributeSet *> attribute_set_by_field_source_index;
Map<int, bke::AnonymousAttributeSet *> attribute_set_by_caller_propagation_index;
for (const auto item : zone_info_.indices.inputs.attributes_by_field_source_index.items()) {
bke::AnonymousAttributeSet &attribute_set = params.get_input<bke::AnonymousAttributeSet>(
item.value);
attribute_set_by_field_source_index.add_new(item.key, &attribute_set);
}
for (const auto item :
zone_info_.indices.inputs.attributes_by_caller_propagation_index.items()) {
bke::AnonymousAttributeSet &attribute_set = params.get_input<bke::AnonymousAttributeSet>(
item.value);
attribute_set_by_caller_propagation_index.add_new(item.key, &attribute_set);
}
const int body_inputs_num = body_fn_.function->inputs().size();
const int body_outputs_num = body_fn_.function->outputs().size();
/* Evaluate the repeat zone eagerly, one iteration at a time.
* This can be made more lazy as a separate step. */
for (const int iteration : IndexRange(iterations)) {
/* Prepare all data that has to be passed into the evaluation of the repeat zone body. */
Array<GMutablePointer> inputs(body_inputs_num);
Array<GMutablePointer> outputs(body_outputs_num);
Array<std::optional<lf::ValueUsage>> input_usages(body_inputs_num);
Array<lf::ValueUsage> output_usages(body_outputs_num, lf::ValueUsage::Used);
Array<bool> set_outputs(body_outputs_num, false);
/* Prepare pointers to the main input and output values of the repeat zone,
* as well as their usages. */
Array<bool> tmp_main_input_usages(repeat_items_num);
for (const int i : IndexRange(repeat_items_num)) {
const CPPType &type = *repeat_item_types[i];
void *prev_value = repeat_item_values[iteration * repeat_items_num + i];
void *next_value = repeat_item_values[(iteration + 1) * repeat_items_num + i];
inputs[body_fn_.indices.inputs.main[i]] = {type, prev_value};
outputs[body_fn_.indices.outputs.main[i]] = {type, next_value};
outputs[body_fn_.indices.outputs.input_usages[i]] = &tmp_main_input_usages[i];
}
static bool static_true = true;
for (const int input_index : body_fn_.indices.inputs.output_usages) {
/* All main outputs are used currently. */
inputs[input_index] = &static_true;
}
/* Prepare border link values for the repeat body. */
Array<bool> tmp_border_link_usages(border_links_num);
for (const int i : IndexRange(border_links_num)) {
const int input_index = body_fn_.indices.inputs.border_links[i];
const int usage_index = body_fn_.indices.outputs.border_link_usages[i];
const CPPType &type = *body_fn_.function->inputs()[input_index].type;
/* Need to copy because a lazy function is allowed to modify the input (e.g. move from
* it). */
void *value_copy = allocator.allocate(type.size(), type.alignment());
type.copy_construct(border_link_input_values[i], value_copy);
inputs[input_index] = {type, value_copy};
outputs[usage_index] = &tmp_border_link_usages[i];
}
/* Prepare attribute sets that are passed into the repeat body. */
for (const auto item : body_fn_.indices.inputs.attributes_by_field_source_index.items()) {
bke::AnonymousAttributeSet &attribute_set =
*allocator
.construct<bke::AnonymousAttributeSet>(
*attribute_set_by_field_source_index.lookup(item.key))
.release();
inputs[item.value] = &attribute_set;
lf_graph.add_link(
*lf_inputs[zone_info_.indices.inputs.attributes_by_field_source_index.lookup(
item.key)],
lf_node.input(item.value));
}
for (const auto item :
body_fn_.indices.inputs.attributes_by_caller_propagation_index.items()) {
bke::AnonymousAttributeSet &attribute_set =
*allocator
.construct<bke::AnonymousAttributeSet>(
*attribute_set_by_caller_propagation_index.lookup(item.key))
.release();
inputs[item.value] = &attribute_set;
}
/* Prepare evaluation context for the repeat body. */
bke::RepeatZoneComputeContext body_compute_context{
user_data.compute_context, repeat_output_bnode_, iteration};
GeoNodesLFUserData body_user_data = user_data;
body_user_data.compute_context = &body_compute_context;
if (user_data.modifier_data && user_data.modifier_data->socket_log_contexts) {
body_user_data.log_socket_values = user_data.modifier_data->socket_log_contexts->contains(
body_compute_context.hash());
}
GeoNodesLFLocalUserData body_local_user_data{body_user_data};
void *body_storage = body_fn_.function->init_storage(allocator);
lf::Context body_context{body_storage, &body_user_data, &body_local_user_data};
lf::BasicParams body_params{
*body_fn_.function, inputs, outputs, input_usages, output_usages, set_outputs};
/* Actually evaluate the repeat body. */
body_fn_.function->execute(body_params, body_context);
/* Destruct values that are not needed after the evaluation anymore. */
body_fn_.function->destruct_storage(body_storage);
for (const int i : body_fn_.indices.inputs.border_links) {
inputs[i].destruct();
}
for (const int i : body_fn_.indices.inputs.attributes_by_field_source_index.values()) {
inputs[i].destruct();
}
for (const int i : body_fn_.indices.inputs.attributes_by_caller_propagation_index.values()) {
inputs[i].destruct();
lf_graph.add_link(
*lf_inputs[zone_info_.indices.inputs.attributes_by_caller_propagation_index.lookup(
item.key)],
lf_node.input(item.value));
}
}
/* Set outputs of the repeat zone. */
for (const int i : IndexRange(repeat_items_num)) {
void *computed_value = repeat_item_values[iterations * repeat_items_num + i];
const int output_index = zone_info_.indices.outputs.main[i];
void *r_value = params.get_output_data_ptr(output_index);
const CPPType &type = *repeat_item_types[i];
type.move_construct(computed_value, r_value);
params.output_set(output_index);
}
for (const int i : zone_info_.indices.outputs.input_usages) {
params.set_output(i, true);
}
for (const int i : IndexRange(border_links_num)) {
params.set_output(zone_info_.indices.outputs.border_link_usages[i], true);
}
/* Destruct remaining values. */
for (const int iteration : IndexRange(iterations)) {
MutableSpan<void *> item_values = repeat_item_values.as_mutable_span().slice(
(iteration + 1) * repeat_items_num, repeat_items_num);
for (const int item_i : IndexRange(repeat_items_num)) {
const CPPType &type = *repeat_item_types[item_i];
type.destruct(item_values[item_i]);
/* Handle body nodes pair-wise. */
for (const int iter_i : lf_body_nodes.index_range().drop_back(1)) {
lf::FunctionNode &lf_node = *lf_body_nodes[iter_i];
lf::FunctionNode &lf_next_node = *lf_body_nodes[iter_i + 1];
for (const int i : IndexRange(num_repeat_items)) {
lf_graph.add_link(lf_node.output(body_fn_.indices.outputs.main[i]),
lf_next_node.input(body_fn_.indices.inputs.main[i]));
/* TODO: Add back-link after being able to check for cyclic dependencies. */
// lf_graph.add_link(lf_next_node.output(body_fn_.indices.outputs.input_usages[i]),
// lf_node.input(body_fn_.indices.inputs.output_usages[i]));
static bool static_true = true;
lf_node.input(body_fn_.indices.inputs.output_usages[i]).set_default_value(&static_true);
}
}
/* Handle border link usage outputs. */
for (const int i : IndexRange(num_border_links)) {
lf_graph.add_link(lf_border_link_usage_or_nodes[i]->output(0),
*lf_outputs[zone_info_.indices.outputs.border_link_usages[i]]);
}
if (iterations > 0) {
{
/* Link first body node to input/output nodes. */
lf::FunctionNode &lf_first_body_node = *lf_body_nodes[0];
for (const int i : IndexRange(num_repeat_items)) {
lf_graph.add_link(*lf_inputs[zone_info_.indices.inputs.main[i + main_inputs_offset]],
lf_first_body_node.input(body_fn_.indices.inputs.main[i]));
lf_graph.add_link(
lf_first_body_node.output(body_fn_.indices.outputs.input_usages[i]),
JacquesLucke marked this conversation as resolved Outdated

Typo: contains contains

Typo: `contains contains`
*lf_outputs[zone_info_.indices.outputs.input_usages[i + main_inputs_offset]]);
}
}
{
/* Link last body node to input/output nodes. */
lf::FunctionNode &lf_last_body_node = *lf_body_nodes.as_span().last();
for (const int i : IndexRange(num_repeat_items)) {
lf_graph.add_link(lf_last_body_node.output(body_fn_.indices.outputs.main[i]),
*lf_outputs[zone_info_.indices.outputs.main[i]]);
lf_graph.add_link(*lf_inputs[zone_info_.indices.inputs.output_usages[i]],
lf_last_body_node.input(body_fn_.indices.inputs.output_usages[i]));
}
}
}
else {
/* There are no iterations, just link the input directly to the output. */
for (const int i : IndexRange(num_repeat_items)) {
lf_graph.add_link(*lf_inputs[zone_info_.indices.inputs.main[i + main_inputs_offset]],
*lf_outputs[zone_info_.indices.outputs.main[i]]);
lf_graph.add_link(
*lf_inputs[zone_info_.indices.inputs.output_usages[i]],
*lf_outputs[zone_info_.indices.outputs.input_usages[i + main_inputs_offset]]);
}
for (const int i : IndexRange(num_border_links)) {
static bool static_false = false;
lf_outputs[zone_info_.indices.outputs.border_link_usages[i]]->set_default_value(
&static_false);
}
}
/* The graph is ready, update the node indices which are required by the executor. */
lf_graph.update_node_indices();
// std::cout << "\n\n" << lf_graph.to_dot() << "\n\n";
/* Create a mapping from parameter indices inside of this graph to parameters of the repeat
* zone. The main complexity below stems from the fact that the iterations input is handled
* outside of this graph. */
JacquesLucke marked this conversation as resolved Outdated

Shorter alias: lf_body_nodes.add_new(&lf_node);;

Shorter alias: `lf_body_nodes.add_new(&lf_node);`;
eval_storage.output_index_map.reinitialize(outputs_.size() - 1);
eval_storage.input_index_map = inputs_.index_range().drop_front(1);
Vector<const lf::GraphInputSocket *> lf_graph_inputs = lf_inputs.as_span().drop_front(1);
const int iteration_usage_index = zone_info_.indices.outputs.input_usages[0];
std::iota(eval_storage.output_index_map.begin(),
eval_storage.output_index_map.begin() + iteration_usage_index,
0);
std::iota(eval_storage.output_index_map.begin() + iteration_usage_index,
eval_storage.output_index_map.end(),
iteration_usage_index + 1);
Vector<const lf::GraphOutputSocket *> lf_graph_outputs = lf_outputs.as_span().take_front(
iteration_usage_index);
lf_graph_outputs.extend(lf_outputs.as_span().drop_front(iteration_usage_index + 1));
eval_storage.body_execute_wrapper.emplace();
eval_storage.body_execute_wrapper->repeat_output_bnode_ = &repeat_output_bnode_;
eval_storage.body_execute_wrapper->lf_body_nodes_ = &lf_body_nodes;
eval_storage.side_effect_provider.emplace();
eval_storage.side_effect_provider->repeat_output_bnode_ = &repeat_output_bnode_;
eval_storage.side_effect_provider->lf_body_nodes_ = lf_body_nodes;
eval_storage.graph_executor.emplace(lf_graph,
JacquesLucke marked this conversation as resolved
Review

std::move?

`std::move`?
std::move(lf_graph_inputs),
std::move(lf_graph_outputs),
nullptr,
&*eval_storage.side_effect_provider,
&*eval_storage.body_execute_wrapper);
eval_storage.graph_executor_storage = eval_storage.graph_executor->init_storage(
eval_storage.allocator);
}
std::string input_name(const int i) const override