WIP: Functions: new local allocator for better memory reuse and performance #104630

Draft
Jacques Lucke wants to merge 44 commits from JacquesLucke/blender:local-allocator into main

1 changed file with 47 additions and 26 deletions
Showing only changes of commit e6c5186dd3
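
As a rough illustration of what the branch title refers to, the sketch below shows a scope-local linear ("bump") allocator with an allocate(size, alignment) call like the ones visible in the diff. The class name, chunk size and interface here are hypothetical and heavily simplified; this is not the actual allocator implementation added in this branch.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

/* Minimal sketch of a scope-local linear ("bump") allocator, assuming a
 * simplified interface. The real allocator in this branch is more elaborate
 * (per-thread buffers, reuse of freed chunks, etc.); this only illustrates
 * the basic idea of cheap allocations that are released together. */
class LocalLinearAllocatorSketch {
 private:
  std::vector<std::vector<std::byte>> buffers_;
  size_t offset_ = 0;
  static constexpr size_t chunk_size_ = 1 << 14;

 public:
  void *allocate(const size_t size, const size_t alignment)
  {
    if (buffers_.empty() || offset_ + size + alignment > buffers_.back().size()) {
      /* Start a new chunk. Old chunks stay alive until the allocator is
       * destructed, so individual allocations never have to be freed. */
      buffers_.emplace_back(std::max(chunk_size_, size + alignment));
      offset_ = 0;
    }
    std::byte *begin = buffers_.back().data() + offset_;
    const uintptr_t misalignment = reinterpret_cast<uintptr_t>(begin) % alignment;
    const size_t padding = (misalignment == 0) ? 0 : alignment - misalignment;
    offset_ += padding + size;
    return begin + padding;
  }
};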


@@ -153,7 +153,8 @@ struct NodeState {
/**
* Set to true once the node is done running for the first time.
*/
bool had_initialization = false;
bool always_used_linked_inputs_requested = false;
bool is_first_execution = true;
/**
* Nodes with side effects should always be executed when their required inputs have been
* computed.
@@ -733,41 +734,23 @@ class Executor {
return;
}
if (!node_state.had_initialization) {
/* Initialize storage. */
node_state.storage = fn.init_storage(allocator);
/* Load unlinked inputs. */
for (const int input_index : node.inputs().index_range()) {
const InputSocket &input_socket = node.input(input_index);
if (input_socket.origin() != nullptr) {
continue;
}
InputState &input_state = node_state.inputs[input_index];
const CPPType &type = input_socket.type();
const void *default_value = input_socket.default_value();
BLI_assert(default_value != nullptr);
if (self_.logger_ != nullptr) {
self_.logger_->log_socket_value(input_socket, {type, default_value}, *context_);
}
void *buffer = allocator.allocate(type.size(), type.alignment());
type.copy_construct(default_value, buffer);
this->forward_value_to_input(locked_node, input_state, {type, buffer}, current_task);
}
if (!node_state.always_used_linked_inputs_requested) {
/* Request linked inputs that are always needed. */
const Span<Input> fn_inputs = fn.inputs();
for (const int input_index : fn_inputs.index_range()) {
const Input &fn_input = fn_inputs[input_index];
if (fn_input.usage == ValueUsage::Used) {
const InputSocket &input_socket = node.input(input_index);
this->set_input_required(locked_node, input_socket);
if (input_socket.origin() != nullptr) {
this->set_input_required(locked_node, input_socket);
}
}
}
node_state.had_initialization = true;
node_state.always_used_linked_inputs_requested = true;
}
const bool allow_missing_requested_inputs = fn.allow_missing_requested_inputs();
for (const int input_index : node_state.inputs.index_range()) {
InputState &input_state = node_state.inputs[input_index];
if (input_state.was_ready_for_execution) {
@@ -777,7 +760,11 @@ class Executor {
input_state.was_ready_for_execution = true;
continue;
}
if (!fn.allow_missing_requested_inputs()) {
const InputSocket &socket = node.input(input_index);
if (socket.origin() == nullptr) {
continue;
}
if (!allow_missing_requested_inputs) {
if (input_state.usage == ValueUsage::Used) {
return;
}
@@ -788,6 +775,40 @@ class Executor {
});
if (node_needs_execution) {
if (node_state.is_first_execution) {
/* Initialize storage. */
node_state.storage = fn.init_storage(allocator);
/* Load unlinked inputs. */
for (const int input_index : node.inputs().index_range()) {
const InputSocket &input_socket = node.input(input_index);
if (input_socket.origin() != nullptr) {
continue;
}
InputState &input_state = node_state.inputs[input_index];
if (input_state.usage == ValueUsage::Unused) {
continue;
}
const CPPType &type = input_socket.type();
const void *default_value = input_socket.default_value();
BLI_assert(default_value != nullptr);
if (self_.logger_ != nullptr) {
self_.logger_->log_socket_value(input_socket, {type, default_value}, *context_);
}
void *buffer = allocator.allocate(type.size(), type.alignment());
type.copy_construct(default_value, buffer);
input_state.value = buffer;
BLI_assert(!input_state.was_ready_for_execution);
input_state.was_ready_for_execution = true;
if (input_state.usage == ValueUsage::Used) {
node_state.missing_required_inputs -= 1;
}
}
node_state.is_first_execution = false;
}
/* Importantly, the node must not be locked when it is executed. That would result in locks
* being held for a long time in some cases and in multiple locks being held by the same
* thread in the same graph, which can lead to deadlocks. */
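
In short, this commit splits the old had_initialization flag into always_used_linked_inputs_requested and is_first_execution: requesting the always-used linked inputs still happens the first time the node is scheduled, but fn.init_storage() and the copy-construction of unlinked default values are deferred until the node is actually about to execute, and unlinked inputs whose usage is Unused are skipped. The self-contained sketch below illustrates that two-phase pattern with simplified stand-in types; InputState, NodeState and run_node here are illustrative, not the executor's real code.

#include <cstdio>
#include <optional>
#include <vector>

/* Stand-in types for illustration only; the real executor works on
 * lazy-function graph sockets, CPPType values and a task scheduler. */
enum class ValueUsage { Used, Maybe, Unused };

struct InputState {
  bool is_linked = false; /* Stand-in for input_socket.origin() != nullptr. */
  ValueUsage usage = ValueUsage::Maybe;
  std::optional<int> value; /* Stand-in for the copy-constructed default value. */
};

struct NodeState {
  /* Replaces the single had_initialization flag from before this commit. */
  bool always_used_linked_inputs_requested = false;
  bool is_first_execution = true;
  std::vector<InputState> inputs;
};

static void run_node(NodeState &node_state, const bool node_needs_execution)
{
  /* Phase 1: on the first visit, only request linked inputs that are always
   * needed. No storage is allocated and no defaults are copied yet. */
  if (!node_state.always_used_linked_inputs_requested) {
    for (const InputState &input : node_state.inputs) {
      if (input.is_linked && input.usage == ValueUsage::Used) {
        std::printf("request linked input\n");
      }
    }
    node_state.always_used_linked_inputs_requested = true;
  }
  if (!node_needs_execution) {
    return;
  }
  /* Phase 2: storage initialization and loading of unlinked default values is
   * deferred until the node actually executes for the first time, and inputs
   * that are known to be unused are skipped entirely. */
  if (node_state.is_first_execution) {
    for (InputState &input : node_state.inputs) {
      if (!input.is_linked && input.usage != ValueUsage::Unused) {
        input.value = 0; /* Stand-in for type.copy_construct(default_value, buffer). */
      }
    }
    node_state.is_first_execution = false;
  }
  std::printf("execute node\n");
}

int main()
{
  NodeState node_state;
  node_state.inputs.push_back({true, ValueUsage::Used, std::nullopt});
  node_state.inputs.push_back({false, ValueUsage::Unused, std::nullopt});
  run_node(node_state, false); /* Only requests the linked input. */
  run_node(node_state, true);  /* Now initializes storage and executes. */
}

The apparent benefit is that nodes which never end up executing no longer pay for fn.init_storage() or for copying unlinked default values.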