From 3d3c4216b3fd610c684bf88d5ac2f09d5e0072d0 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Fri, 30 Dec 2022 12:35:29 +0100 Subject: [PATCH 01/34] initial allocator --- source/blender/blenlib/BLI_local_pool.hh | 132 ++++++++++++++++++ source/blender/blenlib/CMakeLists.txt | 2 + .../blenlib/tests/BLI_local_pool_test.cc | 18 +++ source/blender/functions/FN_lazy_function.hh | 5 +- .../functions/FN_lazy_function_execute.hh | 3 +- .../FN_lazy_function_graph_executor.hh | 2 +- .../blender/functions/intern/lazy_function.cc | 2 +- .../intern/lazy_function_graph_executor.cc | 41 +++--- source/blender/modifiers/intern/MOD_nodes.cc | 3 +- .../intern/geometry_nodes_lazy_function.cc | 2 +- 10 files changed, 185 insertions(+), 25 deletions(-) create mode 100644 source/blender/blenlib/BLI_local_pool.hh create mode 100644 source/blender/blenlib/tests/BLI_local_pool_test.cc diff --git a/source/blender/blenlib/BLI_local_pool.hh b/source/blender/blenlib/BLI_local_pool.hh new file mode 100644 index 00000000000..422289c7290 --- /dev/null +++ b/source/blender/blenlib/BLI_local_pool.hh @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#pragma once + +#include + +#include "BLI_allocator.hh" +#include "BLI_asan.h" +#include "BLI_map.hh" +#include "BLI_math_bits.h" +#include "BLI_stack.hh" +#include "BLI_utility_mixins.hh" +#include "BLI_vector.hh" + +namespace blender { + +class LocalPoolScope { +}; + +template class LocalPool : NonCopyable, NonMovable { + private: + static constexpr int64_t s_alignment = 64; + + const LocalPoolScope &pool_scope_; + Vector> owned_buffers_; + + struct BufferStack { + int64_t element_size = -1; + Stack stack; + }; + + std::array small_stacks_; + std::unique_ptr> large_stacks_; + + BLI_NO_UNIQUE_ADDRESS Allocator allocator_; + + public: + LocalPool(const LocalPoolScope &pool_scope) : pool_scope_(pool_scope) + { + for (const int64_t i : IndexRange(small_stacks_.size())) { + small_stacks_[i].element_size = 8 * (i + 1); + } + } + + 
~LocalPool() + { + for (MutableSpan buffer : owned_buffers_) { + BLI_asan_unpoison(buffer.data(), buffer.size()); + allocator_.deallocate(buffer.data()); + } + } + + void *allocate(const int64_t size, const int64_t alignment) + { + BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment); + BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); + if (!buffer_stack.stack.is_empty()) { + void *buffer = buffer_stack.stack.pop(); + BLI_asan_unpoison(buffer, size); + return buffer; + } + if (size <= 4096) { + const int64_t allocation_size = std::clamp( + buffer_stack.element_size * 16, 512, 4096); + void *buffer = allocator_.allocate(allocation_size, s_alignment, __func__); + BLI_asan_poison(buffer, allocation_size); + const int64_t num = allocation_size / buffer_stack.element_size; + for (int64_t i = num - 1; i > 0; i--) { + buffer_stack.stack.push(POINTER_OFFSET(buffer, buffer_stack.element_size * i)); + } + owned_buffers_.append({static_cast(buffer), allocation_size}); + BLI_asan_unpoison(buffer, size); + return buffer; + } + void *buffer = allocator_.allocate( + size_t(size), std::max(s_alignment, size_t(alignment)), __func__); + owned_buffers_.append({static_cast(buffer), size}); + return buffer; + } + + void deallocate(const void *buffer, const int64_t size, const int64_t alignment) + { + BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment); +#ifdef DEBUG + memset(buffer, -1, size); +#endif + BLI_asan_poison(buffer, alignment); + BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); + buffer_stack.stack.push(buffer); + } + + template destruct_ptr construct(Args &&...args) + { + void *buffer = this->allocate(sizeof(T), alignof(T)); + T *value = new (buffer) T(std::forward(args)...); + return destruct_ptr(value); + } + + template MutableSpan allocate_array(int64_t size) + { + T *array = static_cast(this->allocate(sizeof(T) * size, alignof(T))); + return MutableSpan(array, size); + } + + template + 
MutableSpan construct_array(int64_t size, Args &&...args) + { + MutableSpan array = this->allocate_array(size); + for (const int64_t i : IndexRange(size)) { + new (&array[i]) T(std::forward(args)...); + } + return array; + } + + private: + BufferStack &get_buffer_stack(const int64_t size, const int64_t /*alignment*/) + { + if (size <= 64) { + return small_stacks_[(size - (size != 0)) >> 3]; + } + if (!large_stacks_) { + large_stacks_ = std::make_unique>(); + } + const int key = bitscan_reverse_uint64(uint64_t(size)); + return large_stacks_->lookup_or_add_cb(key, [&]() { + BufferStack buffer_stack; + buffer_stack.element_size = int64_t(1) << (8 * sizeof(int64_t) - key); + return buffer_stack; + }); + } +}; + +} // namespace blender diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index 17218a2daed..9e31aec32d2 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -255,6 +255,7 @@ set(SRC BLI_linklist_stack.h BLI_listbase.h BLI_listbase_wrapper.hh + BLI_local_pool.hh BLI_map.hh BLI_map_slots.hh BLI_math.h @@ -479,6 +480,7 @@ if(WITH_GTESTS) tests/BLI_linear_allocator_test.cc tests/BLI_linklist_lockfree_test.cc tests/BLI_listbase_test.cc + tests/BLI_local_pool_test.cc tests/BLI_map_test.cc tests/BLI_math_base_safe_test.cc tests/BLI_math_base_test.cc diff --git a/source/blender/blenlib/tests/BLI_local_pool_test.cc b/source/blender/blenlib/tests/BLI_local_pool_test.cc new file mode 100644 index 00000000000..ed12b6e3b89 --- /dev/null +++ b/source/blender/blenlib/tests/BLI_local_pool_test.cc @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: Apache-2.0 */ + +#include "BLI_local_pool.hh" +#include "BLI_strict_flags.h" + +#include "testing/testing.h" + +namespace blender::tests { + +TEST(local_pool, Test) +{ + LocalPoolScope pool_scope; + LocalPool pool(pool_scope); + + std::cout << pool.allocate(30000, 8) << "\n"; +} + +} // namespace blender::tests diff --git 
a/source/blender/functions/FN_lazy_function.hh b/source/blender/functions/FN_lazy_function.hh index bc85c155c2e..b2227ffe321 100644 --- a/source/blender/functions/FN_lazy_function.hh +++ b/source/blender/functions/FN_lazy_function.hh @@ -42,6 +42,7 @@ #include "BLI_function_ref.hh" #include "BLI_generic_pointer.hh" #include "BLI_linear_allocator.hh" +#include "BLI_local_pool.hh" #include "BLI_vector.hh" #include @@ -98,6 +99,8 @@ struct Context { * Custom user data that can be used in the function. */ UserData *user_data; + + LocalPool<> *local_pool = nullptr; }; /** @@ -276,7 +279,7 @@ class LazyFunction { * Allocates storage for this function. The storage will be passed to every call to #execute. * If the function does not keep track of any state, this does not have to be implemented. */ - virtual void *init_storage(LinearAllocator<> &allocator) const; + virtual void *init_storage(LocalPool<> &allocator) const; /** * Destruct the storage created in #init_storage. diff --git a/source/blender/functions/FN_lazy_function_execute.hh b/source/blender/functions/FN_lazy_function_execute.hh index 1d82ac94ee8..8a82e611d48 100644 --- a/source/blender/functions/FN_lazy_function_execute.hh +++ b/source/blender/functions/FN_lazy_function_execute.hh @@ -85,7 +85,8 @@ inline void execute_lazy_function_eagerly_impl( ...); output_usages.fill(ValueUsage::Used); set_outputs.fill(false); - LinearAllocator<> allocator; + LocalPoolScope local_pool_scope; + LocalPool<> allocator(local_pool_scope); Context context; context.user_data = user_data; context.storage = fn.init_storage(allocator); diff --git a/source/blender/functions/FN_lazy_function_graph_executor.hh b/source/blender/functions/FN_lazy_function_graph_executor.hh index a6ae5cac967..ee647c21e57 100644 --- a/source/blender/functions/FN_lazy_function_graph_executor.hh +++ b/source/blender/functions/FN_lazy_function_graph_executor.hh @@ -88,7 +88,7 @@ class GraphExecutor : public LazyFunction { const Logger *logger, const 
SideEffectProvider *side_effect_provider); - void *init_storage(LinearAllocator<> &allocator) const override; + void *init_storage(LocalPool<> &allocator) const override; void destruct_storage(void *storage) const override; private: diff --git a/source/blender/functions/intern/lazy_function.cc b/source/blender/functions/intern/lazy_function.cc index d42b1889160..951f10ab32c 100644 --- a/source/blender/functions/intern/lazy_function.cc +++ b/source/blender/functions/intern/lazy_function.cc @@ -25,7 +25,7 @@ std::string LazyFunction::output_name(int index) const return outputs_[index].debug_name; } -void *LazyFunction::init_storage(LinearAllocator<> & /*allocator*/) const +void *LazyFunction::init_storage(LocalPool<> & /*allocator*/) const { return nullptr; } diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index 83b14952829..3d12092d5ed 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -241,15 +241,16 @@ class Executor { #ifdef FN_LAZY_FUNCTION_DEBUG_THREADS std::thread::id current_main_thread_; #endif - /** - * A separate linear allocator for every thread. We could potentially reuse some memory, but that - * doesn't seem worth it yet. - */ + LocalPoolScope local_pool_scope_; struct ThreadLocalData { - LinearAllocator<> allocator; + LocalPool<> local_pool; + + ThreadLocalData(const LocalPoolScope &local_pool_scope) : local_pool(local_pool_scope) + { + } }; std::unique_ptr> thread_locals_; - LinearAllocator<> main_allocator_; + LocalPool<> main_allocator_; /** * Set to false when the first execution ends. 
*/ @@ -258,7 +259,8 @@ class Executor { friend GraphExecutorLFParams; public: - Executor(const GraphExecutor &self) : self_(self), loaded_inputs_(self.graph_inputs_.size()) + Executor(const GraphExecutor &self) + : self_(self), loaded_inputs_(self.graph_inputs_.size()), main_allocator_(local_pool_scope_) { /* The indices are necessary, because they are used as keys in #node_states_. */ BLI_assert(self_.graph_.node_indices_are_valid()); @@ -340,7 +342,7 @@ class Executor { Span nodes = self_.graph_.nodes(); node_states_.reinitialize(nodes.size()); - auto construct_node_range = [&](const IndexRange range, LinearAllocator<> &allocator) { + auto construct_node_range = [&](const IndexRange range, LocalPool<> &allocator) { for (const int i : range) { const Node &node = *nodes[i]; NodeState &node_state = *allocator.construct().release(); @@ -355,13 +357,13 @@ class Executor { this->ensure_thread_locals(); /* Construct all node states in parallel. */ threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) { - LinearAllocator<> &allocator = this->get_main_or_local_allocator(); + LocalPool<> &allocator = this->get_main_or_local_allocator(); construct_node_range(range, allocator); }); } } - void construct_initial_node_state(LinearAllocator<> &allocator, + void construct_initial_node_state(LocalPool<> &allocator, const Node &node, NodeState &node_state) { @@ -533,7 +535,7 @@ class Executor { void forward_newly_provided_inputs(CurrentTask &current_task) { - LinearAllocator<> &allocator = this->get_main_or_local_allocator(); + LocalPool<> &allocator = this->get_main_or_local_allocator(); for (const int graph_input_index : self_.graph_inputs_.index_range()) { std::atomic &was_loaded = loaded_inputs_[graph_input_index]; if (was_loaded.load()) { @@ -552,7 +554,7 @@ class Executor { } void forward_newly_provided_input(CurrentTask &current_task, - LinearAllocator<> &allocator, + LocalPool<> &allocator, const int graph_input_index, void *input_data) { @@ -706,7 +708,7 @@
class Executor { void run_node_task(const FunctionNode &node, CurrentTask &current_task) { NodeState &node_state = *node_states_[node.index_in_graph()]; - LinearAllocator<> &allocator = this->get_main_or_local_allocator(); + LocalPool<> &allocator = this->get_main_or_local_allocator(); const LazyFunction &fn = node.function(); bool node_needs_execution = false; @@ -965,7 +967,7 @@ class Executor { CurrentTask &current_task) { BLI_assert(value_to_forward.get() != nullptr); - LinearAllocator<> &allocator = this->get_main_or_local_allocator(); + LocalPool<> &allocator = this->get_main_or_local_allocator(); const CPPType &type = *value_to_forward.type(); if (self_.logger_ != nullptr) { @@ -1091,7 +1093,8 @@ class Executor { } #endif if (!thread_locals_) { - thread_locals_ = std::make_unique>(); + thread_locals_ = std::make_unique>( + [scope = &local_pool_scope_]() { return ThreadLocalData{*scope}; }); } } @@ -1130,10 +1133,10 @@ class Executor { }); } - LinearAllocator<> &get_main_or_local_allocator() + LocalPool<> &get_main_or_local_allocator() { if (this->use_multi_threading()) { - return thread_locals_->local().allocator; + return thread_locals_->local().local_pool; } return main_allocator_; } @@ -1184,7 +1187,7 @@ class GraphExecutorLFParams final : public Params { OutputState &output_state = node_state_.outputs[index]; BLI_assert(!output_state.has_been_computed); if (output_state.value == nullptr) { - LinearAllocator<> &allocator = executor_.get_main_or_local_allocator(); + LocalPool<> &allocator = executor_.get_main_or_local_allocator(); const CPPType &type = node_.output(index).type(); output_state.value = allocator.allocate(type.size(), type.alignment()); } @@ -1296,7 +1299,7 @@ void GraphExecutor::execute_impl(Params &params, const Context &context) const executor.execute(params, context); } -void *GraphExecutor::init_storage(LinearAllocator<> &allocator) const +void *GraphExecutor::init_storage(LocalPool<> &allocator) const { Executor &executor =
*allocator.construct(*this).release(); return &executor; diff --git a/source/blender/modifiers/intern/MOD_nodes.cc b/source/blender/modifiers/intern/MOD_nodes.cc index ebd5bf351ea..adce9c7d0a0 100644 --- a/source/blender/modifiers/intern/MOD_nodes.cc +++ b/source/blender/modifiers/intern/MOD_nodes.cc @@ -1163,7 +1163,8 @@ static GeometrySet compute_geometry( blender::bke::ModifierComputeContext modifier_compute_context{nullptr, nmd->modifier.name}; user_data.compute_context = &modifier_compute_context; - blender::LinearAllocator<> allocator; + blender::LocalPoolScope local_pool_scope; + blender::LocalPool<> allocator(local_pool_scope); Vector inputs_to_destruct; int input_index; diff --git a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc index 454fe4da23b..977b5296c86 100644 --- a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc +++ b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc @@ -689,7 +689,7 @@ class LazyFunctionForGroupNode : public LazyFunction { graph_executor_->execute(params, group_context); } - void *init_storage(LinearAllocator<> &allocator) const override + void *init_storage(LocalPool<> &allocator) const override { Storage *s = allocator.construct().release(); s->graph_executor_storage = graph_executor_->init_storage(allocator); -- 2.30.2 From 9308a4c428a608a5caf04e6fadd74ffa8500bab2 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Fri, 30 Dec 2022 12:42:02 +0100 Subject: [PATCH 02/34] progress --- source/blender/blenlib/BLI_local_pool.hh | 2 +- .../intern/lazy_function_graph_executor.cc | 26 ++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/source/blender/blenlib/BLI_local_pool.hh b/source/blender/blenlib/BLI_local_pool.hh index 422289c7290..b5b0f848967 100644 --- a/source/blender/blenlib/BLI_local_pool.hh +++ b/source/blender/blenlib/BLI_local_pool.hh @@ -85,7 +85,7 @@ template class LocalPool : NonCopyable, N 
#endif BLI_asan_poison(buffer, alignment); BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); - buffer_stack.stack.push(buffer); + buffer_stack.stack.push(const_cast(buffer)); } template destruct_ptr construct(Args &&...args) diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index 3d12092d5ed..e6ea8633d0c 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -382,10 +382,11 @@ class Executor { fn.destruct_storage(node_state.storage); } } + LocalPool<> &allocator = this->get_main_or_local_allocator(); for (const int i : node.inputs().index_range()) { InputState &input_state = node_state.inputs[i]; const InputSocket &input_socket = node.input(i); - this->destruct_input_value_if_exists(input_state, input_socket.type()); + this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator); } std::destroy_at(&node_state); } @@ -798,7 +799,7 @@ class Executor { this->assert_expected_outputs_have_been_computed(locked_node); } #endif - this->finish_node_if_possible(locked_node); + this->finish_node_if_possible(locked_node, allocator); const bool reschedule_requested = node_state.schedule_state == NodeScheduleState::RunningAndRescheduled; node_state.schedule_state = NodeScheduleState::NotScheduled; @@ -836,7 +837,7 @@ class Executor { } } - void finish_node_if_possible(LockedNode &locked_node) + void finish_node_if_possible(LockedNode &locked_node, LocalPool<> &allocator) { const Node &node = locked_node.node; NodeState &node_state = locked_node.node_state; @@ -864,10 +865,10 @@ class Executor { const InputSocket &input_socket = node.input(input_index); InputState &input_state = node_state.inputs[input_index]; if (input_state.usage == ValueUsage::Maybe) { - this->set_input_unused(locked_node, input_socket); + this->set_input_unused(locked_node, 
input_socket, allocator); } else if (input_state.usage == ValueUsage::Used) { - this->destruct_input_value_if_exists(input_state, input_socket.type()); + this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator); } } @@ -880,10 +881,13 @@ class Executor { } } - void destruct_input_value_if_exists(InputState &input_state, const CPPType &type) + void destruct_input_value_if_exists(InputState &input_state, + const CPPType &type, + LocalPool<> &allocator) { if (input_state.value != nullptr) { type.destruct(input_state.value); + allocator.deallocate(input_state.value, type.size(), type.alignment()); input_state.value = nullptr; } } @@ -895,13 +899,16 @@ class Executor { const int input_index, CurrentTask &current_task) { + LocalPool<> &allocator = this->get_main_or_local_allocator(); const InputSocket &input_socket = node.input(input_index); this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { - this->set_input_unused(locked_node, input_socket); + this->set_input_unused(locked_node, input_socket, allocator); }); } - void set_input_unused(LockedNode &locked_node, const InputSocket &input_socket) + void set_input_unused(LockedNode &locked_node, + const InputSocket &input_socket, + LocalPool<> &allocator) { NodeState &node_state = locked_node.node_state; const int input_index = input_socket.index(); @@ -913,7 +920,7 @@ class Executor { } input_state.usage = ValueUsage::Unused; - this->destruct_input_value_if_exists(input_state, input_socket.type()); + this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator); if (input_state.was_ready_for_execution) { return; } @@ -1030,6 +1037,7 @@ class Executor { } if (value_to_forward.get() != nullptr) { value_to_forward.destruct(); + allocator.deallocate(value_to_forward.get(), type.size(), type.alignment()); } } -- 2.30.2 From 8502ae5233c299319e96c7167595cda13d9c7829 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Fri, 30 Dec 2022 12:50:56 +0100 Subject:
[PATCH 03/34] progress --- source/blender/blenlib/BLI_local_pool.hh | 2 +- .../intern/lazy_function_graph_executor.cc | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/source/blender/blenlib/BLI_local_pool.hh b/source/blender/blenlib/BLI_local_pool.hh index b5b0f848967..5e167bef629 100644 --- a/source/blender/blenlib/BLI_local_pool.hh +++ b/source/blender/blenlib/BLI_local_pool.hh @@ -81,7 +81,7 @@ template class LocalPool : NonCopyable, N { BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment); #ifdef DEBUG - memset(buffer, -1, size); + memset(const_cast(buffer), -1, size); #endif BLI_asan_poison(buffer, alignment); BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index e6ea8633d0c..f19a8875918 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -351,7 +351,8 @@ class Executor { } }; if (nodes.size() <= 256) { - construct_node_range(nodes.index_range(), main_allocator_); + LocalPool<> &allocator = this->get_main_or_local_allocator(); + construct_node_range(nodes.index_range(), allocator); } else { this->ensure_thread_locals(); @@ -790,7 +791,7 @@ class Executor { /* Importantly, the node must not be locked when it is executed. That would result in locks * being hold very long in some cases and results in multiple locks being hold by the same * thread in the same graph which can lead to deadlocks. 
*/ - this->execute_node(node, node_state, current_task); + this->execute_node(node, node_state, current_task, allocator); } this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { @@ -892,7 +893,10 @@ class Executor { } } - void execute_node(const FunctionNode &node, NodeState &node_state, CurrentTask &current_task); + void execute_node(const FunctionNode &node, + NodeState &node_state, + CurrentTask &current_task, + LocalPool<> &allocator); void set_input_unused_during_execution(const Node &node, NodeState &node_state, @@ -1146,6 +1150,9 @@ class Executor { if (this->use_multi_threading()) { return thread_locals_->local().local_pool; } + if (context_ != nullptr && context_->local_pool) { + return *context_->local_pool; + } return main_allocator_; } }; @@ -1244,13 +1251,15 @@ class GraphExecutorLFParams final : public Params { */ inline void Executor::execute_node(const FunctionNode &node, NodeState &node_state, - CurrentTask &current_task) + CurrentTask &current_task, + LocalPool<> &allocator) { const LazyFunction &fn = node.function(); GraphExecutorLFParams node_params{fn, *this, node, node_state, current_task}; BLI_assert(context_ != nullptr); Context fn_context = *context_; fn_context.storage = node_state.storage; + fn_context.local_pool = &allocator; if (self_.logger_ != nullptr) { self_.logger_->log_before_node_execute(node, node_params, fn_context); -- 2.30.2 From e6c5186dd3e7aa6d029cbac15ef7a902b0d92d49 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Fri, 30 Dec 2022 13:10:19 +0100 Subject: [PATCH 04/34] initialize storage and default values a bit later --- .../intern/lazy_function_graph_executor.cc | 73 ++++++++++++------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index f19a8875918..6527fa2e1ac 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++
b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -153,7 +153,8 @@ struct NodeState { /** * Set to true once the node is done running for the first time. */ - bool had_initialization = false; + bool always_used_linked_inputs_requested = false; + bool is_first_execution = true; /** * Nodes with side effects should always be executed when their required inputs have been * computed. @@ -733,41 +734,23 @@ class Executor { return; } - if (!node_state.had_initialization) { - /* Initialize storage. */ - node_state.storage = fn.init_storage(allocator); - - /* Load unlinked inputs. */ - for (const int input_index : node.inputs().index_range()) { - const InputSocket &input_socket = node.input(input_index); - if (input_socket.origin() != nullptr) { - continue; - } - InputState &input_state = node_state.inputs[input_index]; - const CPPType &type = input_socket.type(); - const void *default_value = input_socket.default_value(); - BLI_assert(default_value != nullptr); - if (self_.logger_ != nullptr) { - self_.logger_->log_socket_value(input_socket, {type, default_value}, *context_); - } - void *buffer = allocator.allocate(type.size(), type.alignment()); - type.copy_construct(default_value, buffer); - this->forward_value_to_input(locked_node, input_state, {type, buffer}, current_task); - } - + if (!node_state.always_used_linked_inputs_requested) { /* Request linked inputs that are always needed. 
*/ const Span fn_inputs = fn.inputs(); for (const int input_index : fn_inputs.index_range()) { const Input &fn_input = fn_inputs[input_index]; if (fn_input.usage == ValueUsage::Used) { const InputSocket &input_socket = node.input(input_index); - this->set_input_required(locked_node, input_socket); + if (input_socket.origin() != nullptr) { + this->set_input_required(locked_node, input_socket); + } } } - node_state.had_initialization = true; + node_state.always_used_linked_inputs_requested = true; } + const bool allow_missing_requested_inputs = fn.allow_missing_requested_inputs(); for (const int input_index : node_state.inputs.index_range()) { InputState &input_state = node_state.inputs[input_index]; if (input_state.was_ready_for_execution) { @@ -777,7 +760,11 @@ class Executor { input_state.was_ready_for_execution = true; continue; } - if (!fn.allow_missing_requested_inputs()) { + const InputSocket &socket = node.input(input_index); + if (socket.origin() == nullptr) { + continue; + } + if (!allow_missing_requested_inputs) { if (input_state.usage == ValueUsage::Used) { return; } @@ -788,6 +775,40 @@ class Executor { }); if (node_needs_execution) { + if (node_state.is_first_execution) { + /* Initialize storage. */ + node_state.storage = fn.init_storage(allocator); + + /* Load unlinked inputs. 
*/ + for (const int input_index : node.inputs().index_range()) { + const InputSocket &input_socket = node.input(input_index); + if (input_socket.origin() != nullptr) { + continue; + } + InputState &input_state = node_state.inputs[input_index]; + if (input_state.usage == ValueUsage::Unused) { + continue; + } + const CPPType &type = input_socket.type(); + const void *default_value = input_socket.default_value(); + BLI_assert(default_value != nullptr); + if (self_.logger_ != nullptr) { + self_.logger_->log_socket_value(input_socket, {type, default_value}, *context_); + } + void *buffer = allocator.allocate(type.size(), type.alignment()); + type.copy_construct(default_value, buffer); + + input_state.value = buffer; + BLI_assert(!input_state.was_ready_for_execution); + input_state.was_ready_for_execution = true; + if (input_state.usage == ValueUsage::Used) { + node_state.missing_required_inputs -= 1; + } + } + + node_state.is_first_execution = false; + } + /* Importantly, the node must not be locked when it is executed. That would result in locks * being hold very long in some cases and results in multiple locks being hold by the same * thread in the same graph which can lead to deadlocks. 
*/ -- 2.30.2 From 1471befe975068cdea80dddbdbdc4bd645f4e3ba Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Fri, 30 Dec 2022 13:26:14 +0100 Subject: [PATCH 05/34] progress --- .../intern/lazy_function_graph_executor.cc | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index 6527fa2e1ac..d5f2823e10f 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -323,7 +323,7 @@ class Executor { } } - this->initialize_static_value_usages(side_effect_nodes); + this->initialize_static_value_usages(side_effect_nodes, this->get_main_or_local_allocator()); this->schedule_side_effect_nodes(side_effect_nodes, current_task); } @@ -343,25 +343,13 @@ class Executor { Span nodes = self_.graph_.nodes(); node_states_.reinitialize(nodes.size()); - auto construct_node_range = [&](const IndexRange range, LocalPool<> &allocator) { - for (const int i : range) { - const Node &node = *nodes[i]; - NodeState &node_state = *allocator.construct().release(); - node_states_[i] = &node_state; - this->construct_initial_node_state(allocator, node, node_state); - } - }; - if (nodes.size() <= 256) { - LocalPool<> &allocator = this->get_main_or_local_allocator(); - construct_node_range(nodes.index_range(), allocator); - } - else { - this->ensure_thread_locals(); - /* Construct all node states in parallel. 
*/ - threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) { - LocalPool<> &allocator = this->get_main_or_local_allocator(); - construct_node_range(range, allocator); - }); + LocalPool<> &allocator = this->get_main_or_local_allocator(); + + for (const int i : nodes.index_range()) { + const Node &node = *nodes[i]; + NodeState &node_state = *allocator.construct().release(); + node_states_[i] = &node_state; + this->construct_initial_node_state(allocator, node, node_state); } } @@ -452,13 +440,19 @@ class Executor { * Most importantly, this function initializes `InputState.usage` and * `OutputState.potential_target_sockets`. */ - void initialize_static_value_usages(const Span side_effect_nodes) + void initialize_static_value_usages(const Span side_effect_nodes, + LocalPool<> &allocator) { const Span all_nodes = self_.graph_.nodes(); /* Used for a search through all nodes that outputs depend on. */ - Stack reachable_nodes_to_check; - Array reachable_node_flags(all_nodes.size(), false); + Stack reachable_nodes_to_check; + MutableSpan reachable_node_flags = allocator.allocate_array(all_nodes.size()); + BLI_SCOPED_DEFER([&]() { + allocator.deallocate( + reachable_node_flags.data(), reachable_node_flags.size() * sizeof(bool), alignof(bool)); + }); + reachable_node_flags.fill(false); /* Graph outputs are always reachable. 
*/ for (const InputSocket *socket : self_.graph_outputs_) { -- 2.30.2 From ae4672bd29e626b817455f790c9d734abc82b14a Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Fri, 30 Dec 2022 13:39:34 +0100 Subject: [PATCH 06/34] progress --- source/blender/functions/FN_lazy_function.hh | 2 +- .../functions/FN_lazy_function_execute.hh | 2 +- .../FN_lazy_function_graph_executor.hh | 2 +- .../blender/functions/intern/lazy_function.cc | 2 +- .../intern/lazy_function_graph_executor.cc | 18 ++++++++++-------- source/blender/modifiers/intern/MOD_nodes.cc | 2 +- .../intern/geometry_nodes_lazy_function.cc | 4 ++-- 7 files changed, 17 insertions(+), 15 deletions(-) diff --git a/source/blender/functions/FN_lazy_function.hh b/source/blender/functions/FN_lazy_function.hh index b2227ffe321..fc351bee1d9 100644 --- a/source/blender/functions/FN_lazy_function.hh +++ b/source/blender/functions/FN_lazy_function.hh @@ -284,7 +284,7 @@ class LazyFunction { /** * Destruct the storage created in #init_storage. */ - virtual void destruct_storage(void *storage) const; + virtual void destruct_storage(void *storage, LocalPool<> &allocator) const; /** * Calls `fn` with the input indices that the given `output_index` may depend on. By default diff --git a/source/blender/functions/FN_lazy_function_execute.hh b/source/blender/functions/FN_lazy_function_execute.hh index 8a82e611d48..cf20ab37282 100644 --- a/source/blender/functions/FN_lazy_function_execute.hh +++ b/source/blender/functions/FN_lazy_function_execute.hh @@ -93,7 +93,7 @@ inline void execute_lazy_function_eagerly_impl( BasicParams params{ fn, input_pointers, output_pointers, input_usages, output_usages, set_outputs}; fn.execute(params, context); - fn.destruct_storage(context.storage); + fn.destruct_storage(context.storage, allocator); /* Make sure all outputs have been computed. 
*/ BLI_assert(!Span(set_outputs).contains(false)); diff --git a/source/blender/functions/FN_lazy_function_graph_executor.hh b/source/blender/functions/FN_lazy_function_graph_executor.hh index ee647c21e57..5495b13fbfa 100644 --- a/source/blender/functions/FN_lazy_function_graph_executor.hh +++ b/source/blender/functions/FN_lazy_function_graph_executor.hh @@ -89,7 +89,7 @@ class GraphExecutor : public LazyFunction { const SideEffectProvider *side_effect_provider); void *init_storage(LocalPool<> &allocator) const override; - void destruct_storage(void *storage) const override; + void destruct_storage(void *storage, LocalPool<> &allocator) const override; private: void execute_impl(Params &params, const Context &context) const override; diff --git a/source/blender/functions/intern/lazy_function.cc b/source/blender/functions/intern/lazy_function.cc index 951f10ab32c..4befc23c963 100644 --- a/source/blender/functions/intern/lazy_function.cc +++ b/source/blender/functions/intern/lazy_function.cc @@ -30,7 +30,7 @@ void *LazyFunction::init_storage(LocalPool<> & /*allocator*/) const return nullptr; } -void LazyFunction::destruct_storage(void *storage) const +void LazyFunction::destruct_storage(void *storage, LocalPool<> & /*allocator*/) const { BLI_assert(storage == nullptr); UNUSED_VARS_NDEBUG(storage); diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index d5f2823e10f..4051f74233b 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -267,7 +267,7 @@ class Executor { BLI_assert(self_.graph_.node_indices_are_valid()); } - ~Executor() + void destruct_self(LocalPool<> & /*parent_allocator*/) { if (TaskPool *task_pool = task_pool_.load()) { BLI_task_pool_free(task_pool); @@ -276,9 +276,10 @@ class Executor { for (const int node_index : range) { const Node &node =
*self_.graph_.nodes()[node_index]; NodeState &node_state = *node_states_[node_index]; - this->destruct_node_state(node, node_state); + this->destruct_node_state(node, node_state, this->get_main_or_local_allocator()); } }); + this->~Executor(); } /** @@ -364,15 +365,14 @@ class Executor { node_state.outputs = allocator.construct_array(node_outputs.size()); } - void destruct_node_state(const Node &node, NodeState &node_state) + void destruct_node_state(const Node &node, NodeState &node_state, LocalPool<> &allocator) { if (node.is_function()) { const LazyFunction &fn = static_cast(node).function(); if (node_state.storage != nullptr) { - fn.destruct_storage(node_state.storage); + fn.destruct_storage(node_state.storage, allocator); } } - LocalPool<> &allocator = this->get_main_or_local_allocator(); for (const int i : node.inputs().index_range()) { InputState &input_state = node_state.inputs[i]; const InputSocket &input_socket = node.input(i); @@ -891,7 +891,7 @@ class Executor { if (node_state.storage != nullptr) { if (node.is_function()) { const FunctionNode &fn_node = static_cast(node); - fn_node.function().destruct_storage(node_state.storage); + fn_node.function().destruct_storage(node_state.storage, allocator); } node_state.storage = nullptr; } @@ -1337,9 +1337,11 @@ void *GraphExecutor::init_storage(LocalPool<> &allocator) const return &executor; } -void GraphExecutor::destruct_storage(void *storage) const +void GraphExecutor::destruct_storage(void *storage, LocalPool<> &allocator) const { - std::destroy_at(static_cast(storage)); + Executor *executor = static_cast(storage); + executor->destruct_self(allocator); + allocator.deallocate(executor, sizeof(Executor), alignof(Executor)); } void GraphExecutorLogger::log_socket_value(const Socket &socket, diff --git a/source/blender/modifiers/intern/MOD_nodes.cc b/source/blender/modifiers/intern/MOD_nodes.cc index adce9c7d0a0..2592001b103 100644 --- a/source/blender/modifiers/intern/MOD_nodes.cc +++ 
b/source/blender/modifiers/intern/MOD_nodes.cc @@ -1199,7 +1199,7 @@ static GeometrySet compute_geometry( param_output_usages, param_set_outputs}; graph_executor.execute(lf_params, lf_context); - graph_executor.destruct_storage(lf_context.storage); + graph_executor.destruct_storage(lf_context.storage, allocator); for (GMutablePointer &ptr : inputs_to_destruct) { ptr.destruct(); diff --git a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc index 977b5296c86..6b5eb26342c 100644 --- a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc +++ b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc @@ -696,10 +696,10 @@ class LazyFunctionForGroupNode : public LazyFunction { return s; } - void destruct_storage(void *storage) const override + void destruct_storage(void *storage, LocalPool<> &allocator) const override { Storage *s = static_cast(storage); - graph_executor_->destruct_storage(s->graph_executor_storage); + graph_executor_->destruct_storage(s->graph_executor_storage, allocator); std::destroy_at(s); } }; -- 2.30.2 From 4d34d6716ad326aaa2110432a2db1787ac283796 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 4 Jan 2023 19:58:00 +0100 Subject: [PATCH 07/34] progress --- source/blender/blenlib/BLI_local_pool.hh | 23 ++++- .../blenlib/tests/BLI_local_pool_test.cc | 3 +- source/blender/functions/FN_lazy_function.hh | 6 +- .../functions/FN_lazy_function_execute.hh | 9 +- .../FN_lazy_function_graph_executor.hh | 4 +- .../blender/functions/intern/lazy_function.cc | 4 +- .../intern/lazy_function_graph_executor.cc | 94 ++++++------------- source/blender/modifiers/intern/MOD_nodes.cc | 14 ++- .../intern/geometry_nodes_lazy_function.cc | 10 +- 9 files changed, 76 insertions(+), 91 deletions(-) diff --git a/source/blender/blenlib/BLI_local_pool.hh b/source/blender/blenlib/BLI_local_pool.hh index 5e167bef629..bd5b05e0bde 100644 --- a/source/blender/blenlib/BLI_local_pool.hh +++ 
b/source/blender/blenlib/BLI_local_pool.hh @@ -5,6 +5,7 @@ #include "BLI_allocator.hh" #include "BLI_asan.h" +#include "BLI_enumerable_thread_specific.hh" #include "BLI_map.hh" #include "BLI_math_bits.h" #include "BLI_stack.hh" @@ -13,14 +14,10 @@ namespace blender { -class LocalPoolScope { -}; - template class LocalPool : NonCopyable, NonMovable { private: static constexpr int64_t s_alignment = 64; - const LocalPoolScope &pool_scope_; Vector> owned_buffers_; struct BufferStack { @@ -34,7 +31,7 @@ template class LocalPool : NonCopyable, N BLI_NO_UNIQUE_ADDRESS Allocator allocator_; public: - LocalPool(const LocalPoolScope &pool_scope) : pool_scope_(pool_scope) + LocalPool() { for (const int64_t i : IndexRange(small_stacks_.size())) { small_stacks_[i].element_size = 8 * (i + 1); @@ -129,4 +126,20 @@ template class LocalPool : NonCopyable, N } }; +class LocalMemoryPools { + private: + threading::EnumerableThreadSpecific> pool_by_thread_; + + public: + LocalPool<> &local() + { + return pool_by_thread_.local(); + } +}; + +struct Pools { + LocalMemoryPools *pools = nullptr; + LocalPool<> *local = nullptr; +}; + } // namespace blender diff --git a/source/blender/blenlib/tests/BLI_local_pool_test.cc b/source/blender/blenlib/tests/BLI_local_pool_test.cc index ed12b6e3b89..88cc63ea803 100644 --- a/source/blender/blenlib/tests/BLI_local_pool_test.cc +++ b/source/blender/blenlib/tests/BLI_local_pool_test.cc @@ -9,8 +9,7 @@ namespace blender::tests { TEST(local_pool, Test) { - LocalPoolScope pool_scope; - LocalPool pool(pool_scope); + LocalPool pool; std::cout << pool.allocate(30000, 8) << "\n"; } diff --git a/source/blender/functions/FN_lazy_function.hh b/source/blender/functions/FN_lazy_function.hh index fc351bee1d9..301808ac88d 100644 --- a/source/blender/functions/FN_lazy_function.hh +++ b/source/blender/functions/FN_lazy_function.hh @@ -100,7 +100,7 @@ struct Context { */ UserData *user_data; - LocalPool<> *local_pool = nullptr; + Pools pools; }; /** @@ -279,12 +279,12 @@ 
class LazyFunction { * Allocates storage for this function. The storage will be passed to every call to #execute. * If the function does not keep track of any state, this does not have to be implemented. */ - virtual void *init_storage(LocalPool<> &allocator) const; + virtual void *init_storage(Pools &pools) const; /** * Destruct the storage created in #init_storage. */ - virtual void destruct_storage(void *storage, LocalPool<> &allocator) const; + virtual void destruct_storage(void *storage, Pools &pools) const; /** * Calls `fn` with the input indices that the given `output_index` may depend on. By default diff --git a/source/blender/functions/FN_lazy_function_execute.hh b/source/blender/functions/FN_lazy_function_execute.hh index e7db66dc450..75d522754f0 100644 --- a/source/blender/functions/FN_lazy_function_execute.hh +++ b/source/blender/functions/FN_lazy_function_execute.hh @@ -85,15 +85,16 @@ inline void execute_lazy_function_eagerly_impl( ...); output_usages.fill(ValueUsage::Used); set_outputs.fill(false); - LocalPoolScope local_pool_scope; - LocalPool<> allocator(local_pool_scope); + LocalMemoryPools local_pools; + Pools pools{&local_pools, &local_pools.local()}; Context context; context.user_data = user_data; - context.storage = fn.init_storage(allocator); + context.storage = fn.init_storage(pools); + context.pools = pools; BasicParams params{ fn, input_pointers, output_pointers, input_usages, output_usages, set_outputs}; fn.execute(params, context); - fn.destruct_storage(context.storage, allocator); + fn.destruct_storage(context.storage, pools); /* Make sure all outputs have been computed. 
*/ BLI_assert(!Span(set_outputs).contains(false)); diff --git a/source/blender/functions/FN_lazy_function_graph_executor.hh b/source/blender/functions/FN_lazy_function_graph_executor.hh index 5495b13fbfa..ae1b0ac95fe 100644 --- a/source/blender/functions/FN_lazy_function_graph_executor.hh +++ b/source/blender/functions/FN_lazy_function_graph_executor.hh @@ -88,8 +88,8 @@ class GraphExecutor : public LazyFunction { const Logger *logger, const SideEffectProvider *side_effect_provider); - void *init_storage(LocalPool<> &allocator) const override; - void destruct_storage(void *storage, LocalPool<> &allocator) const override; + void *init_storage(Pools &pools) const override; + void destruct_storage(void *storage, Pools &pools) const override; private: void execute_impl(Params &params, const Context &context) const override; diff --git a/source/blender/functions/intern/lazy_function.cc b/source/blender/functions/intern/lazy_function.cc index 4befc23c963..71e8ad6b39b 100644 --- a/source/blender/functions/intern/lazy_function.cc +++ b/source/blender/functions/intern/lazy_function.cc @@ -25,12 +25,12 @@ std::string LazyFunction::output_name(int index) const return outputs_[index].debug_name; } -void *LazyFunction::init_storage(LocalPool<> & /*allocator*/) const +void *LazyFunction::init_storage(Pools & /*pools*/) const { return nullptr; } -void LazyFunction::destruct_storage(void *storage, LocalPool<> & /*allocator*/) const +void LazyFunction::destruct_storage(void *storage, Pools & /*pools*/) const { BLI_assert(storage == nullptr); UNUSED_VARS_NDEBUG(storage); diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index 6050a8e9ab0..af6d76583fa 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -247,16 +247,7 @@ class Executor { #ifdef FN_LAZY_FUNCTION_DEBUG_THREADS 
std::thread::id
current_main_thread_; #endif - LocalPoolScope local_pool_scope_; - struct ThreadLocalData { - LocalPool<> local_pool; - ThreadLocalData(const LocalPoolScope &local_pool_scope) : local_pool(local_pool_scope) - { - } - }; - std::unique_ptr> thread_locals_; - LocalPool<> main_allocator_; /** * Set to false when the first execution ends. */ @@ -265,14 +256,13 @@ class Executor { friend GraphExecutorLFParams; public: - Executor(const GraphExecutor &self) - : self_(self), loaded_inputs_(self.graph_inputs_.size()), main_allocator_(local_pool_scope_) + Executor(const GraphExecutor &self) : self_(self), loaded_inputs_(self.graph_inputs_.size()) { /* The indices are necessary, because they are used as keys in #node_states_. */ BLI_assert(self_.graph_.node_indices_are_valid()); } - void destruct_self(LocalPool<> & /*parent_allocator*/) + void destruct_self(Pools &pools) { if (TaskPool *task_pool = task_pool_.load()) { BLI_task_pool_free(task_pool); @@ -281,7 +271,7 @@ class Executor { for (const int node_index : range) { const Node &node = *self_.graph_.nodes()[node_index]; NodeState &node_state = *node_states_[node_index]; - this->destruct_node_state(node, node_state, this->get_main_or_local_allocator()); + this->destruct_node_state(node, node_state, pools); } }); this->~Executor(); @@ -329,7 +319,7 @@ class Executor { } } - this->initialize_static_value_usages(side_effect_nodes, this->get_main_or_local_allocator()); + this->initialize_static_value_usages(side_effect_nodes, this->get_local_allocator()); this->schedule_side_effect_nodes(side_effect_nodes, current_task); } @@ -349,25 +339,16 @@ class Executor { Span nodes = self_.graph_.nodes(); node_states_.reinitialize(nodes.size()); - auto construct_node_range = [&](const IndexRange range, LocalPool<> &allocator) { + /* Construct all node states in parallel. 
*/ + threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) { + LocalPool<> &allocator = this->get_local_allocator(); for (const int i : range) { const Node &node = *nodes[i]; NodeState &node_state = *allocator.construct().release(); node_states_[i] = &node_state; this->construct_initial_node_state(allocator, node, node_state); } - }; - if (nodes.size() <= 256) { - construct_node_range(nodes.index_range(), main_allocator_); - } - else { - this->ensure_thread_locals(); - /* Construct all node states in parallel. */ - threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) { - LocalPool<> &allocator = thread_locals_->local().local_pool; - construct_node_range(range, allocator); - }); - } + }); } void construct_initial_node_state(LocalPool<> &allocator, @@ -381,18 +362,18 @@ class Executor { node_state.outputs = allocator.construct_array(node_outputs.size()); } - void destruct_node_state(const Node &node, NodeState &node_state, LocalPool<> &allocator) + void destruct_node_state(const Node &node, NodeState &node_state, Pools &pools) { if (node.is_function()) { const LazyFunction &fn = static_cast(node).function(); if (node_state.storage != nullptr) { - fn.destruct_storage(node_state.storage, allocator); + fn.destruct_storage(node_state.storage, pools); } } for (const int i : node.inputs().index_range()) { InputState &input_state = node_state.inputs[i]; const InputSocket &input_socket = node.input(i); - this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator); + this->destruct_input_value_if_exists(input_state, input_socket.type(), *pools.local); } std::destroy_at(&node_state); } @@ -548,7 +529,7 @@ class Executor { void forward_newly_provided_inputs(CurrentTask &current_task) { - LocalPool<> &allocator = this->get_main_or_local_allocator(); + LocalPool<> &allocator = this->get_local_allocator(); for (const int graph_input_index : self_.graph_inputs_.index_range()) { std::atomic &was_loaded =
loaded_inputs_[graph_input_index]; if (was_loaded.load()) { @@ -602,7 +583,7 @@ class Executor { return; } this->forward_newly_provided_input( - current_task, this->get_main_or_local_allocator(), graph_input_index, input_data); + current_task, this->get_local_allocator(), graph_input_index, input_data); return; } @@ -721,7 +702,7 @@ class Executor { void run_node_task(const FunctionNode &node, CurrentTask &current_task) { NodeState &node_state = *node_states_[node.index_in_graph()]; - LocalPool<> &allocator = this->get_main_or_local_allocator(); + LocalPool<> &allocator = this->get_local_allocator(); const LazyFunction &fn = node.function(); bool node_needs_execution = false; @@ -787,7 +768,8 @@ class Executor { if (node_needs_execution) { if (!node_state.storage_and_defaults_initialized) { /* Initialize storage. */ - node_state.storage = fn.init_storage(allocator); + Pools pools{context_->pools.pools, &allocator}; + node_state.storage = fn.init_storage(pools); /* Load unlinked inputs. */ for (const int input_index : node.inputs().index_range()) { @@ -899,7 +881,8 @@ class Executor { if (node_state.storage != nullptr) { if (node.is_function()) { const FunctionNode &fn_node = static_cast(node); - fn_node.function().destruct_storage(node_state.storage, allocator); + Pools pools{context_->pools.pools, &allocator}; + fn_node.function().destruct_storage(node_state.storage, pools); } node_state.storage = nullptr; } @@ -926,7 +909,7 @@ class Executor { const int input_index, CurrentTask &current_task) { - LocalPool<> &allocator = this->get_main_or_local_allocator(); + LocalPool<> &allocator = this->get_local_allocator(); const InputSocket &input_socket = node.input(input_index); this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { this->set_input_unused(locked_node, input_socket, allocator); @@ -1001,7 +984,7 @@ class Executor { CurrentTask &current_task) { BLI_assert(value_to_forward.get() != nullptr); - LocalPool<> &allocator =
this->get_main_or_local_allocator(); + LocalPool<> &allocator = this->get_local_allocator(); const CPPType &type = *value_to_forward.type(); if (self_.logger_ != nullptr) { @@ -1115,24 +1098,10 @@ class Executor { if (BLI_system_thread_count() <= 1) { return false; } - this->ensure_thread_locals(); task_pool_.store(BLI_task_pool_create(this, TASK_PRIORITY_HIGH)); return true; } - void ensure_thread_locals() - { -#ifdef FN_LAZY_FUNCTION_DEBUG_THREADS - if (current_main_thread_ != std::this_thread::get_id()) { - BLI_assert_unreachable(); - } -#endif - if (!thread_locals_) { - thread_locals_ = std::make_unique>( - [scope = &local_pool_scope_]() { return ThreadLocalData{*scope}; }); - } - } - /** * Allow other threads to steal all the nodes that are currently scheduled on this thread. */ @@ -1168,15 +1137,12 @@ class Executor { }); } - LocalPool<> &get_main_or_local_allocator() + LocalPool<> &get_local_allocator() { if (this->use_multi_threading()) { - return thread_locals_->local().local_pool; + return context_->pools.pools->local(); } - if (context_ != nullptr && context_->local_pool) { - return *context_->local_pool; - } - return main_allocator_; + return *context_->pools.local; } }; @@ -1225,7 +1191,7 @@ class GraphExecutorLFParams final : public Params { OutputState &output_state = node_state_.outputs[index]; BLI_assert(!output_state.has_been_computed); if (output_state.value == nullptr) { - LocalPool<> &allocator = executor_.get_main_or_local_allocator(); + LocalPool<> &allocator = executor_.get_local_allocator(); const CPPType &type = node_.output(index).type(); output_state.value = allocator.allocate(type.size(), type.alignment()); } @@ -1282,7 +1248,7 @@ inline void Executor::execute_node(const FunctionNode &node, BLI_assert(context_ != nullptr); Context fn_context = *context_; fn_context.storage = node_state.storage; - fn_context.local_pool = &allocator; + fn_context.pools.local = &allocator; if (self_.logger_ != nullptr) { 
self_.logger_->log_before_node_execute(node, node_params, fn_context); @@ -1339,17 +1305,17 @@ void GraphExecutor::execute_impl(Params &params, const Context &context) const executor.execute(params, context); } -void *GraphExecutor::init_storage(LocalPool<> &allocator) const +void *GraphExecutor::init_storage(Pools &pools) const { - Executor &executor = *allocator.construct(*this).release(); + Executor &executor = *pools.local->construct(*this).release(); return &executor; } -void GraphExecutor::destruct_storage(void *storage, LocalPool<> &allocator) const +void GraphExecutor::destruct_storage(void *storage, Pools &pools) const { Executor *executor = static_cast(storage); - executor->destruct_self(allocator); - allocator.deallocate(executor, sizeof(Executor), alignof(Executor)); + executor->destruct_self(pools); + pools.local->deallocate(executor, sizeof(Executor), alignof(Executor)); } void GraphExecutorLogger::log_socket_value(const Socket &socket, diff --git a/source/blender/modifiers/intern/MOD_nodes.cc b/source/blender/modifiers/intern/MOD_nodes.cc index 2592001b103..fb802adab8a 100644 --- a/source/blender/modifiers/intern/MOD_nodes.cc +++ b/source/blender/modifiers/intern/MOD_nodes.cc @@ -1163,8 +1163,12 @@ static GeometrySet compute_geometry( blender::bke::ModifierComputeContext modifier_compute_context{nullptr, nmd->modifier.name}; user_data.compute_context = &modifier_compute_context; - blender::LocalPoolScope local_pool_scope; - blender::LocalPool<> allocator(local_pool_scope); + blender::LocalMemoryPools local_pools; + blender::Pools pools; + pools.pools = &local_pools; + pools.local = &local_pools.local(); + blender::LocalPool<> &allocator = *pools.local; + Vector inputs_to_destruct; int input_index; @@ -1190,8 +1194,9 @@ static GeometrySet compute_geometry( } lf::Context lf_context; - lf_context.storage = graph_executor.init_storage(allocator); + lf_context.storage = graph_executor.init_storage(pools); lf_context.user_data = &user_data; + lf_context.pools
= pools; lf::BasicParams lf_params{graph_executor, param_inputs, param_outputs, @@ -1199,7 +1204,7 @@ static GeometrySet compute_geometry( param_output_usages, param_set_outputs}; graph_executor.execute(lf_params, lf_context); - graph_executor.destruct_storage(lf_context.storage, allocator); + graph_executor.destruct_storage(lf_context.storage, pools); for (GMutablePointer &ptr : inputs_to_destruct) { ptr.destruct(); @@ -1272,6 +1277,7 @@ static void modifyGeometry(ModifierData *md, const ModifierEvalContext *ctx, GeometrySet &geometry_set) { + SCOPED_TIMER_AVERAGED(__func__); NodesModifierData *nmd = reinterpret_cast(md); if (nmd->node_group == nullptr) { return; diff --git a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc index 6b5eb26342c..5159e7a352b 100644 --- a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc +++ b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc @@ -689,17 +689,17 @@ class LazyFunctionForGroupNode : public LazyFunction { graph_executor_->execute(params, group_context); } - void *init_storage(LocalPool<> &allocator) const override + void *init_storage(Pools &pools) const override { - Storage *s = allocator.construct().release(); - s->graph_executor_storage = graph_executor_->init_storage(allocator); + Storage *s = pools.local->construct().release(); + s->graph_executor_storage = graph_executor_->init_storage(pools); return s; } - void destruct_storage(void *storage, LocalPool<> &allocator) const override + void destruct_storage(void *storage, Pools &pools) const override { Storage *s = static_cast(storage); - graph_executor_->destruct_storage(s->graph_executor_storage, allocator); + graph_executor_->destruct_storage(s->graph_executor_storage, pools); std::destroy_at(s); } }; -- 2.30.2 From 630a44d18e803442cffa2dc9762b5453ea99ef25 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 4 Jan 2023 20:03:24 +0100 Subject: [PATCH 08/34] progress --- 
source/blender/blenlib/BLI_local_pool.hh | 28 +++++------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/source/blender/blenlib/BLI_local_pool.hh b/source/blender/blenlib/BLI_local_pool.hh index bd5b05e0bde..20d7c5f03be 100644 --- a/source/blender/blenlib/BLI_local_pool.hh +++ b/source/blender/blenlib/BLI_local_pool.hh @@ -6,6 +6,7 @@ #include "BLI_allocator.hh" #include "BLI_asan.h" #include "BLI_enumerable_thread_specific.hh" +#include "BLI_linear_allocator.hh" #include "BLI_map.hh" #include "BLI_math_bits.h" #include "BLI_stack.hh" @@ -17,8 +18,7 @@ namespace blender { template class LocalPool : NonCopyable, NonMovable { private: static constexpr int64_t s_alignment = 64; - - Vector> owned_buffers_; + LinearAllocator<> linear_allocator_; struct BufferStack { int64_t element_size = -1; @@ -28,8 +28,6 @@ template class LocalPool : NonCopyable, N std::array small_stacks_; std::unique_ptr> large_stacks_; - BLI_NO_UNIQUE_ADDRESS Allocator allocator_; - public: LocalPool() { @@ -40,10 +38,6 @@ template class LocalPool : NonCopyable, N ~LocalPool() { - for (MutableSpan buffer : owned_buffers_) { - BLI_asan_unpoison(buffer.data(), buffer.size()); - allocator_.deallocate(buffer.data()); - } } void *allocate(const int64_t size, const int64_t alignment) @@ -56,22 +50,10 @@ template class LocalPool : NonCopyable, N return buffer; } if (size <= 4096) { - const int64_t allocation_size = std::clamp( - buffer_stack.element_size * 16, 512, 4096); - void *buffer = allocator_.allocate(allocation_size, s_alignment, __func__); - BLI_asan_poison(buffer, allocation_size); - const int64_t num = allocation_size / buffer_stack.element_size; - for (int64_t i = num - 1; i > 0; i--) { - buffer_stack.stack.push(POINTER_OFFSET(buffer, buffer_stack.element_size * i)); - } - owned_buffers_.append({static_cast(buffer), allocation_size}); - BLI_asan_unpoison(buffer, size); - return buffer; + return linear_allocator_.allocate(size, alignment); } - void *buffer = 
allocator_.allocate( - size_t(size), std::max(s_alignment, size_t(alignment)), __func__); - owned_buffers_.append({static_cast(buffer), size}); - return buffer; + return linear_allocator_.allocate(size_t(size), + std::max(s_alignment, size_t(alignment))); } void deallocate(const void *buffer, const int64_t size, const int64_t alignment) -- 2.30.2 From 99625360a34096fb501582748dd096c7c5fe02db Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 4 Jan 2023 21:25:57 +0100 Subject: [PATCH 09/34] progress --- source/blender/blenlib/BLI_local_pool.hh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/blender/blenlib/BLI_local_pool.hh b/source/blender/blenlib/BLI_local_pool.hh index 20d7c5f03be..56c95ff99a1 100644 --- a/source/blender/blenlib/BLI_local_pool.hh +++ b/source/blender/blenlib/BLI_local_pool.hh @@ -44,6 +44,7 @@ template class LocalPool : NonCopyable, N { BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment); BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); + BLI_assert(buffer_stack.element_size >= size); if (!buffer_stack.stack.is_empty()) { void *buffer = buffer_stack.stack.pop(); BLI_asan_unpoison(buffer, size); @@ -62,8 +63,9 @@ template class LocalPool : NonCopyable, N #ifdef DEBUG memset(const_cast(buffer), -1, size); #endif - BLI_asan_poison(buffer, alignment); + BLI_asan_poison(buffer, size); BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); + BLI_assert(buffer_stack.element_size >= size); buffer_stack.stack.push(const_cast(buffer)); } -- 2.30.2 From 16cd6162fe962e5ac18d0ddb7c7619ca6befb25c Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 4 Jan 2023 21:27:55 +0100 Subject: [PATCH 10/34] progress --- .../blender/functions/intern/lazy_function_graph_executor.cc | 4 +++- source/blender/nodes/intern/geometry_nodes_lazy_function.cc | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc 
b/source/blender/functions/intern/lazy_function_graph_executor.cc index af6d76583fa..c2ed78542ce 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -268,10 +268,12 @@ class Executor { BLI_task_pool_free(task_pool); } threading::parallel_for(node_states_.index_range(), 1024, [&](const IndexRange range) { + LocalPool<> &local = pools.pools->local(); for (const int node_index : range) { const Node &node = *self_.graph_.nodes()[node_index]; NodeState &node_state = *node_states_[node_index]; - this->destruct_node_state(node, node_state, pools); + Pools sub_pools = {pools.pools, &local}; + this->destruct_node_state(node, node_state, sub_pools); } }); this->~Executor(); diff --git a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc index 5159e7a352b..c3ac178b5ad 100644 --- a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc +++ b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc @@ -701,6 +701,7 @@ class LazyFunctionForGroupNode : public LazyFunction { Storage *s = static_cast(storage); graph_executor_->destruct_storage(s->graph_executor_storage, pools); std::destroy_at(s); + pools.local->deallocate(storage, sizeof(Storage), alignof(Storage)); } }; -- 2.30.2 From a48c1eb20d13c5f7536779a6aa3d8ced440858fe Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 4 Jan 2023 22:11:05 +0100 Subject: [PATCH 11/34] progress --- source/blender/blenlib/BLI_local_pool.hh | 60 +++++++++++++++---- .../intern/lazy_function_graph_executor.cc | 16 +++-- .../intern/geometry_nodes_lazy_function.cc | 3 +- 3 files changed, 58 insertions(+), 21 deletions(-) diff --git a/source/blender/blenlib/BLI_local_pool.hh b/source/blender/blenlib/BLI_local_pool.hh index 56c95ff99a1..2f28fe8928d 100644 --- a/source/blender/blenlib/BLI_local_pool.hh +++ b/source/blender/blenlib/BLI_local_pool.hh @@ -22,6 +22,7 @@ 
template class LocalPool : NonCopyable, N struct BufferStack { int64_t element_size = -1; + int64_t min_alignment = -1; Stack stack; }; @@ -32,7 +33,9 @@ template class LocalPool : NonCopyable, N LocalPool() { for (const int64_t i : IndexRange(small_stacks_.size())) { - small_stacks_[i].element_size = 8 * (i + 1); + BufferStack &buffer_stack = small_stacks_[i]; + buffer_stack.element_size = 8 * (i + 1); + buffer_stack.min_alignment = power_of_2_min_u(buffer_stack.element_size); } } @@ -42,24 +45,33 @@ template class LocalPool : NonCopyable, N void *allocate(const int64_t size, const int64_t alignment) { - BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment); + BLI_assert(size > 0); + BLI_assert(alignment <= size && alignment <= s_alignment); + BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); BLI_assert(buffer_stack.element_size >= size); + BLI_assert(buffer_stack.min_alignment >= alignment); + + void *buffer; if (!buffer_stack.stack.is_empty()) { - void *buffer = buffer_stack.stack.pop(); + buffer = buffer_stack.stack.pop(); BLI_asan_unpoison(buffer, size); - return buffer; } - if (size <= 4096) { - return linear_allocator_.allocate(size, alignment); + else if (size <= 4096) { + buffer = linear_allocator_.allocate(buffer_stack.element_size, buffer_stack.min_alignment); } - return linear_allocator_.allocate(size_t(size), - std::max(s_alignment, size_t(alignment))); + else { + buffer = linear_allocator_.allocate(size_t(size), + std::max(s_alignment, size_t(alignment))); + } + return buffer; } void deallocate(const void *buffer, const int64_t size, const int64_t alignment) { - BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment); + BLI_assert(size > 0); + BLI_assert(alignment <= size && alignment <= s_alignment); + #ifdef DEBUG memset(const_cast(buffer), -1, size); #endif @@ -78,6 +90,9 @@ template class LocalPool : NonCopyable, N template MutableSpan allocate_array(int64_t size) { + if (size == 0) { + return 
{}; + } T *array = static_cast(this->allocate(sizeof(T) * size, alignof(T))); return MutableSpan(array, size); } @@ -92,11 +107,31 @@ template class LocalPool : NonCopyable, N return array; } + template void destruct_array(Span data) + { + if (data.is_empty()) { + return; + } + destruct_n(const_cast(data.data()), data.size()); + this->deallocate(data.data(), data.size() * sizeof(T), alignof(T)); + } + + template void destruct_array(MutableSpan data) + { + this->destruct_array(data.as_span()); + } + + template void destruct(const T *value) + { + std::destroy_at(value); + this->deallocate(value, sizeof(T), alignof(T)); + } + private: BufferStack &get_buffer_stack(const int64_t size, const int64_t /*alignment*/) { if (size <= 64) { - return small_stacks_[(size - (size != 0)) >> 3]; + return small_stacks_[(size - 1) >> 3]; } if (!large_stacks_) { large_stacks_ = std::make_unique>(); @@ -105,6 +140,7 @@ template class LocalPool : NonCopyable, N return large_stacks_->lookup_or_add_cb(key, [&]() { BufferStack buffer_stack; buffer_stack.element_size = int64_t(1) << (8 * sizeof(int64_t) - key); + buffer_stack.min_alignment = s_alignment; return buffer_stack; }); } @@ -115,6 +151,10 @@ class LocalMemoryPools { threading::EnumerableThreadSpecific> pool_by_thread_; public: + ~LocalMemoryPools() + { + } + LocalPool<> &local() { return pool_by_thread_.local(); diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index c2ed78542ce..387c5486493 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -262,7 +262,7 @@ class Executor { BLI_assert(self_.graph_.node_indices_are_valid()); } - void destruct_self(Pools &pools) + void destruct_state(Pools &pools) { if (TaskPool *task_pool = task_pool_.load()) { BLI_task_pool_free(task_pool); @@ -276,7 +276,6 @@ class Executor { this->destruct_node_state(node, 
node_state, sub_pools); } }); - this->~Executor(); } /** @@ -377,7 +376,9 @@ class Executor { const InputSocket &input_socket = node.input(i); this->destruct_input_value_if_exists(input_state, input_socket.type(), *pools.local); } - std::destroy_at(&node_state); + pools.local->destruct_array(node_state.inputs); + pools.local->destruct_array(node_state.outputs); + pools.local->destruct(&node_state); } void schedule_newly_requested_outputs(CurrentTask ¤t_task) @@ -447,10 +448,7 @@ class Executor { /* Used for a search through all nodes that outputs depend on. */ Stack reachable_nodes_to_check; MutableSpan reachable_node_flags = allocator.allocate_array(all_nodes.size()); - BLI_SCOPED_DEFER([&]() { - allocator.deallocate( - reachable_node_flags.data(), reachable_node_flags.size() * sizeof(bool), alignof(bool)); - }); + BLI_SCOPED_DEFER([&]() { allocator.destruct_array(reachable_node_flags); }); reachable_node_flags.fill(false); /* Graph outputs are always reachable. */ @@ -1316,8 +1314,8 @@ void *GraphExecutor::init_storage(Pools &pools) const void GraphExecutor::destruct_storage(void *storage, Pools &pools) const { Executor *executor = static_cast(storage); - executor->destruct_self(pools); - pools.local->deallocate(executor, sizeof(Executor), alignof(Executor)); + executor->destruct_state(pools); + pools.local->destruct(executor); } void GraphExecutorLogger::log_socket_value(const Socket &socket, diff --git a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc index c3ac178b5ad..dc346752d86 100644 --- a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc +++ b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc @@ -700,8 +700,7 @@ class LazyFunctionForGroupNode : public LazyFunction { { Storage *s = static_cast(storage); graph_executor_->destruct_storage(s->graph_executor_storage, pools); - std::destroy_at(s); - pools.local->deallocate(storage, sizeof(Storage), alignof(Storage)); + 
pools.local->destruct(s); } }; -- 2.30.2 From 822a11dd6b2615e9b7c60c79edadfea0b79f88a6 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 4 Jan 2023 22:28:49 +0100 Subject: [PATCH 12/34] cleanup --- source/blender/blenlib/BLI_local_pool.hh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/source/blender/blenlib/BLI_local_pool.hh b/source/blender/blenlib/BLI_local_pool.hh index 2f28fe8928d..05e525b34f3 100644 --- a/source/blender/blenlib/BLI_local_pool.hh +++ b/source/blender/blenlib/BLI_local_pool.hh @@ -57,13 +57,10 @@ template class LocalPool : NonCopyable, N buffer = buffer_stack.stack.pop(); BLI_asan_unpoison(buffer, size); } - else if (size <= 4096) { + else { buffer = linear_allocator_.allocate(buffer_stack.element_size, buffer_stack.min_alignment); } - else { - buffer = linear_allocator_.allocate(size_t(size), - std::max(s_alignment, size_t(alignment))); - } + return buffer; } -- 2.30.2 From 4d1966291c21c6e9794fe2f10f9104ab546e9b2b Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 4 Jan 2023 22:33:37 +0100 Subject: [PATCH 13/34] speedup --- .../blender/functions/intern/lazy_function_graph_executor.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index 387c5486493..f440a22ec4e 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -268,7 +268,8 @@ class Executor { BLI_task_pool_free(task_pool); } threading::parallel_for(node_states_.index_range(), 1024, [&](const IndexRange range) { - LocalPool<> &local = pools.pools->local(); + LocalPool<> &local = (range.size() == node_states_.size()) ? 
*pools.local : + pools.pools->local(); for (const int node_index : range) { const Node &node = *self_.graph_.nodes()[node_index]; NodeState &node_state = *node_states_[node_index]; -- 2.30.2 From c744dc625c6286fe7ae67706837642c43472736a Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Wed, 4 Jan 2023 22:44:40 +0100 Subject: [PATCH 14/34] improve node states --- .../intern/lazy_function_graph_executor.cc | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index f440a22ec4e..8cb5a65965e 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -233,7 +233,7 @@ class Executor { /** * State of every node, indexed by #Node::index_in_graph. */ - Array node_states_; + MutableSpan node_states_; /** * Parameters provided by the caller. This is always non-null, while a node is running. 
*/ @@ -272,11 +272,12 @@ class Executor { pools.pools->local(); for (const int node_index : range) { const Node &node = *self_.graph_.nodes()[node_index]; - NodeState &node_state = *node_states_[node_index]; + NodeState &node_state = node_states_[node_index]; Pools sub_pools = {pools.pools, &local}; this->destruct_node_state(node, node_state, sub_pools); } }); + pools.local->destruct_array(node_states_); } /** @@ -316,7 +317,7 @@ class Executor { side_effect_nodes = self_.side_effect_provider_->get_nodes_with_side_effects(*context_); for (const FunctionNode *node : side_effect_nodes) { const int node_index = node->index_in_graph(); - NodeState &node_state = *node_states_[node_index]; + NodeState &node_state = node_states_[node_index]; node_state.has_side_effects = true; } } @@ -339,16 +340,15 @@ class Executor { void initialize_node_states() { Span nodes = self_.graph_.nodes(); - node_states_.reinitialize(nodes.size()); + node_states_ = context_->pools.local->construct_array(nodes.size()); /* Construct all node states in parallel. */ threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) { - LocalPool<> &allocator = this->get_local_allocator(); + LocalPool<> &allocator = (range.size() == nodes.size()) ? 
*context_->pools.local : + this->get_local_allocator(); for (const int i : range) { const Node &node = *nodes[i]; - NodeState &node_state = *allocator.construct().release(); - node_states_[i] = &node_state; - this->construct_initial_node_state(allocator, node, node_state); + this->construct_initial_node_state(allocator, node, node_states_[i]); } }); } @@ -379,7 +379,6 @@ class Executor { } pools.local->destruct_array(node_state.inputs); pools.local->destruct_array(node_state.outputs); - pools.local->destruct(&node_state); } void schedule_newly_requested_outputs(CurrentTask ¤t_task) @@ -393,7 +392,7 @@ class Executor { } const InputSocket &socket = *self_.graph_outputs_[graph_output_index]; const Node &node = socket.node(); - NodeState &node_state = *node_states_[node.index_in_graph()]; + NodeState &node_state = node_states_[node.index_in_graph()]; this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { this->set_input_required(locked_node, socket); }); @@ -426,7 +425,7 @@ class Executor { for (const int i : self_.graph_inputs_.index_range()) { const OutputSocket &socket = *self_.graph_inputs_[i]; const Node &node = socket.node(); - const NodeState &node_state = *node_states_[node.index_in_graph()]; + const NodeState &node_state = node_states_[node.index_in_graph()]; const OutputState &output_state = node_state.outputs[socket.index()]; if (output_state.usage == ValueUsage::Unused) { params_->set_input_unused(i); @@ -487,7 +486,7 @@ class Executor { for (const int node_index : reachable_node_flags.index_range()) { const Node &node = *all_nodes[node_index]; - NodeState &node_state = *node_states_[node_index]; + NodeState &node_state = node_states_[node_index]; const bool node_is_reachable = reachable_node_flags[node_index]; if (node_is_reachable) { for (const int output_index : node.outputs().index_range()) { @@ -521,7 +520,7 @@ class Executor { CurrentTask ¤t_task) { for (const FunctionNode *node : side_effect_nodes) { - NodeState 
&node_state = *node_states_[node->index_in_graph()]; + NodeState &node_state = node_states_[node->index_in_graph()]; this->with_locked_node(*node, node_state, current_task, [&](LockedNode &locked_node) { this->schedule_node(locked_node, current_task); }); @@ -564,7 +563,7 @@ class Executor { { const Node &node = socket.node(); const int index_in_node = socket.index(); - NodeState &node_state = *node_states_[node.index_in_graph()]; + NodeState &node_state = node_states_[node.index_in_graph()]; OutputState &output_state = node_state.outputs[index_in_node]; /* The notified output socket might be an input of the entire graph. In this case, notify the @@ -602,7 +601,7 @@ class Executor { { const Node &node = socket.node(); const int index_in_node = socket.index(); - NodeState &node_state = *node_states_[node.index_in_graph()]; + NodeState &node_state = node_states_[node.index_in_graph()]; OutputState &output_state = node_state.outputs[index_in_node]; this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { @@ -658,7 +657,7 @@ class Executor { CurrentTask ¤t_task, const FunctionRef f) { - BLI_assert(&node_state == node_states_[node.index_in_graph()]); + BLI_assert(&node_state == &node_states_[node.index_in_graph()]); LockedNode locked_node{node, node_state}; if (this->use_multi_threading()) { @@ -702,7 +701,7 @@ class Executor { void run_node_task(const FunctionNode &node, CurrentTask ¤t_task) { - NodeState &node_state = *node_states_[node.index_in_graph()]; + NodeState &node_state = node_states_[node.index_in_graph()]; LocalPool<> &allocator = this->get_local_allocator(); const LazyFunction &fn = node.function(); @@ -995,7 +994,7 @@ class Executor { const Span targets = from_socket.targets(); for (const InputSocket *target_socket : targets) { const Node &target_node = target_socket->node(); - NodeState &node_state = *node_states_[target_node.index_in_graph()]; + NodeState &node_state = node_states_[target_node.index_in_graph()]; const int 
input_index = target_socket->index(); InputState &input_state = node_state.inputs[input_index]; const bool is_last_target = target_socket == targets.last(); -- 2.30.2 From afae5f603e7bf17a46d31cd7fca3036967f4ae27 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 16:16:54 +0100 Subject: [PATCH 15/34] progress --- source/blender/blenlib/BLI_local_allocator.hh | 179 ++++++++++++++++++ source/blender/blenlib/CMakeLists.txt | 3 +- .../blender/blenlib/intern/local_allocator.cc | 25 +++ ...ol_test.cc => BLI_local_allocator_test.cc} | 9 +- 4 files changed, 207 insertions(+), 9 deletions(-) create mode 100644 source/blender/blenlib/BLI_local_allocator.hh create mode 100644 source/blender/blenlib/intern/local_allocator.cc rename source/blender/blenlib/tests/{BLI_local_pool_test.cc => BLI_local_allocator_test.cc} (56%) diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh new file mode 100644 index 00000000000..6c9c3c9e0b6 --- /dev/null +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -0,0 +1,179 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#pragma once + +#include + +#include "BLI_allocator.hh" +#include "BLI_asan.h" +#include "BLI_enumerable_thread_specific.hh" +#include "BLI_linear_allocator.hh" +#include "BLI_map.hh" +#include "BLI_math_bits.h" +#include "BLI_stack.hh" +#include "BLI_utility_mixins.hh" +#include "BLI_vector.hh" + +namespace blender { + +class LocalAllocatorSet; + +class LocalAllocator : NonCopyable, NonMovable { + private: + static constexpr int64_t s_alignment = 64; + LocalAllocatorSet &owner_set_; + LinearAllocator<> linear_allocator_; + + struct BufferStack { + Stack stack; + int64_t element_size = -1; + int64_t alignment = -1; + }; + + std::array small_stacks_; + Map large_stacks_; + + friend LocalAllocatorSet; + + LocalAllocator(LocalAllocatorSet &owner_set); + ~LocalAllocator(); + + public: + bool is_local() const; + LocalAllocator &local(); + + void *allocate(const 
int64_t size, const int64_t alignment); + void deallocate(const void *buffer, const int64_t size, const int64_t alignment); + + template T &allocate_new(Args &&...args); + template void destruct_free(const T &value); + template MutableSpan allocate_new_array(const int64_t size); + template void destruct_free_array(Span data); + template void destruct_free_array(MutableSpan data); + + private: + BufferStack &get_buffer_stack(const int64_t size, const int64_t alignment); +}; + +class LocalAllocatorSet : NonCopyable, NonMovable { + private: + threading::EnumerableThreadSpecific allocator_by_thread_; + + public: + LocalAllocatorSet(); + ~LocalAllocatorSet(); + + LocalAllocator &local(); +}; + +inline bool LocalAllocator::is_local() const +{ + return this == &owner_set_.local(); +} + +inline LocalAllocator &LocalAllocator::local() +{ + return owner_set_.local(); +} + +inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment) +{ + BLI_assert(size > 0); + BLI_assert(alignment <= size); + BLI_assert(alignment <= s_alignment); + BLI_assert(is_power_of_2_i(alignment)); + BLI_assert(this->is_local()); + + BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); + BLI_assert(buffer_stack.element_size >= size); + BLI_assert(buffer_stack.alignment >= alignment); + + void *buffer; + if (!buffer_stack.stack.is_empty()) { + buffer = buffer_stack.stack.pop(); + BLI_asan_unpoison(buffer, size); + } + else { + buffer = linear_allocator_.allocate(buffer_stack.element_size, buffer_stack.alignment); + } + return buffer; +} + +inline void LocalAllocator::deallocate(const void *buffer, + const int64_t size, + const int64_t alignment) +{ + BLI_assert(size > 0); + BLI_assert(alignment <= size); + BLI_assert(alignment <= s_alignment); + BLI_assert(is_power_of_2_i(alignment)); + BLI_assert(this->is_local()); + +#ifdef DEBUG + memset(const_cast(buffer), -1, size); +#endif + BLI_asan_poison(buffer, size); + + BufferStack &buffer_stack = 
this->get_buffer_stack(size, alignment); + BLI_assert(buffer_stack.element_size >= size); + BLI_assert(buffer_stack.alignment >= alignment); + + buffer_stack.stack.push(const_cast(buffer)); +} + +inline LocalAllocator::BufferStack &LocalAllocator::get_buffer_stack(const int64_t size, + const int64_t /*alignment*/) +{ + if (size <= 64) { + return small_stacks_[(size - 1) >> 3]; + } + const int key = bitscan_reverse_uint64(uint64_t(size)); + return large_stacks_.lookup_or_add_cb(key, [&]() { + BufferStack buffer_stack; + buffer_stack.element_size = int64_t(1) << (8 * sizeof(int64_t) - key); + buffer_stack.alignment = s_alignment; + return buffer_stack; + }); +} + +template inline T &LocalAllocator::allocate_new(Args &&...args) +{ + void *buffer = this->allocate(sizeof(T), alignof(T)); + T *value = new (buffer) T(std::forward(args)...); + return *value; +} + +template inline void LocalAllocator::destruct_free(const T &value) +{ + std::destroy_at(value); + this->deallocate(&value, sizeof(T), alignof(T)); +} + +template MutableSpan inline LocalAllocator::allocate_new_array(const int64_t size) +{ + if (size == 0) { + return {}; + } + void *buffer = this->allocate(size * sizeof(T), alignof(T)); + return {static_cast(buffer), size}; +} + +template inline void LocalAllocator::destruct_free_array(Span data) +{ + if (data.is_empty()) { + return; + } + destruct_n(const_cast(data.data()), data.size()); + this->deallocate(data.data(), data.size_in_bytes(), alignof(T)); +} + +template inline void LocalAllocator::destruct_free_array(MutableSpan data) +{ + this->destruct_free_array(data.as_span()); +} + +inline LocalAllocator &LocalAllocatorSet::local() +{ + return allocator_by_thread_.local(); +} + +} // namespace blender diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index 9e31aec32d2..5830df5ec5c 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -255,6 +255,7 @@ set(SRC 
BLI_linklist_stack.h BLI_listbase.h BLI_listbase_wrapper.hh + BLI_local_allocator.hh BLI_local_pool.hh BLI_map.hh BLI_map_slots.hh @@ -480,7 +481,7 @@ if(WITH_GTESTS) tests/BLI_linear_allocator_test.cc tests/BLI_linklist_lockfree_test.cc tests/BLI_listbase_test.cc - tests/BLI_local_pool_test.cc + tests/BLI_local_allocator_test.cc tests/BLI_map_test.cc tests/BLI_math_base_safe_test.cc tests/BLI_math_base_test.cc diff --git a/source/blender/blenlib/intern/local_allocator.cc b/source/blender/blenlib/intern/local_allocator.cc new file mode 100644 index 00000000000..4634da8e31e --- /dev/null +++ b/source/blender/blenlib/intern/local_allocator.cc @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include "BLI_local_allocator.hh" + +namespace blender { + +LocalAllocatorSet::LocalAllocatorSet() + : allocator_by_thread_([this]() { return LocalAllocator{*this}; }) +{ +} + +LocalAllocatorSet::~LocalAllocatorSet() = default; + +LocalAllocator::LocalAllocator(LocalAllocatorSet &owner_set) : owner_set_(owner_set) +{ + for (const int64_t i : IndexRange(small_stacks_.size())) { + BufferStack &buffer_stack = small_stacks_[i]; + buffer_stack.element_size = 8 * (i + 1); + buffer_stack.alignment = power_of_2_min_u(buffer_stack.element_size); + } +} + +LocalAllocator::~LocalAllocator() = default; + +} // namespace blender diff --git a/source/blender/blenlib/tests/BLI_local_pool_test.cc b/source/blender/blenlib/tests/BLI_local_allocator_test.cc similarity index 56% rename from source/blender/blenlib/tests/BLI_local_pool_test.cc rename to source/blender/blenlib/tests/BLI_local_allocator_test.cc index 88cc63ea803..1e7feca5b39 100644 --- a/source/blender/blenlib/tests/BLI_local_pool_test.cc +++ b/source/blender/blenlib/tests/BLI_local_allocator_test.cc @@ -1,17 +1,10 @@ /* SPDX-License-Identifier: Apache-2.0 */ -#include "BLI_local_pool.hh" +#include "BLI_local_allocator.hh" #include "BLI_strict_flags.h" #include "testing/testing.h" namespace blender::tests { 
-TEST(local_pool, Test) -{ - LocalPool pool; - - std::cout << pool.allocate(30000, 8) << "\n"; -} - } // namespace blender::tests -- 2.30.2 From e4491302fcda2acf1884fe69fbef8076830b07ac Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 16:44:04 +0100 Subject: [PATCH 16/34] progress --- source/blender/blenlib/BLI_local_allocator.hh | 25 ++- source/blender/blenlib/BLI_local_pool.hh | 166 ------------------ source/blender/blenlib/CMakeLists.txt | 2 +- source/blender/functions/FN_lazy_function.hh | 8 +- .../functions/FN_lazy_function_execute.hh | 10 +- .../FN_lazy_function_graph_executor.hh | 4 +- .../blender/functions/intern/lazy_function.cc | 4 +- .../intern/lazy_function_graph_executor.cc | 85 +++++---- source/blender/modifiers/intern/MOD_nodes.cc | 13 +- .../intern/geometry_nodes_lazy_function.cc | 14 +- 10 files changed, 85 insertions(+), 246 deletions(-) delete mode 100644 source/blender/blenlib/BLI_local_pool.hh diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh index 6c9c3c9e0b6..a6dd2c29504 100644 --- a/source/blender/blenlib/BLI_local_allocator.hh +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -35,9 +35,10 @@ class LocalAllocator : NonCopyable, NonMovable { friend LocalAllocatorSet; LocalAllocator(LocalAllocatorSet &owner_set); - ~LocalAllocator(); public: + ~LocalAllocator(); + bool is_local() const; LocalAllocator &local(); @@ -45,8 +46,10 @@ class LocalAllocator : NonCopyable, NonMovable { void deallocate(const void *buffer, const int64_t size, const int64_t alignment); template T &allocate_new(Args &&...args); - template void destruct_free(const T &value); - template MutableSpan allocate_new_array(const int64_t size); + template void destruct_free(const T *value); + template MutableSpan allocate_array(const int64_t size); + template + MutableSpan allocate_new_array(const int64_t size, Args &&...args); template void destruct_free_array(Span data); template void 
destruct_free_array(MutableSpan data); @@ -142,13 +145,13 @@ template inline T &LocalAllocator::allocate_new(Ar return *value; } -template inline void LocalAllocator::destruct_free(const T &value) +template inline void LocalAllocator::destruct_free(const T *value) { std::destroy_at(value); - this->deallocate(&value, sizeof(T), alignof(T)); + this->deallocate(value, sizeof(T), alignof(T)); } -template MutableSpan inline LocalAllocator::allocate_new_array(const int64_t size) +template MutableSpan inline LocalAllocator::allocate_array(const int64_t size) { if (size == 0) { return {}; @@ -157,6 +160,16 @@ template MutableSpan inline LocalAllocator::allocate_new_array(co return {static_cast(buffer), size}; } +template +MutableSpan inline LocalAllocator::allocate_new_array(const int64_t size, Args &&...args) +{ + MutableSpan array = this->allocate_array(size); + for (const int64_t i : IndexRange(size)) { + new (&array[i]) T(std::forward(args)...); + } + return array; +} + template inline void LocalAllocator::destruct_free_array(Span data) { if (data.is_empty()) { diff --git a/source/blender/blenlib/BLI_local_pool.hh b/source/blender/blenlib/BLI_local_pool.hh deleted file mode 100644 index 05e525b34f3..00000000000 --- a/source/blender/blenlib/BLI_local_pool.hh +++ /dev/null @@ -1,166 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#pragma once - -#include - -#include "BLI_allocator.hh" -#include "BLI_asan.h" -#include "BLI_enumerable_thread_specific.hh" -#include "BLI_linear_allocator.hh" -#include "BLI_map.hh" -#include "BLI_math_bits.h" -#include "BLI_stack.hh" -#include "BLI_utility_mixins.hh" -#include "BLI_vector.hh" - -namespace blender { - -template class LocalPool : NonCopyable, NonMovable { - private: - static constexpr int64_t s_alignment = 64; - LinearAllocator<> linear_allocator_; - - struct BufferStack { - int64_t element_size = -1; - int64_t min_alignment = -1; - Stack stack; - }; - - std::array small_stacks_; - std::unique_ptr> large_stacks_; - - 
public: - LocalPool() - { - for (const int64_t i : IndexRange(small_stacks_.size())) { - BufferStack &buffer_stack = small_stacks_[i]; - buffer_stack.element_size = 8 * (i + 1); - buffer_stack.min_alignment = power_of_2_min_u(buffer_stack.element_size); - } - } - - ~LocalPool() - { - } - - void *allocate(const int64_t size, const int64_t alignment) - { - BLI_assert(size > 0); - BLI_assert(alignment <= size && alignment <= s_alignment); - - BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); - BLI_assert(buffer_stack.element_size >= size); - BLI_assert(buffer_stack.min_alignment >= alignment); - - void *buffer; - if (!buffer_stack.stack.is_empty()) { - buffer = buffer_stack.stack.pop(); - BLI_asan_unpoison(buffer, size); - } - else { - buffer = linear_allocator_.allocate(buffer_stack.element_size, buffer_stack.min_alignment); - } - - return buffer; - } - - void deallocate(const void *buffer, const int64_t size, const int64_t alignment) - { - BLI_assert(size > 0); - BLI_assert(alignment <= size && alignment <= s_alignment); - -#ifdef DEBUG - memset(const_cast(buffer), -1, size); -#endif - BLI_asan_poison(buffer, size); - BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); - BLI_assert(buffer_stack.element_size >= size); - buffer_stack.stack.push(const_cast(buffer)); - } - - template destruct_ptr construct(Args &&...args) - { - void *buffer = this->allocate(sizeof(T), alignof(T)); - T *value = new (buffer) T(std::forward(args)...); - return destruct_ptr(value); - } - - template MutableSpan allocate_array(int64_t size) - { - if (size == 0) { - return {}; - } - T *array = static_cast(this->allocate(sizeof(T) * size, alignof(T))); - return MutableSpan(array, size); - } - - template - MutableSpan construct_array(int64_t size, Args &&...args) - { - MutableSpan array = this->allocate_array(size); - for (const int64_t i : IndexRange(size)) { - new (&array[i]) T(std::forward(args)...); - } - return array; - } - - template void 
destruct_array(Span data) - { - if (data.is_empty()) { - return; - } - destruct_n(const_cast(data.data()), data.size()); - this->deallocate(data.data(), data.size() * sizeof(T), alignof(T)); - } - - template void destruct_array(MutableSpan data) - { - this->destruct_array(data.as_span()); - } - - template void destruct(const T *value) - { - std::destroy_at(value); - this->deallocate(value, sizeof(T), alignof(T)); - } - - private: - BufferStack &get_buffer_stack(const int64_t size, const int64_t /*alignment*/) - { - if (size <= 64) { - return small_stacks_[(size - 1) >> 3]; - } - if (!large_stacks_) { - large_stacks_ = std::make_unique>(); - } - const int key = bitscan_reverse_uint64(uint64_t(size)); - return large_stacks_->lookup_or_add_cb(key, [&]() { - BufferStack buffer_stack; - buffer_stack.element_size = int64_t(1) << (8 * sizeof(int64_t) - key); - buffer_stack.min_alignment = s_alignment; - return buffer_stack; - }); - } -}; - -class LocalMemoryPools { - private: - threading::EnumerableThreadSpecific> pool_by_thread_; - - public: - ~LocalMemoryPools() - { - } - - LocalPool<> &local() - { - return pool_by_thread_.local(); - } -}; - -struct Pools { - LocalMemoryPools *pools = nullptr; - LocalPool<> *local = nullptr; -}; - -} // namespace blender diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt index 5830df5ec5c..87e26e50215 100644 --- a/source/blender/blenlib/CMakeLists.txt +++ b/source/blender/blenlib/CMakeLists.txt @@ -90,6 +90,7 @@ set(SRC intern/lazy_threading.cc intern/length_parameterize.cc intern/listbase.cc + intern/local_allocator.cc intern/math_base.c intern/math_base_inline.c intern/math_base_safe_inline.c @@ -256,7 +257,6 @@ set(SRC BLI_listbase.h BLI_listbase_wrapper.hh BLI_local_allocator.hh - BLI_local_pool.hh BLI_map.hh BLI_map_slots.hh BLI_math.h diff --git a/source/blender/functions/FN_lazy_function.hh b/source/blender/functions/FN_lazy_function.hh index 301808ac88d..55e6d961df1 100644 --- 
a/source/blender/functions/FN_lazy_function.hh +++ b/source/blender/functions/FN_lazy_function.hh @@ -42,7 +42,7 @@ #include "BLI_function_ref.hh" #include "BLI_generic_pointer.hh" #include "BLI_linear_allocator.hh" -#include "BLI_local_pool.hh" +#include "BLI_local_allocator.hh" #include "BLI_vector.hh" #include @@ -100,7 +100,7 @@ struct Context { */ UserData *user_data; - Pools pools; + LocalAllocator *allocator; }; /** @@ -279,12 +279,12 @@ class LazyFunction { * Allocates storage for this function. The storage will be passed to every call to #execute. * If the function does not keep track of any state, this does not have to be implemented. */ - virtual void *init_storage(Pools &pools) const; + virtual void *init_storage(LocalAllocator &allocator) const; /** * Destruct the storage created in #init_storage. */ - virtual void destruct_storage(void *storage, Pools &pools) const; + virtual void destruct_storage(void *storage, LocalAllocator &allocator) const; /** * Calls `fn` with the input indices that the given `output_index` may depend on. 
By default diff --git a/source/blender/functions/FN_lazy_function_execute.hh b/source/blender/functions/FN_lazy_function_execute.hh index 75d522754f0..c604ba19deb 100644 --- a/source/blender/functions/FN_lazy_function_execute.hh +++ b/source/blender/functions/FN_lazy_function_execute.hh @@ -85,16 +85,16 @@ inline void execute_lazy_function_eagerly_impl( ...); output_usages.fill(ValueUsage::Used); set_outputs.fill(false); - LocalMemoryPools local_pools; - Pools pools{&local_pools, &local_pools.local()}; + LocalAllocatorSet allocator_set; + LocalAllocator &allocator = allocator_set.local(); Context context; context.user_data = user_data; - context.storage = fn.init_storage(pools); - context.pools = pools; + context.storage = fn.init_storage(allocator); + context.allocator = &allocator; BasicParams params{ fn, input_pointers, output_pointers, input_usages, output_usages, set_outputs}; fn.execute(params, context); - fn.destruct_storage(context.storage, pools); + fn.destruct_storage(context.storage, allocator); /* Make sure all outputs have been computed. 
*/ BLI_assert(!Span(set_outputs).contains(false)); diff --git a/source/blender/functions/FN_lazy_function_graph_executor.hh b/source/blender/functions/FN_lazy_function_graph_executor.hh index ae1b0ac95fe..14f79970425 100644 --- a/source/blender/functions/FN_lazy_function_graph_executor.hh +++ b/source/blender/functions/FN_lazy_function_graph_executor.hh @@ -88,8 +88,8 @@ class GraphExecutor : public LazyFunction { const Logger *logger, const SideEffectProvider *side_effect_provider); - void *init_storage(Pools &pools) const override; - void destruct_storage(void *storage, Pools &pools) const override; + void *init_storage(LocalAllocator &allocator) const override; + void destruct_storage(void *storage, LocalAllocator &allocator) const override; private: void execute_impl(Params ¶ms, const Context &context) const override; diff --git a/source/blender/functions/intern/lazy_function.cc b/source/blender/functions/intern/lazy_function.cc index 71e8ad6b39b..e5d7481f4d9 100644 --- a/source/blender/functions/intern/lazy_function.cc +++ b/source/blender/functions/intern/lazy_function.cc @@ -25,12 +25,12 @@ std::string LazyFunction::output_name(int index) const return outputs_[index].debug_name; } -void *LazyFunction::init_storage(Pools & /*pools*/) const +void *LazyFunction::init_storage(LocalAllocator & /*allocator*/) const { return nullptr; } -void LazyFunction::destruct_storage(void *storage, Pools & /*pools*/) const +void LazyFunction::destruct_storage(void *storage, LocalAllocator & /*allocator*/) const { BLI_assert(storage == nullptr); UNUSED_VARS_NDEBUG(storage); diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index b49d35cf05a..e4ce2848117 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -262,22 +262,20 @@ class Executor { BLI_assert(self_.graph_.node_indices_are_valid()); 
} - void destruct_state(Pools &pools) + void destruct_state(LocalAllocator &allocator) { if (TaskPool *task_pool = task_pool_.load()) { BLI_task_pool_free(task_pool); } threading::parallel_for(node_states_.index_range(), 1024, [&](const IndexRange range) { - LocalPool<> &local = (range.size() == node_states_.size()) ? *pools.local : - pools.pools->local(); + LocalAllocator &local_allocator = allocator.local(); for (const int node_index : range) { const Node &node = *self_.graph_.nodes()[node_index]; NodeState &node_state = node_states_[node_index]; - Pools sub_pools = {pools.pools, &local}; - this->destruct_node_state(node, node_state, sub_pools); + this->destruct_node_state(node, node_state, local_allocator); } }); - pools.local->destruct_array(node_states_); + allocator.destruct_free_array(node_states_); } /** @@ -340,45 +338,44 @@ class Executor { void initialize_node_states() { Span nodes = self_.graph_.nodes(); - node_states_ = context_->pools.local->construct_array(nodes.size()); + node_states_ = context_->allocator->allocate_new_array(nodes.size()); /* Construct all node states in parallel. */ threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) { - LocalPool<> &allocator = (range.size() == nodes.size()) ? 
*context_->pools.local : - this->get_local_allocator(); + LocalAllocator &local_allocator = context_->allocator->local(); for (const int i : range) { const Node &node = *nodes[i]; - this->construct_initial_node_state(allocator, node, node_states_[i]); + this->construct_initial_node_state(local_allocator, node, node_states_[i]); } }); } - void construct_initial_node_state(LocalPool<> &allocator, + void construct_initial_node_state(LocalAllocator &allocator, const Node &node, NodeState &node_state) { const Span node_inputs = node.inputs(); const Span node_outputs = node.outputs(); - node_state.inputs = allocator.construct_array(node_inputs.size()); - node_state.outputs = allocator.construct_array(node_outputs.size()); + node_state.inputs = allocator.allocate_new_array(node_inputs.size()); + node_state.outputs = allocator.allocate_new_array(node_outputs.size()); } - void destruct_node_state(const Node &node, NodeState &node_state, Pools &pools) + void destruct_node_state(const Node &node, NodeState &node_state, LocalAllocator &allocator) { if (node.is_function()) { const LazyFunction &fn = static_cast(node).function(); if (node_state.storage != nullptr) { - fn.destruct_storage(node_state.storage, pools); + fn.destruct_storage(node_state.storage, allocator); } } for (const int i : node.inputs().index_range()) { InputState &input_state = node_state.inputs[i]; const InputSocket &input_socket = node.input(i); - this->destruct_input_value_if_exists(input_state, input_socket.type(), *pools.local); + this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator); } - pools.local->destruct_array(node_state.inputs); - pools.local->destruct_array(node_state.outputs); + allocator.destruct_free_array(node_state.inputs); + allocator.destruct_free_array(node_state.outputs); } void schedule_newly_requested_outputs(CurrentTask ¤t_task) @@ -441,14 +438,14 @@ class Executor { * `OutputState.potential_target_sockets`. 
*/ void initialize_static_value_usages(const Span side_effect_nodes, - LocalPool<> &allocator) + LocalAllocator &allocator) { const Span all_nodes = self_.graph_.nodes(); /* Used for a search through all nodes that outputs depend on. */ Stack reachable_nodes_to_check; - MutableSpan reachable_node_flags = allocator.allocate_array(all_nodes.size()); - BLI_SCOPED_DEFER([&]() { allocator.destruct_array(reachable_node_flags); }); + MutableSpan reachable_node_flags = allocator.allocate_new_array(all_nodes.size()); + BLI_SCOPED_DEFER([&]() { allocator.destruct_free_array(reachable_node_flags); }); reachable_node_flags.fill(false); /* Graph outputs are always reachable. */ @@ -529,7 +526,7 @@ class Executor { void forward_newly_provided_inputs(CurrentTask ¤t_task) { - LocalPool<> &allocator = this->get_local_allocator(); + LocalAllocator &allocator = this->get_local_allocator(); for (const int graph_input_index : self_.graph_inputs_.index_range()) { std::atomic &was_loaded = loaded_inputs_[graph_input_index]; if (was_loaded.load()) { @@ -548,7 +545,7 @@ class Executor { } void forward_newly_provided_input(CurrentTask ¤t_task, - LocalPool<> &allocator, + LocalAllocator &allocator, const int graph_input_index, void *input_data) { @@ -702,7 +699,7 @@ class Executor { void run_node_task(const FunctionNode &node, CurrentTask ¤t_task) { NodeState &node_state = node_states_[node.index_in_graph()]; - LocalPool<> &allocator = this->get_local_allocator(); + LocalAllocator &allocator = this->get_local_allocator(); const LazyFunction &fn = node.function(); bool node_needs_execution = false; @@ -768,8 +765,7 @@ class Executor { if (node_needs_execution) { if (!node_state.storage_and_defaults_initialized) { /* Initialize storage. */ - Pools pools{context_->pools.pools, &allocator}; - node_state.storage = fn.init_storage(pools); + node_state.storage = fn.init_storage(allocator); /* Load unlinked inputs. 
*/ for (const int input_index : node.inputs().index_range()) { @@ -843,7 +839,7 @@ class Executor { } } - void finish_node_if_possible(LockedNode &locked_node, LocalPool<> &allocator) + void finish_node_if_possible(LockedNode &locked_node, LocalAllocator &allocator) { const Node &node = locked_node.node; NodeState &node_state = locked_node.node_state; @@ -881,8 +877,7 @@ class Executor { if (node_state.storage != nullptr) { if (node.is_function()) { const FunctionNode &fn_node = static_cast(node); - Pools pools{context_->pools.pools, &allocator}; - fn_node.function().destruct_storage(node_state.storage, pools); + fn_node.function().destruct_storage(node_state.storage, allocator); } node_state.storage = nullptr; } @@ -890,7 +885,7 @@ class Executor { void destruct_input_value_if_exists(InputState &input_state, const CPPType &type, - LocalPool<> &allocator) + LocalAllocator &allocator) { if (input_state.value != nullptr) { type.destruct(input_state.value); @@ -902,14 +897,14 @@ class Executor { void execute_node(const FunctionNode &node, NodeState &node_state, CurrentTask ¤t_task, - LocalPool<> &allocator); + LocalAllocator &allocator); void set_input_unused_during_execution(const Node &node, NodeState &node_state, const int input_index, CurrentTask ¤t_task) { - LocalPool<> &allocator = this->get_local_allocator(); + LocalAllocator &allocator = this->get_local_allocator(); const InputSocket &input_socket = node.input(input_index); this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { this->set_input_unused(locked_node, input_socket, allocator); @@ -918,7 +913,7 @@ class Executor { void set_input_unused(LockedNode &locked_node, const InputSocket &input_socket, - LocalPool<> &allocator) + LocalAllocator &allocator) { NodeState &node_state = locked_node.node_state; const int input_index = input_socket.index(); @@ -984,7 +979,7 @@ class Executor { CurrentTask ¤t_task) { BLI_assert(value_to_forward.get() != nullptr); - LocalPool<> 
&allocator = this->get_local_allocator(); + LocalAllocator &allocator = this->get_local_allocator(); const CPPType &type = *value_to_forward.type(); if (self_.logger_ != nullptr) { @@ -1143,12 +1138,12 @@ class Executor { }); } - LocalPool<> &get_local_allocator() + LocalAllocator &get_local_allocator() { if (this->use_multi_threading()) { - return context_->pools.pools->local(); + return context_->allocator->local(); } - return *context_->pools.local; + return *context_->allocator; } }; @@ -1197,7 +1192,7 @@ class GraphExecutorLFParams final : public Params { OutputState &output_state = node_state_.outputs[index]; BLI_assert(!output_state.has_been_computed); if (output_state.value == nullptr) { - LocalPool<> &allocator = executor_.get_local_allocator(); + LocalAllocator &allocator = executor_.get_local_allocator(); const CPPType &type = node_.output(index).type(); output_state.value = allocator.allocate(type.size(), type.alignment()); } @@ -1247,14 +1242,14 @@ class GraphExecutorLFParams final : public Params { inline void Executor::execute_node(const FunctionNode &node, NodeState &node_state, CurrentTask ¤t_task, - LocalPool<> &allocator) + LocalAllocator &allocator) { const LazyFunction &fn = node.function(); GraphExecutorLFParams node_params{fn, *this, node, node_state, current_task}; BLI_assert(context_ != nullptr); Context fn_context = *context_; fn_context.storage = node_state.storage; - fn_context.pools.local = &allocator; + fn_context.allocator = &allocator; if (self_.logger_ != nullptr) { self_.logger_->log_before_node_execute(node, node_params, fn_context); @@ -1311,17 +1306,17 @@ void GraphExecutor::execute_impl(Params ¶ms, const Context &context) const executor.execute(params, context); } -void *GraphExecutor::init_storage(Pools &pools) const +void *GraphExecutor::init_storage(LocalAllocator &allocator) const { - Executor &executor = *pools.local->construct(*this).release(); + Executor &executor = allocator.allocate_new(*this); return &executor; } 
-void GraphExecutor::destruct_storage(void *storage, Pools &pools) const +void GraphExecutor::destruct_storage(void *storage, LocalAllocator &allocator) const { Executor *executor = static_cast(storage); - executor->destruct_state(pools); - pools.local->destruct(executor); + executor->destruct_state(allocator); + allocator.destruct_free(executor); } void GraphExecutorLogger::log_socket_value(const Socket &socket, diff --git a/source/blender/modifiers/intern/MOD_nodes.cc b/source/blender/modifiers/intern/MOD_nodes.cc index d645fb8f2e6..3aa225f9a15 100644 --- a/source/blender/modifiers/intern/MOD_nodes.cc +++ b/source/blender/modifiers/intern/MOD_nodes.cc @@ -1162,11 +1162,8 @@ static GeometrySet compute_geometry( blender::bke::ModifierComputeContext modifier_compute_context{nullptr, nmd->modifier.name}; user_data.compute_context = &modifier_compute_context; - blender::LocalMemoryPools local_pools; - blender::Pools pools; - pools.pools = &local_pools; - pools.local = &local_pools.local(); - blender::LocalPool<> &allocator = *pools.local; + blender::LocalAllocatorSet allocator_set; + blender::LocalAllocator &allocator = allocator_set.local(); Vector inputs_to_destruct; @@ -1208,9 +1205,9 @@ static GeometrySet compute_geometry( } lf::Context lf_context; - lf_context.storage = graph_executor.init_storage(pools); + lf_context.storage = graph_executor.init_storage(allocator); lf_context.user_data = &user_data; - lf_context.pools = pools; + lf_context.allocator = &allocator; lf::BasicParams lf_params{graph_executor, param_inputs, param_outputs, @@ -1218,7 +1215,7 @@ static GeometrySet compute_geometry( param_output_usages, param_set_outputs}; graph_executor.execute(lf_params, lf_context); - graph_executor.destruct_storage(lf_context.storage, pools); + graph_executor.destruct_storage(lf_context.storage, allocator); for (GMutablePointer &ptr : inputs_to_destruct) { ptr.destruct(); diff --git a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc 
b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc index 9c83c0bcf16..3aebc8a4b6e 100644 --- a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc +++ b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc @@ -806,18 +806,18 @@ class LazyFunctionForGroupNode : public LazyFunction { graph_executor_->execute(params, group_context); } - void *init_storage(Pools &pools) const override + void *init_storage(LocalAllocator &allocator) const override { - Storage *s = pools.local->construct().release(); - s->graph_executor_storage = graph_executor_->init_storage(pools); - return s; + Storage &s = allocator.allocate_new(); + s.graph_executor_storage = graph_executor_->init_storage(allocator); + return &s; } - void destruct_storage(void *storage, Pools &pools) const override + void destruct_storage(void *storage, LocalAllocator &allocator) const override { Storage *s = static_cast(storage); - graph_executor_->destruct_storage(s->graph_executor_storage, pools); - pools.local->destruct(s); + graph_executor_->destruct_storage(s->graph_executor_storage, allocator); + allocator.destruct_free(s); } std::string name() const override -- 2.30.2 From ff3273357fc005c0e2aa7a257703b170f62fcf3b Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 17:09:50 +0100 Subject: [PATCH 17/34] progress --- source/blender/blenlib/BLI_local_allocator.hh | 84 +++++++++++++++++-- .../intern/lazy_function_graph_executor.cc | 4 +- 2 files changed, 80 insertions(+), 8 deletions(-) diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh index a6dd2c29504..7779bb92706 100644 --- a/source/blender/blenlib/BLI_local_allocator.hh +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -29,6 +29,12 @@ class LocalAllocator : NonCopyable, NonMovable { int64_t alignment = -1; }; + struct Head { + int64_t buffer_size; + int64_t buffer_alignment; + }; + static_assert(is_power_of_2_constexpr(sizeof(Head))); + std::array 
small_stacks_; Map large_stacks_; @@ -41,20 +47,24 @@ class LocalAllocator : NonCopyable, NonMovable { bool is_local() const; LocalAllocator &local(); + LocalAllocatorSet &owner_set(); - void *allocate(const int64_t size, const int64_t alignment); - void deallocate(const void *buffer, const int64_t size, const int64_t alignment); + void *allocate(int64_t size, int64_t alignment); + void deallocate(const void *buffer, int64_t size, int64_t alignment); + + void *allocate_with_head(int64_t size, int64_t alignment); + void deallocate_with_head(const void *buffer); template T &allocate_new(Args &&...args); template void destruct_free(const T *value); - template MutableSpan allocate_array(const int64_t size); + template MutableSpan allocate_array(int64_t size); template - MutableSpan allocate_new_array(const int64_t size, Args &&...args); + MutableSpan allocate_new_array(int64_t size, Args &&...args); template void destruct_free_array(Span data); template void destruct_free_array(MutableSpan data); private: - BufferStack &get_buffer_stack(const int64_t size, const int64_t alignment); + BufferStack &get_buffer_stack(int64_t size, int64_t alignment); }; class LocalAllocatorSet : NonCopyable, NonMovable { @@ -68,6 +78,48 @@ class LocalAllocatorSet : NonCopyable, NonMovable { LocalAllocator &local(); }; +class ThreadedLocalAllocatorRef { + private: + LocalAllocatorSet &allocator_set_; + + public: + ThreadedLocalAllocatorRef(LocalAllocator &allocator) : allocator_set_(allocator.owner_set()) + { + } + + void *allocate(const size_t size, const size_t alignment, const char * /*name*/) + { + LocalAllocator &allocator = allocator_set_.local(); + return allocator.allocate_with_head(size, alignment); + } + + void deallocate(void *ptr) + { + LocalAllocator &allocator = allocator_set_.local(); + allocator.deallocate_with_head(ptr); + } +}; + +class LocalAllocatorRef { + private: + LocalAllocator &allocator_; + + public: + LocalAllocatorRef(LocalAllocator &allocator) : 
allocator_(allocator) + { + } + + void *allocate(const size_t size, const size_t alignment, const char * /*name*/) + { + return allocator_.allocate_with_head(size, alignment); + } + + void deallocate(void *ptr) + { + allocator_.deallocate_with_head(ptr); + } +}; + inline bool LocalAllocator::is_local() const { return this == &owner_set_.local(); @@ -78,6 +130,11 @@ inline LocalAllocator &LocalAllocator::local() return owner_set_.local(); } +inline LocalAllocatorSet &LocalAllocator::owner_set() +{ + return owner_set_; +} + inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment) { BLI_assert(size > 0); @@ -138,6 +195,23 @@ inline LocalAllocator::BufferStack &LocalAllocator::get_buffer_stack(const int64 }); } +inline void *LocalAllocator::allocate_with_head(int64_t size, int64_t alignment) +{ + const int64_t buffer_size = size + std::max(alignment, sizeof(Head)); + const int64_t buffer_alignment = std::max(alignment, alignof(Head)); + void *buffer = this->allocate(buffer_size, buffer_alignment); + Head *head = new (buffer) Head; + head->buffer_size = buffer_size; + head->buffer_alignment = buffer_alignment; + return head + 1; +} + +inline void LocalAllocator::deallocate_with_head(const void *buffer) +{ + const Head *head = static_cast(buffer) - 1; + this->deallocate(head, head->buffer_size, head->buffer_alignment); +} + template inline T &LocalAllocator::allocate_new(Args &&...args) { void *buffer = this->allocate(sizeof(T), alignof(T)); diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index e4ce2848117..7059a558c6f 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -444,9 +444,7 @@ class Executor { /* Used for a search through all nodes that outputs depend on. 
*/ Stack reachable_nodes_to_check; - MutableSpan reachable_node_flags = allocator.allocate_new_array(all_nodes.size()); - BLI_SCOPED_DEFER([&]() { allocator.destruct_free_array(reachable_node_flags); }); - reachable_node_flags.fill(false); + Array reachable_node_flags{all_nodes.size(), false, allocator}; /* Graph outputs are always reachable. */ for (const InputSocket *socket : self_.graph_outputs_) { -- 2.30.2 From 8dd83536d37ecca569c41f1081aa98adfd0ba54a Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 17:19:05 +0100 Subject: [PATCH 18/34] progress --- source/blender/functions/intern/lazy_function_graph_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index 7059a558c6f..05657ac99f2 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -443,7 +443,7 @@ class Executor { const Span all_nodes = self_.graph_.nodes(); /* Used for a search through all nodes that outputs depend on. */ - Stack reachable_nodes_to_check; + Stack reachable_nodes_to_check{allocator}; Array reachable_node_flags{all_nodes.size(), false, allocator}; /* Graph outputs are always reachable. 
*/ -- 2.30.2 From e8e79833f9260fa2c73961df2034d5f3cea77548 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 17:37:51 +0100 Subject: [PATCH 19/34] add debug utilities --- source/blender/blenlib/BLI_local_allocator.hh | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh index 7779bb92706..dfddfdcd8ee 100644 --- a/source/blender/blenlib/BLI_local_allocator.hh +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -13,6 +13,9 @@ #include "BLI_utility_mixins.hh" #include "BLI_vector.hh" +// #define BLI_LOCAL_ALLOCATOR_USE_GUARDED +// #define BLI_LOCAL_ALLOCATOR_DEBUG_SIZES + namespace blender { class LocalAllocatorSet; @@ -71,6 +74,13 @@ class LocalAllocatorSet : NonCopyable, NonMovable { private: threading::EnumerableThreadSpecific allocator_by_thread_; +#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES + std::mutex debug_sizes_mutex_; + Map> debug_sizes_; +#endif + + friend LocalAllocator; + public: LocalAllocatorSet(); ~LocalAllocatorSet(); @@ -143,6 +153,10 @@ inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignmen BLI_assert(is_power_of_2_i(alignment)); BLI_assert(this->is_local()); +#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED + return MEM_mallocN_aligned(size, alignment, __func__); +#endif + BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); BLI_assert(buffer_stack.element_size >= size); BLI_assert(buffer_stack.alignment >= alignment); @@ -155,6 +169,14 @@ inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignmen else { buffer = linear_allocator_.allocate(buffer_stack.element_size, buffer_stack.alignment); } + +#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES + { + std::lock_guard lock{owner_set_.debug_sizes_mutex_}; + owner_set_.debug_sizes_.add_new(buffer, {size, alignment}); + } +#endif + return buffer; } @@ -168,6 +190,25 @@ inline void LocalAllocator::deallocate(const void *buffer, 
BLI_assert(is_power_of_2_i(alignment)); BLI_assert(this->is_local()); +#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED + MEM_freeN(const_cast(buffer)); + UNUSED_VARS_NDEBUG(size, alignment); + return; +#endif + +#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES + { + std::lock_guard lock{owner_set_.debug_sizes_mutex_}; + auto [last_size, last_alignment] = owner_set_.debug_sizes_.pop(buffer); + if (last_size != size) { + BLI_assert_unreachable(); + } + if (last_alignment != alignment) { + BLI_assert_unreachable(); + } + } +#endif + #ifdef DEBUG memset(const_cast(buffer), -1, size); #endif -- 2.30.2 From a5bf084fb6146fa608344f673897a1a5baba7264 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 18:43:24 +0100 Subject: [PATCH 20/34] progress --- source/blender/blenlib/BLI_local_allocator.hh | 8 +- .../functions/FN_multi_function_context.hh | 22 ++-- .../functions/intern/multi_function.cc | 3 +- .../multi_function_procedure_executor.cc | 124 ++++++++++++------ .../intern/geometry_nodes_lazy_function.cc | 29 ++-- 5 files changed, 120 insertions(+), 66 deletions(-) diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh index dfddfdcd8ee..4c017e8412c 100644 --- a/source/blender/blenlib/BLI_local_allocator.hh +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -145,7 +145,7 @@ inline LocalAllocatorSet &LocalAllocator::owner_set() return owner_set_; } -inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment) +BLI_NOINLINE inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment) { BLI_assert(size > 0); BLI_assert(alignment <= size); @@ -180,9 +180,9 @@ inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignmen return buffer; } -inline void LocalAllocator::deallocate(const void *buffer, - const int64_t size, - const int64_t alignment) +BLI_NOINLINE inline void LocalAllocator::deallocate(const void *buffer, + const int64_t size, + const int64_t 
alignment) { BLI_assert(size > 0); BLI_assert(alignment <= size); diff --git a/source/blender/functions/FN_multi_function_context.hh b/source/blender/functions/FN_multi_function_context.hh index af5efb4cf88..0be768d7df5 100644 --- a/source/blender/functions/FN_multi_function_context.hh +++ b/source/blender/functions/FN_multi_function_context.hh @@ -12,24 +12,30 @@ * - Pass cached data to called functions. */ +#include "BLI_local_allocator.hh" #include "BLI_utildefines.h" -#include "BLI_map.hh" - namespace blender::fn { class MFContext; class MFContextBuilder { private: - Map global_contexts_; + std::unique_ptr allocator_set_; + LocalAllocator *allocator_; friend MFContext; public: - template void add_global_context(std::string name, const T *context) + MFContextBuilder(LocalAllocator *allocator = nullptr) { - global_contexts_.add_new(std::move(name), static_cast(context)); + if (allocator) { + allocator_ = allocator; + } + else { + allocator_set_ = std::make_unique(); + allocator_ = &allocator_set_->local(); + } } }; @@ -42,11 +48,9 @@ class MFContext { { } - template const T *get_global_context(StringRef name) const + LocalAllocator &allocator() { - const void *context = builder_.global_contexts_.lookup_default_as(name, nullptr); - /* TODO: Implement type checking. 
*/ - return static_cast(context); + return *builder_.allocator_; } }; diff --git a/source/blender/functions/intern/multi_function.cc b/source/blender/functions/intern/multi_function.cc index c05087a4c2d..de25b5a0cd8 100644 --- a/source/blender/functions/intern/multi_function.cc +++ b/source/blender/functions/intern/multi_function.cc @@ -127,7 +127,8 @@ void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context } } - this->call(offset_mask, offset_params, context); + MFContextBuilder sub_context{&context.allocator().local()}; + this->call(offset_mask, offset_params, sub_context); }); } diff --git a/source/blender/functions/intern/multi_function_procedure_executor.cc b/source/blender/functions/intern/multi_function_procedure_executor.cc index 4fe3c27ea27..5a1abf6dd8b 100644 --- a/source/blender/functions/intern/multi_function_procedure_executor.cc +++ b/source/blender/functions/intern/multi_function_procedure_executor.cc @@ -132,32 +132,72 @@ class ValueAllocator : NonCopyable, NonMovable { static constexpr inline int min_alignment = 64; /** All buffers in the free-lists below have been allocated with this allocator. */ - LinearAllocator<> &linear_allocator_; + LocalAllocator &local_allocator_; + + int array_size_; /** * Use stacks so that the most recently used buffers are reused first. This improves cache * efficiency. */ - std::array, tot_variable_value_types> variable_value_free_lists_; + std::array, tot_variable_value_types> variable_value_free_lists_; /** * The integer key is the size of one element (e.g. 4 for an integer buffer). All buffers are * aligned to #min_alignment bytes. */ - Stack small_span_buffers_free_list_; - Map> span_buffers_free_lists_; + Vector small_span_buffers_free_list_; + Map> span_buffers_free_lists_; /** Cache buffers for single values of different types. 
*/ static constexpr inline int small_value_max_size = 16; static constexpr inline int small_value_max_alignment = 8; - Stack small_single_value_free_list_; - Map> single_value_free_lists_; + Vector small_single_value_free_list_; + Map> single_value_free_lists_; public: - ValueAllocator(LinearAllocator<> &linear_allocator) : linear_allocator_(linear_allocator) + ValueAllocator(LocalAllocator &local_allocator, const int array_size) + : local_allocator_(local_allocator), array_size_(array_size) { } + ~ValueAllocator() + { + this->deallocate_variable_values(); + this->deallocate_variable_values(); + this->deallocate_variable_values(); + this->deallocate_variable_values(); + this->deallocate_variable_values(); + this->deallocate_variable_values(); + + for (void *buffer : small_span_buffers_free_list_) { + local_allocator_.deallocate(buffer, small_value_max_size, small_value_max_alignment); + } + for (const auto item : span_buffers_free_lists_.items()) { + const int element_size = item.key; + for (const void *buffer : item.value) { + local_allocator_.deallocate(buffer, element_size * array_size_, min_alignment); + } + } + + for (void *buffer : small_single_value_free_list_) { + local_allocator_.deallocate(buffer, small_value_max_size, small_value_max_alignment); + } + for (const auto item : single_value_free_lists_.items()) { + const CPPType &type = *item.key; + for (const void *buffer : item.value) { + local_allocator_.deallocate(buffer, type.size(), type.alignment()); + } + } + } + + template void deallocate_variable_values() + { + for (VariableValue *value : variable_value_free_lists_[int(T::static_type)]) { + local_allocator_.deallocate(value, sizeof(T), alignof(T)); + } + } + VariableValue_GVArray *obtain_GVArray(const GVArray &varray) { return this->obtain(varray); @@ -173,7 +213,7 @@ class ValueAllocator : NonCopyable, NonMovable { return this->obtain(buffer, false); } - VariableValue_Span *obtain_Span(const CPPType &type, int size) + VariableValue_Span 
*obtain_Span(const CPPType &type) { void *buffer = nullptr; @@ -182,20 +222,20 @@ class ValueAllocator : NonCopyable, NonMovable { if (alignment > min_alignment) { /* In this rare case we fallback to not reusing existing buffers. */ - buffer = linear_allocator_.allocate(element_size * size, alignment); + buffer = local_allocator_.allocate(element_size * array_size_, alignment); } else { - Stack *stack = type.can_exist_in_buffer(small_value_max_size, - small_value_max_alignment) ? - &small_span_buffers_free_list_ : - span_buffers_free_lists_.lookup_ptr(element_size); + Vector *stack = type.can_exist_in_buffer(small_value_max_size, + small_value_max_alignment) ? + &small_span_buffers_free_list_ : + span_buffers_free_lists_.lookup_ptr(element_size); if (stack == nullptr || stack->is_empty()) { - buffer = linear_allocator_.allocate( - std::max(element_size, small_value_max_size) * size, min_alignment); + buffer = local_allocator_.allocate( + std::max(element_size, small_value_max_size) * array_size_, min_alignment); } else { /* Reuse existing buffer. */ - buffer = stack->pop(); + buffer = stack->pop_last(); } } @@ -207,9 +247,9 @@ class ValueAllocator : NonCopyable, NonMovable { return this->obtain(data, false); } - VariableValue_GVectorArray *obtain_GVectorArray(const CPPType &type, int size) + VariableValue_GVectorArray *obtain_GVectorArray(const CPPType &type) { - GVectorArray *vector_array = new GVectorArray(type, size); + GVectorArray *vector_array = new GVectorArray(type, array_size_); return this->obtain(*vector_array, true); } @@ -217,16 +257,16 @@ class ValueAllocator : NonCopyable, NonMovable { { const bool is_small = type.can_exist_in_buffer(small_value_max_size, small_value_max_alignment); - Stack &stack = is_small ? small_single_value_free_list_ : - single_value_free_lists_.lookup_or_add_default(&type); + Vector &stack = is_small ? 
small_single_value_free_list_ : + single_value_free_lists_.lookup_or_add_default(&type); void *buffer; if (stack.is_empty()) { - buffer = linear_allocator_.allocate( + buffer = local_allocator_.allocate( std::max(small_value_max_size, type.size()), std::max(small_value_max_alignment, type.alignment())); } else { - buffer = stack.pop(); + buffer = stack.pop_last(); } return this->obtain(buffer); } @@ -248,11 +288,12 @@ class ValueAllocator : NonCopyable, NonMovable { if (value_typed->owned) { const CPPType &type = data_type.single_type(); /* Assumes all values in the buffer are uninitialized already. */ - Stack &buffers = type.can_exist_in_buffer(small_value_max_size, - small_value_max_alignment) ? - small_span_buffers_free_list_ : - span_buffers_free_lists_.lookup_or_add_default(type.size()); - buffers.push(value_typed->data); + Vector &buffers = type.can_exist_in_buffer(small_value_max_size, + small_value_max_alignment) ? + small_span_buffers_free_list_ : + span_buffers_free_lists_.lookup_or_add_default( + type.size()); + buffers.append(value_typed->data); } break; } @@ -275,10 +316,10 @@ class ValueAllocator : NonCopyable, NonMovable { const bool is_small = type.can_exist_in_buffer(small_value_max_size, small_value_max_alignment); if (is_small) { - small_single_value_free_list_.push(value_typed->data); + small_single_value_free_list_.append(value_typed->data); } else { - single_value_free_lists_.lookup_or_add_default(&type).push(value_typed->data); + single_value_free_lists_.lookup_or_add_default(&type).append(value_typed->data); } break; } @@ -289,20 +330,20 @@ class ValueAllocator : NonCopyable, NonMovable { } } - Stack &stack = variable_value_free_lists_[int(value->type)]; - stack.push(value); + Vector &stack = variable_value_free_lists_[int(value->type)]; + stack.append(value); } private: template T *obtain(Args &&...args) { static_assert(std::is_base_of_v); - Stack &stack = variable_value_free_lists_[int(T::static_type)]; + Vector &stack = 
variable_value_free_lists_[int(T::static_type)]; if (stack.is_empty()) { - void *buffer = linear_allocator_.allocate(sizeof(T), alignof(T)); + void *buffer = local_allocator_.allocate(sizeof(T), alignof(T)); return new (buffer) T(std::forward(args)...); } - return new (stack.pop()) T(std::forward(args)...); + return new (stack.pop_last()) T(std::forward(args)...); } }; @@ -414,7 +455,7 @@ class VariableState : NonCopyable, NonMovable { const CPPType &type = data_type.single_type(); VariableValue_Span *new_value = nullptr; if (caller_provided_storage_ == nullptr) { - new_value = value_allocator.obtain_Span(type, array_size); + new_value = value_allocator.obtain_Span(type); } else { /* Reuse the storage provided caller when possible. */ @@ -445,7 +486,7 @@ class VariableState : NonCopyable, NonMovable { const CPPType &type = data_type.vector_base_type(); VariableValue_GVectorArray *new_value = nullptr; if (caller_provided_storage_ == nullptr) { - new_value = value_allocator.obtain_GVectorArray(type, array_size); + new_value = value_allocator.obtain_GVectorArray(type); } else { new_value = value_allocator.obtain_GVectorArray_not_owned( @@ -829,10 +870,10 @@ class VariableStates { IndexMask full_mask_; public: - VariableStates(LinearAllocator<> &linear_allocator, + VariableStates(LocalAllocator &local_allocator, const MFProcedure &procedure, IndexMask full_mask) - : value_allocator_(linear_allocator), + : value_allocator_(local_allocator, full_mask.min_array_size()), procedure_(procedure), variable_states_(procedure.variables().size()), full_mask_(full_mask) @@ -1178,11 +1219,8 @@ void MFProcedureExecutor::call(IndexMask full_mask, MFParams params, MFContext c { BLI_assert(procedure_.validate()); - AlignedBuffer<512, 64> local_buffer; - LinearAllocator<> linear_allocator; - linear_allocator.provide_buffer(local_buffer); - - VariableStates variable_states{linear_allocator, procedure_, full_mask}; + LocalAllocator &local_allocator = context.allocator(); + VariableStates 
variable_states{local_allocator, procedure_, full_mask}; variable_states.add_initial_variable_states(*this, procedure_, params); InstructionScheduler scheduler; diff --git a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc index 3aebc8a4b6e..ae21b83d604 100644 --- a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc +++ b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc @@ -304,6 +304,7 @@ class LazyFunctionForUndefinedNode : public LazyFunction { * values. If any input is a field, the outputs will also be fields. */ static void execute_multi_function_on_value_or_field( + LocalAllocator &allocator, const MultiFunction &fn, const std::shared_ptr &owned_fn, const Span input_types, @@ -354,7 +355,7 @@ static void execute_multi_function_on_value_or_field( else { /* In this case, the multi-function is evaluated directly. */ MFParamsBuilder params{fn, 1}; - MFContextBuilder context; + MFContextBuilder context{&allocator}; for (const int i : input_types.index_range()) { const ValueOrFieldCPPType &type = *input_types[i]; @@ -412,7 +413,7 @@ class LazyFunctionForMutedNode : public LazyFunction { } } - void execute_impl(lf::Params ¶ms, const lf::Context & /*context*/) const override + void execute_impl(lf::Params ¶ms, const lf::Context &context) const override { for (const int output_i : outputs_.index_range()) { if (params.output_was_set(output_i)) { @@ -446,8 +447,13 @@ class LazyFunctionForMutedNode : public LazyFunction { if (conversions.is_convertible(from_type->value, to_type->value)) { const MultiFunction &multi_fn = *conversions.get_conversion_multi_function( MFDataType::ForSingle(from_type->value), MFDataType::ForSingle(to_type->value)); - execute_multi_function_on_value_or_field( - multi_fn, {}, {from_type}, {to_type}, {input_value}, {output_value}); + execute_multi_function_on_value_or_field(*context.allocator, + multi_fn, + {}, + {from_type}, + {to_type}, + {input_value}, 
+ {output_value}); } params.output_set(output_i); continue; @@ -480,7 +486,7 @@ class LazyFunctionForMultiFunctionConversion : public LazyFunction { outputs_.append({"To", to.self}); } - void execute_impl(lf::Params ¶ms, const lf::Context & /*context*/) const override + void execute_impl(lf::Params ¶ms, const lf::Context &context) const override { const void *from_value = params.try_get_input_data_ptr(0); void *to_value = params.get_output_data_ptr(0); @@ -488,7 +494,7 @@ class LazyFunctionForMultiFunctionConversion : public LazyFunction { BLI_assert(to_value != nullptr); execute_multi_function_on_value_or_field( - fn_, {}, {&from_type_}, {&to_type_}, {from_value}, {to_value}); + *context.allocator, fn_, {}, {&from_type_}, {&to_type_}, {from_value}, {to_value}); params.output_set(0); } @@ -521,7 +527,7 @@ class LazyFunctionForMultiFunctionNode : public LazyFunction { } } - void execute_impl(lf::Params ¶ms, const lf::Context & /*context*/) const override + void execute_impl(lf::Params ¶ms, const lf::Context &context) const override { Vector input_values(inputs_.size()); Vector output_values(outputs_.size()); @@ -531,8 +537,13 @@ class LazyFunctionForMultiFunctionNode : public LazyFunction { for (const int i : outputs_.index_range()) { output_values[i] = params.get_output_data_ptr(i); } - execute_multi_function_on_value_or_field( - *fn_item_.fn, fn_item_.owned_fn, input_types_, output_types_, input_values, output_values); + execute_multi_function_on_value_or_field(*context.allocator, + *fn_item_.fn, + fn_item_.owned_fn, + input_types_, + output_types_, + input_values, + output_values); for (const int i : outputs_.index_range()) { params.output_set(i); } -- 2.30.2 From 609e9c42ca848dea5fea5110bad300b28775f751 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 18:59:24 +0100 Subject: [PATCH 21/34] fix --- .../functions/intern/multi_function_procedure_executor.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/source/blender/functions/intern/multi_function_procedure_executor.cc b/source/blender/functions/intern/multi_function_procedure_executor.cc index 5a1abf6dd8b..0f6876b0709 100644 --- a/source/blender/functions/intern/multi_function_procedure_executor.cc +++ b/source/blender/functions/intern/multi_function_procedure_executor.cc @@ -171,7 +171,8 @@ class ValueAllocator : NonCopyable, NonMovable { this->deallocate_variable_values(); for (void *buffer : small_span_buffers_free_list_) { - local_allocator_.deallocate(buffer, small_value_max_size, small_value_max_alignment); + local_allocator_.deallocate( + buffer, small_value_max_size * array_size_, small_value_max_alignment); } for (const auto item : span_buffers_free_lists_.items()) { const int element_size = item.key; -- 2.30.2 From 093ade946fa2e05de6b38633124762683ca2b821 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 19:18:20 +0100 Subject: [PATCH 22/34] progress --- source/blender/blenlib/BLI_local_allocator.hh | 136 ++++++++++-------- .../blender/blenlib/intern/local_allocator.cc | 8 +- 2 files changed, 80 insertions(+), 64 deletions(-) diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh index 4c017e8412c..4095cdd726a 100644 --- a/source/blender/blenlib/BLI_local_allocator.hh +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -19,6 +19,17 @@ namespace blender { class LocalAllocatorSet; +class LocalAllocator; +class LocalAllocatorPool; + +class LocalAllocatorPool : NonCopyable, NonMovable { + private: + Stack buffers; + int64_t element_size = -1; + int64_t alignment = -1; + + friend LocalAllocator; +}; class LocalAllocator : NonCopyable, NonMovable { private: @@ -26,20 +37,14 @@ class LocalAllocator : NonCopyable, NonMovable { LocalAllocatorSet &owner_set_; LinearAllocator<> linear_allocator_; - struct BufferStack { - Stack stack; - int64_t element_size = -1; - int64_t alignment = -1; - }; - struct Head { int64_t buffer_size; int64_t 
buffer_alignment; }; static_assert(is_power_of_2_constexpr(sizeof(Head))); - std::array small_stacks_; - Map large_stacks_; + std::array small_buffer_pools_; + Map> large_buffer_pools_; friend LocalAllocatorSet; @@ -55,9 +60,14 @@ class LocalAllocator : NonCopyable, NonMovable { void *allocate(int64_t size, int64_t alignment); void deallocate(const void *buffer, int64_t size, int64_t alignment); + void *allocate(LocalAllocatorPool &pool); + void deallocate(const void *buffer, LocalAllocatorPool &pool); + void *allocate_with_head(int64_t size, int64_t alignment); void deallocate_with_head(const void *buffer); + LocalAllocatorPool &get_pool(int64_t size, int64_t alignment); + template T &allocate_new(Args &&...args); template void destruct_free(const T *value); template MutableSpan allocate_array(int64_t size); @@ -65,9 +75,6 @@ class LocalAllocator : NonCopyable, NonMovable { MutableSpan allocate_new_array(int64_t size, Args &&...args); template void destruct_free_array(Span data); template void destruct_free_array(MutableSpan data); - - private: - BufferStack &get_buffer_stack(int64_t size, int64_t alignment); }; class LocalAllocatorSet : NonCopyable, NonMovable { @@ -76,7 +83,7 @@ class LocalAllocatorSet : NonCopyable, NonMovable { #ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES std::mutex debug_sizes_mutex_; - Map> debug_sizes_; + Map debug_sizes_; #endif friend LocalAllocator; @@ -147,47 +154,53 @@ inline LocalAllocatorSet &LocalAllocator::owner_set() BLI_NOINLINE inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment) { - BLI_assert(size > 0); - BLI_assert(alignment <= size); - BLI_assert(alignment <= s_alignment); - BLI_assert(is_power_of_2_i(alignment)); - BLI_assert(this->is_local()); + LocalAllocatorPool &pool = this->get_pool(size, alignment); + BLI_assert(pool.element_size >= size); + BLI_assert(pool.alignment >= alignment); -#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED - return MEM_mallocN_aligned(size, alignment, __func__); -#endif - - 
BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); - BLI_assert(buffer_stack.element_size >= size); - BLI_assert(buffer_stack.alignment >= alignment); - - void *buffer; - if (!buffer_stack.stack.is_empty()) { - buffer = buffer_stack.stack.pop(); - BLI_asan_unpoison(buffer, size); - } - else { - buffer = linear_allocator_.allocate(buffer_stack.element_size, buffer_stack.alignment); - } - -#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES - { - std::lock_guard lock{owner_set_.debug_sizes_mutex_}; - owner_set_.debug_sizes_.add_new(buffer, {size, alignment}); - } -#endif - - return buffer; + return this->allocate(pool); } BLI_NOINLINE inline void LocalAllocator::deallocate(const void *buffer, const int64_t size, const int64_t alignment) { - BLI_assert(size > 0); - BLI_assert(alignment <= size); - BLI_assert(alignment <= s_alignment); - BLI_assert(is_power_of_2_i(alignment)); + LocalAllocatorPool &pool = this->get_pool(size, alignment); + BLI_assert(pool.element_size >= size); + BLI_assert(pool.alignment >= alignment); + + this->deallocate(buffer, pool); +} + +inline void *LocalAllocator::allocate(LocalAllocatorPool &pool) +{ + BLI_assert(this->is_local()); + +#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED + return MEM_mallocN_aligned(size, alignment, __func__); +#endif + + void *buffer; + if (!pool.buffers.is_empty()) { + buffer = pool.buffers.pop(); + BLI_asan_unpoison(buffer, pool.element_size); + } + else { + buffer = linear_allocator_.allocate(pool.element_size, pool.alignment); + } + +#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES + { + std::lock_guard lock{owner_set_.debug_sizes_mutex_}; + owner_set_.debug_sizes_.add_new(buffer, pool.element_size); + } +#endif + + return buffer; +} + +inline void LocalAllocator::deallocate(const void *buffer, LocalAllocatorPool &pool) +{ BLI_assert(this->is_local()); #ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED @@ -210,29 +223,32 @@ BLI_NOINLINE inline void LocalAllocator::deallocate(const void *buffer, #endif #ifdef DEBUG - 
memset(const_cast(buffer), -1, size); + memset(const_cast(buffer), -1, pool.element_size); #endif - BLI_asan_poison(buffer, size); - BufferStack &buffer_stack = this->get_buffer_stack(size, alignment); - BLI_assert(buffer_stack.element_size >= size); - BLI_assert(buffer_stack.alignment >= alignment); + BLI_asan_poison(buffer, pool.element_size); - buffer_stack.stack.push(const_cast(buffer)); + pool.buffers.push(const_cast(buffer)); } -inline LocalAllocator::BufferStack &LocalAllocator::get_buffer_stack(const int64_t size, - const int64_t /*alignment*/) +inline LocalAllocatorPool &LocalAllocator::get_pool(const int64_t size, const int64_t alignment) { + BLI_assert(size > 0); + BLI_assert(alignment <= size); + BLI_assert(alignment <= s_alignment); + BLI_assert(is_power_of_2_i(alignment)); + UNUSED_VARS_NDEBUG(alignment); + + BLI_assert(this->is_local()); if (size <= 64) { - return small_stacks_[(size - 1) >> 3]; + return small_buffer_pools_[(size - 1) >> 3]; } const int key = bitscan_reverse_uint64(uint64_t(size)); - return large_stacks_.lookup_or_add_cb(key, [&]() { - BufferStack buffer_stack; - buffer_stack.element_size = int64_t(1) << (8 * sizeof(int64_t) - key); - buffer_stack.alignment = s_alignment; - return buffer_stack; + return *large_buffer_pools_.lookup_or_add_cb(key, [&]() { + auto pool = std::make_unique(); + pool->element_size = int64_t(1) << (8 * sizeof(int64_t) - key); + pool->alignment = s_alignment; + return pool; }); } diff --git a/source/blender/blenlib/intern/local_allocator.cc b/source/blender/blenlib/intern/local_allocator.cc index 4634da8e31e..1d7e6ecd915 100644 --- a/source/blender/blenlib/intern/local_allocator.cc +++ b/source/blender/blenlib/intern/local_allocator.cc @@ -13,10 +13,10 @@ LocalAllocatorSet::~LocalAllocatorSet() = default; LocalAllocator::LocalAllocator(LocalAllocatorSet &owner_set) : owner_set_(owner_set) { - for (const int64_t i : IndexRange(small_stacks_.size())) { - BufferStack &buffer_stack = small_stacks_[i]; - 
buffer_stack.element_size = 8 * (i + 1); - buffer_stack.alignment = power_of_2_min_u(buffer_stack.element_size); + for (const int64_t i : IndexRange(small_buffer_pools_.size())) { + LocalAllocatorPool &pool = small_buffer_pools_[i]; + pool.element_size = 8 * (i + 1); + pool.alignment = power_of_2_min_u(pool.element_size); } } -- 2.30.2 From 6683b50bb116d84b2e9ca7e84addd0b1db4f04f1 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 19:24:58 +0100 Subject: [PATCH 23/34] progress --- .../multi_function_procedure_executor.cc | 40 ++++++++----------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/source/blender/functions/intern/multi_function_procedure_executor.cc b/source/blender/functions/intern/multi_function_procedure_executor.cc index 0f6876b0709..d9af56ac3ba 100644 --- a/source/blender/functions/intern/multi_function_procedure_executor.cc +++ b/source/blender/functions/intern/multi_function_procedure_executor.cc @@ -140,7 +140,7 @@ class ValueAllocator : NonCopyable, NonMovable { * Use stacks so that the most recently used buffers are reused first. This improves cache * efficiency. */ - std::array, tot_variable_value_types> variable_value_free_lists_; + std::array variable_value_free_lists_; /** * The integer key is the size of one element (e.g. 4 for an integer buffer). 
All buffers are @@ -159,17 +159,22 @@ class ValueAllocator : NonCopyable, NonMovable { ValueAllocator(LocalAllocator &local_allocator, const int array_size) : local_allocator_(local_allocator), array_size_(array_size) { + this->prepare_variable_value_pool(); + this->prepare_variable_value_pool(); + this->prepare_variable_value_pool(); + this->prepare_variable_value_pool(); + this->prepare_variable_value_pool(); + this->prepare_variable_value_pool(); + } + + template void prepare_variable_value_pool() + { + variable_value_free_lists_[int(T::static_type)] = &local_allocator_.get_pool(sizeof(T), + alignof(T)); } ~ValueAllocator() { - this->deallocate_variable_values(); - this->deallocate_variable_values(); - this->deallocate_variable_values(); - this->deallocate_variable_values(); - this->deallocate_variable_values(); - this->deallocate_variable_values(); - for (void *buffer : small_span_buffers_free_list_) { local_allocator_.deallocate( buffer, small_value_max_size * array_size_, small_value_max_alignment); @@ -192,13 +197,6 @@ class ValueAllocator : NonCopyable, NonMovable { } } - template void deallocate_variable_values() - { - for (VariableValue *value : variable_value_free_lists_[int(T::static_type)]) { - local_allocator_.deallocate(value, sizeof(T), alignof(T)); - } - } - VariableValue_GVArray *obtain_GVArray(const GVArray &varray) { return this->obtain(varray); @@ -331,20 +329,16 @@ class ValueAllocator : NonCopyable, NonMovable { } } - Vector &stack = variable_value_free_lists_[int(value->type)]; - stack.append(value); + local_allocator_.deallocate(value, *variable_value_free_lists_[int(value->type)]); } private: template T *obtain(Args &&...args) { static_assert(std::is_base_of_v); - Vector &stack = variable_value_free_lists_[int(T::static_type)]; - if (stack.is_empty()) { - void *buffer = local_allocator_.allocate(sizeof(T), alignof(T)); - return new (buffer) T(std::forward(args)...); - } - return new (stack.pop_last()) T(std::forward(args)...); + void 
*buffer = static_cast( + local_allocator_.allocate(*variable_value_free_lists_[int(T::static_type)])); + return new (buffer) T(std::forward(args)...); } }; -- 2.30.2 From 067af03ea118459dcd19b5e77cb7b9c0cd173f34 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 19:40:03 +0100 Subject: [PATCH 24/34] progress --- .../multi_function_procedure_executor.cc | 126 +++++++----------- 1 file changed, 47 insertions(+), 79 deletions(-) diff --git a/source/blender/functions/intern/multi_function_procedure_executor.cc b/source/blender/functions/intern/multi_function_procedure_executor.cc index d9af56ac3ba..aac84a6aa55 100644 --- a/source/blender/functions/intern/multi_function_procedure_executor.cc +++ b/source/blender/functions/intern/multi_function_procedure_executor.cc @@ -117,6 +117,21 @@ static_assert(std::is_trivially_destructible_v); static_assert(std::is_trivially_destructible_v); static_assert(std::is_trivially_destructible_v); +static constexpr int64_t max_variable_value_size = int64_t( + std::max({sizeof(VariableValue_GVArray), + sizeof(VariableValue_Span), + sizeof(VariableValue_GVVectorArray), + sizeof(VariableValue_GVectorArray), + sizeof(VariableValue_OneSingle), + sizeof(VariableValue_OneVector)})); +static constexpr int64_t max_variable_value_alignment = int64_t( + std::max({alignof(VariableValue_GVArray), + alignof(VariableValue_Span), + alignof(VariableValue_GVVectorArray), + alignof(VariableValue_GVectorArray), + alignof(VariableValue_OneSingle), + alignof(VariableValue_OneVector)})); + class VariableState; /** @@ -129,7 +144,7 @@ class ValueAllocator : NonCopyable, NonMovable { * Allocate with 64 byte alignment for better reusability of buffers and improved cache * performance. */ - static constexpr inline int min_alignment = 64; + static constexpr inline int s_span_alignment = 64; /** All buffers in the free-lists below have been allocated with this allocator. 
*/ LocalAllocator &local_allocator_; @@ -140,61 +155,29 @@ class ValueAllocator : NonCopyable, NonMovable { * Use stacks so that the most recently used buffers are reused first. This improves cache * efficiency. */ - std::array variable_value_free_lists_; + LocalAllocatorPool *variable_value_pool_; /** * The integer key is the size of one element (e.g. 4 for an integer buffer). All buffers are * aligned to #min_alignment bytes. */ - Vector small_span_buffers_free_list_; - Map> span_buffers_free_lists_; + LocalAllocatorPool *small_span_buffers_pool_; /** Cache buffers for single values of different types. */ static constexpr inline int small_value_max_size = 16; static constexpr inline int small_value_max_alignment = 8; - Vector small_single_value_free_list_; - Map> single_value_free_lists_; + LocalAllocatorPool *small_values_pool_; public: ValueAllocator(LocalAllocator &local_allocator, const int array_size) : local_allocator_(local_allocator), array_size_(array_size) { - this->prepare_variable_value_pool(); - this->prepare_variable_value_pool(); - this->prepare_variable_value_pool(); - this->prepare_variable_value_pool(); - this->prepare_variable_value_pool(); - this->prepare_variable_value_pool(); - } - - template void prepare_variable_value_pool() - { - variable_value_free_lists_[int(T::static_type)] = &local_allocator_.get_pool(sizeof(T), - alignof(T)); - } - - ~ValueAllocator() - { - for (void *buffer : small_span_buffers_free_list_) { - local_allocator_.deallocate( - buffer, small_value_max_size * array_size_, small_value_max_alignment); - } - for (const auto item : span_buffers_free_lists_.items()) { - const int element_size = item.key; - for (const void *buffer : item.value) { - local_allocator_.deallocate(buffer, element_size * array_size_, min_alignment); - } - } - - for (void *buffer : small_single_value_free_list_) { - local_allocator_.deallocate(buffer, small_value_max_size, small_value_max_alignment); - } - for (const auto item : 
single_value_free_lists_.items()) { - const CPPType &type = *item.key; - for (const void *buffer : item.value) { - local_allocator_.deallocate(buffer, type.size(), type.alignment()); - } - } + variable_value_pool_ = &local_allocator.get_pool(max_variable_value_size, + max_variable_value_alignment); + small_span_buffers_pool_ = &local_allocator.get_pool( + std::max(s_span_alignment, small_value_max_size * array_size), s_span_alignment); + small_values_pool_ = &local_allocator.get_pool(small_value_max_size, + small_value_max_alignment); } VariableValue_GVArray *obtain_GVArray(const GVArray &varray) @@ -214,28 +197,15 @@ class ValueAllocator : NonCopyable, NonMovable { VariableValue_Span *obtain_Span(const CPPType &type) { + const bool is_small = type.can_exist_in_buffer(small_value_max_size, + small_value_max_alignment); + void *buffer = nullptr; - - const int64_t element_size = type.size(); - const int64_t alignment = type.alignment(); - - if (alignment > min_alignment) { - /* In this rare case we fallback to not reusing existing buffers. */ - buffer = local_allocator_.allocate(element_size * array_size_, alignment); + if (is_small) { + buffer = local_allocator_.allocate(*small_span_buffers_pool_); } else { - Vector *stack = type.can_exist_in_buffer(small_value_max_size, - small_value_max_alignment) ? - &small_span_buffers_free_list_ : - span_buffers_free_lists_.lookup_ptr(element_size); - if (stack == nullptr || stack->is_empty()) { - buffer = local_allocator_.allocate( - std::max(element_size, small_value_max_size) * array_size_, min_alignment); - } - else { - /* Reuse existing buffer. */ - buffer = stack->pop_last(); - } + buffer = local_allocator_.allocate(type.size() * array_size_, type.alignment()); } return this->obtain(buffer, true); @@ -256,16 +226,12 @@ class ValueAllocator : NonCopyable, NonMovable { { const bool is_small = type.can_exist_in_buffer(small_value_max_size, small_value_max_alignment); - Vector &stack = is_small ? 
small_single_value_free_list_ : - single_value_free_lists_.lookup_or_add_default(&type); void *buffer; - if (stack.is_empty()) { - buffer = local_allocator_.allocate( - std::max(small_value_max_size, type.size()), - std::max(small_value_max_alignment, type.alignment())); + if (is_small) { + buffer = local_allocator_.allocate(*small_values_pool_); } else { - buffer = stack.pop_last(); + buffer = local_allocator_.allocate(type.size(), type.alignment()); } return this->obtain(buffer); } @@ -286,13 +252,16 @@ class ValueAllocator : NonCopyable, NonMovable { auto *value_typed = static_cast(value); if (value_typed->owned) { const CPPType &type = data_type.single_type(); + const bool is_small = type.can_exist_in_buffer(small_value_max_size, + small_value_max_alignment); /* Assumes all values in the buffer are uninitialized already. */ - Vector &buffers = type.can_exist_in_buffer(small_value_max_size, - small_value_max_alignment) ? - small_span_buffers_free_list_ : - span_buffers_free_lists_.lookup_or_add_default( - type.size()); - buffers.append(value_typed->data); + if (is_small) { + local_allocator_.deallocate(value_typed->data, *small_span_buffers_pool_); + } + else { + local_allocator_.deallocate( + value_typed->data, type.size() * array_size_, type.alignment()); + } } break; } @@ -315,10 +284,10 @@ class ValueAllocator : NonCopyable, NonMovable { const bool is_small = type.can_exist_in_buffer(small_value_max_size, small_value_max_alignment); if (is_small) { - small_single_value_free_list_.append(value_typed->data); + local_allocator_.deallocate(value_typed->data, *small_values_pool_); } else { - single_value_free_lists_.lookup_or_add_default(&type).append(value_typed->data); + local_allocator_.deallocate(value_typed->data, type.size(), type.alignment()); } break; } @@ -329,15 +298,14 @@ class ValueAllocator : NonCopyable, NonMovable { } } - local_allocator_.deallocate(value, *variable_value_free_lists_[int(value->type)]); + local_allocator_.deallocate(value, 
*variable_value_pool_); } private: template T *obtain(Args &&...args) { static_assert(std::is_base_of_v); - void *buffer = static_cast( - local_allocator_.allocate(*variable_value_free_lists_[int(T::static_type)])); + void *buffer = static_cast(local_allocator_.allocate(*variable_value_pool_)); return new (buffer) T(std::forward(args)...); } }; -- 2.30.2 From ff17314372d458347b4c2ac630115c4f647c42f3 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 19:49:35 +0100 Subject: [PATCH 25/34] progress --- source/blender/blenlib/BLI_local_allocator.hh | 8 ++++---- source/blender/functions/FN_field.hh | 13 ++++++++++--- source/blender/functions/intern/field.cc | 17 ++++++++++------- source/blender/functions/tests/FN_field_test.cc | 2 +- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh index 4095cdd726a..aed7f32a4ab 100644 --- a/source/blender/blenlib/BLI_local_allocator.hh +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -152,7 +152,7 @@ inline LocalAllocatorSet &LocalAllocator::owner_set() return owner_set_; } -BLI_NOINLINE inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment) +inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment) { LocalAllocatorPool &pool = this->get_pool(size, alignment); BLI_assert(pool.element_size >= size); @@ -161,9 +161,9 @@ BLI_NOINLINE inline void *LocalAllocator::allocate(const int64_t size, const int return this->allocate(pool); } -BLI_NOINLINE inline void LocalAllocator::deallocate(const void *buffer, - const int64_t size, - const int64_t alignment) +inline void LocalAllocator::deallocate(const void *buffer, + const int64_t size, + const int64_t alignment) { LocalAllocatorPool &pool = this->get_pool(size, alignment); BLI_assert(pool.element_size >= size); diff --git a/source/blender/functions/FN_field.hh b/source/blender/functions/FN_field.hh index 
7f940294113..2e3a4245c02 100644 --- a/source/blender/functions/FN_field.hh +++ b/source/blender/functions/FN_field.hh @@ -334,6 +334,7 @@ class FieldEvaluator : NonMovable, NonCopyable { ResourceScope scope_; const FieldContext &context_; const IndexMask mask_; + LocalAllocator *allocator_ = nullptr; Vector fields_to_evaluate_; Vector dst_varrays_; Vector evaluated_varrays_; @@ -345,13 +346,18 @@ class FieldEvaluator : NonMovable, NonCopyable { public: /** Takes #mask by pointer because the mask has to live longer than the evaluator. */ - FieldEvaluator(const FieldContext &context, const IndexMask *mask) - : context_(context), mask_(*mask) + FieldEvaluator(const FieldContext &context, + const IndexMask *mask, + LocalAllocator *allocator = nullptr) + : context_(context), mask_(*mask), allocator_(allocator) { } /** Construct a field evaluator for all indices less than #size. */ - FieldEvaluator(const FieldContext &context, const int64_t size) : context_(context), mask_(size) + FieldEvaluator(const FieldContext &context, + const int64_t size, + LocalAllocator *allocator = nullptr) + : context_(context), mask_(size), allocator_(allocator) { } @@ -474,6 +480,7 @@ class FieldEvaluator : NonMovable, NonCopyable { * provided virtual arrays are returned. 
*/ Vector evaluate_fields(ResourceScope &scope, + LocalAllocator *allocator, Span fields_to_evaluate, IndexMask mask, const FieldContext &context, diff --git a/source/blender/functions/intern/field.cc b/source/blender/functions/intern/field.cc index a9d26fa09f1..7a3dba95f13 100644 --- a/source/blender/functions/intern/field.cc +++ b/source/blender/functions/intern/field.cc @@ -277,6 +277,7 @@ static void build_multi_function_procedure_for_fields(MFProcedure &procedure, } Vector evaluate_fields(ResourceScope &scope, + LocalAllocator *allocator, Span fields_to_evaluate, IndexMask mask, const FieldContext &context, @@ -372,7 +373,7 @@ Vector evaluate_fields(ResourceScope &scope, MFProcedureExecutor procedure_executor{procedure}; MFParamsBuilder mf_params{procedure_executor, &mask}; - MFContextBuilder mf_context; + MFContextBuilder mf_context{allocator}; /* Provide inputs to the procedure executor. */ for (const GVArray &varray : field_context_inputs) { @@ -423,7 +424,7 @@ Vector evaluate_fields(ResourceScope &scope, procedure, scope, field_tree_info, constant_fields_to_evaluate); MFProcedureExecutor procedure_executor{procedure}; MFParamsBuilder mf_params{procedure_executor, 1}; - MFContextBuilder mf_context; + MFContextBuilder mf_context{allocator}; /* Provide inputs to the procedure executor. 
*/ for (const GVArray &varray : field_context_inputs) { @@ -500,7 +501,7 @@ void evaluate_constant_field(const GField &field, void *r_value) ResourceScope scope; FieldContext context; - Vector varrays = evaluate_fields(scope, {field}, IndexRange(1), context); + Vector varrays = evaluate_fields(scope, nullptr, {field}, IndexRange(1), context); varrays[0].get_to_uninitialized(0, r_value); } @@ -771,11 +772,12 @@ int FieldEvaluator::add(GField field) static IndexMask evaluate_selection(const Field &selection_field, const FieldContext &context, IndexMask full_mask, - ResourceScope &scope) + ResourceScope &scope, + LocalAllocator *allocator) { if (selection_field) { VArray selection = - evaluate_fields(scope, {selection_field}, full_mask, context)[0].typed(); + evaluate_fields(scope, allocator, {selection_field}, full_mask, context)[0].typed(); return index_mask_from_selection(full_mask, selection, scope); } return full_mask; @@ -785,13 +787,14 @@ void FieldEvaluator::evaluate() { BLI_assert_msg(!is_evaluated_, "Cannot evaluate fields twice."); - selection_mask_ = evaluate_selection(selection_field_, context_, mask_, scope_); + selection_mask_ = evaluate_selection(selection_field_, context_, mask_, scope_, allocator_); Array fields(fields_to_evaluate_.size()); for (const int i : fields_to_evaluate_.index_range()) { fields[i] = fields_to_evaluate_[i]; } - evaluated_varrays_ = evaluate_fields(scope_, fields, selection_mask_, context_, dst_varrays_); + evaluated_varrays_ = evaluate_fields( + scope_, allocator_, fields, selection_mask_, context_, dst_varrays_); BLI_assert(fields_to_evaluate_.size() == evaluated_varrays_.size()); for (const int i : fields_to_evaluate_.index_range()) { OutputPointerInfo &info = output_pointer_infos_[i]; diff --git a/source/blender/functions/tests/FN_field_test.cc b/source/blender/functions/tests/FN_field_test.cc index 8c5cc817174..3396539fa12 100644 --- a/source/blender/functions/tests/FN_field_test.cc +++ 
b/source/blender/functions/tests/FN_field_test.cc @@ -263,7 +263,7 @@ TEST(field, SameFieldTwice) IndexMask mask{IndexRange(2)}; ResourceScope scope; Vector results = evaluate_fields( - scope, {constant_field, constant_field}, mask, field_context); + scope, nullptr, {constant_field, constant_field}, mask, field_context); VArray varray1 = results[0].typed(); VArray varray2 = results[1].typed(); -- 2.30.2 From e566e6d4cfe0cfc04c325fd85213f18f1356a8a2 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 19:55:42 +0100 Subject: [PATCH 26/34] progress --- source/blender/blenkernel/BKE_geometry_fields.hh | 3 ++- source/blender/blenkernel/intern/geometry_fields.cc | 5 +++-- source/blender/blenlib/BLI_local_allocator.hh | 3 +-- source/blender/nodes/NOD_geometry_exec.hh | 5 +++++ .../nodes/geometry/nodes/node_geo_attribute_capture.cc | 6 ++++-- .../nodes/geometry/nodes/node_geo_store_named_attribute.cc | 6 ++++-- 6 files changed, 19 insertions(+), 9 deletions(-) diff --git a/source/blender/blenkernel/BKE_geometry_fields.hh b/source/blender/blenkernel/BKE_geometry_fields.hh index 967bb912cc6..b724c64105a 100644 --- a/source/blender/blenkernel/BKE_geometry_fields.hh +++ b/source/blender/blenkernel/BKE_geometry_fields.hh @@ -313,7 +313,8 @@ class CurveLengthFieldInput final : public CurvesFieldInput { bool try_capture_field_on_geometry(GeometryComponent &component, const AttributeIDRef &attribute_id, const eAttrDomain domain, - const fn::GField &field); + const fn::GField &field, + LocalAllocator *allocator = nullptr); /** * Try to find the geometry domain that the field should be evaluated on. 
If it is not obvious diff --git a/source/blender/blenkernel/intern/geometry_fields.cc b/source/blender/blenkernel/intern/geometry_fields.cc index 6fe822d6dc6..d552fa482ff 100644 --- a/source/blender/blenkernel/intern/geometry_fields.cc +++ b/source/blender/blenkernel/intern/geometry_fields.cc @@ -408,7 +408,8 @@ bool NormalFieldInput::is_equal_to(const fn::FieldNode &other) const bool try_capture_field_on_geometry(GeometryComponent &component, const AttributeIDRef &attribute_id, const eAttrDomain domain, - const fn::GField &field) + const fn::GField &field, + LocalAllocator *allocator) { MutableAttributeAccessor attributes = *component.attributes_for_write(); const int domain_size = attributes.domain_size(domain); @@ -428,7 +429,7 @@ bool try_capture_field_on_geometry(GeometryComponent &component, * - The field does not depend on that attribute (we can't easily check for that yet). */ void *buffer = MEM_mallocN(type.size() * domain_size, __func__); - fn::FieldEvaluator evaluator{field_context, &mask}; + fn::FieldEvaluator evaluator{field_context, &mask, allocator}; evaluator.add_with_destination(validator.validate_field_if_necessary(field), GMutableSpan{type, buffer, domain_size}); evaluator.evaluate(); diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh index aed7f32a4ab..dd7470fc1ee 100644 --- a/source/blender/blenlib/BLI_local_allocator.hh +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -177,7 +177,7 @@ inline void *LocalAllocator::allocate(LocalAllocatorPool &pool) BLI_assert(this->is_local()); #ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED - return MEM_mallocN_aligned(size, alignment, __func__); + return MEM_mallocN_aligned(pool.element_size, pool.alignment, __func__); #endif void *buffer; @@ -205,7 +205,6 @@ inline void LocalAllocator::deallocate(const void *buffer, LocalAllocatorPool &p #ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED MEM_freeN(const_cast(buffer)); - UNUSED_VARS_NDEBUG(size, alignment); return; 
#endif diff --git a/source/blender/nodes/NOD_geometry_exec.hh b/source/blender/nodes/NOD_geometry_exec.hh index 60f58f4c215..bef0435240e 100644 --- a/source/blender/nodes/NOD_geometry_exec.hh +++ b/source/blender/nodes/NOD_geometry_exec.hh @@ -257,6 +257,11 @@ class GeoNodeExecParams { return dynamic_cast(lf_context_.user_data); } + LocalAllocator &allocator() + { + return *lf_context_.allocator; + } + /** * Add an error message displayed at the top of the node when displaying the node tree, * and potentially elsewhere in Blender. diff --git a/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc b/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc index a07cd1437d6..e06b741f00a 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc @@ -181,7 +181,8 @@ static void node_geo_exec(GeoNodeExecParams params) if (geometry_set.has_instances()) { GeometryComponent &component = geometry_set.get_component_for_write( GEO_COMPONENT_TYPE_INSTANCES); - bke::try_capture_field_on_geometry(component, *attribute_id, domain, field); + bke::try_capture_field_on_geometry( + component, *attribute_id, domain, field, &params.allocator()); } } else { @@ -192,7 +193,8 @@ static void node_geo_exec(GeoNodeExecParams params) for (const GeometryComponentType type : types) { if (geometry_set.has(type)) { GeometryComponent &component = geometry_set.get_component_for_write(type); - bke::try_capture_field_on_geometry(component, *attribute_id, domain, field); + bke::try_capture_field_on_geometry( + component, *attribute_id, domain, field, &params.allocator()); } } }); diff --git a/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc b/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc index d42793d474f..2b94d6aff5c 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc +++
b/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc @@ -147,7 +147,8 @@ static void node_geo_exec(GeoNodeExecParams params) if (geometry_set.has_instances()) { GeometryComponent &component = geometry_set.get_component_for_write( GEO_COMPONENT_TYPE_INSTANCES); - if (!bke::try_capture_field_on_geometry(component, name, domain, field)) { + if (!bke::try_capture_field_on_geometry( + component, name, domain, field, &params.allocator())) { if (component.attribute_domain_size(domain) != 0) { failure.store(true); } @@ -160,7 +161,8 @@ static void node_geo_exec(GeoNodeExecParams params) {GEO_COMPONENT_TYPE_MESH, GEO_COMPONENT_TYPE_POINT_CLOUD, GEO_COMPONENT_TYPE_CURVE}) { if (geometry_set.has(type)) { GeometryComponent &component = geometry_set.get_component_for_write(type); - if (!bke::try_capture_field_on_geometry(component, name, domain, field)) { + if (!bke::try_capture_field_on_geometry( + component, name, domain, field, &params.allocator())) { if (component.attribute_domain_size(domain) != 0) { failure.store(true); } -- 2.30.2 From 1ce04829813b06f6b543c7b2d1c32489d51aee8a Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 20:30:15 +0100 Subject: [PATCH 27/34] progress --- .../functions/intern/multi_function.cc | 4 +- .../nodes/geometry/node_geometry_util.hh | 1 + .../nodes/node_geo_delete_geometry.cc | 40 +++++-- .../node_geo_distribute_points_on_faces.cc | 34 ++++-- .../nodes/node_geo_duplicate_elements.cc | 103 ++++++++++++------ .../geometry/nodes/node_geo_edge_split.cc | 3 +- .../geometry/nodes/node_geo_extrude_mesh.cc | 31 ++++-- .../nodes/geometry/nodes/node_geo_points.cc | 2 +- .../nodes/node_geo_points_to_vertices.cc | 11 +- .../geometry/nodes/node_geo_scale_elements.cc | 37 +++++-- .../nodes/node_geo_scale_instances.cc | 2 +- .../nodes/node_geo_separate_geometry.cc | 2 + .../nodes/node_geo_set_curve_radius.cc | 10 +- .../geometry/nodes/node_geo_set_curve_tilt.cc | 10 +- .../geometry/nodes/node_geo_set_material.cc | 3 +-
.../nodes/node_geo_set_point_radius.cc | 8 +- .../geometry/nodes/node_geo_set_position.cc | 12 +- .../nodes/node_geo_store_named_attribute.cc | 2 +- .../nodes/node_geo_subdivision_surface.cc | 6 +- .../nodes/node_geo_translate_instances.cc | 2 +- .../geometry/nodes/node_geo_triangulate.cc | 2 +- .../geometry/nodes/node_geo_volume_cube.cc | 2 +- 22 files changed, 222 insertions(+), 105 deletions(-) diff --git a/source/blender/functions/intern/multi_function.cc b/source/blender/functions/intern/multi_function.cc index de25b5a0cd8..b6322e81277 100644 --- a/source/blender/functions/intern/multi_function.cc +++ b/source/blender/functions/intern/multi_function.cc @@ -73,6 +73,7 @@ void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context threading::parallel_for(mask.index_range(), grain_size, [&](const IndexRange sub_range) { const IndexMask sliced_mask = mask.slice(sub_range); + MFContextBuilder sub_context{&context.allocator().local()}; if (!hints.allocates_array) { /* There is no benefit to changing indices in this case. */ this->call(sliced_mask, params, context); @@ -80,7 +81,7 @@ void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context } if (sliced_mask[0] < grain_size) { /* The indices are low, no need to offset them. 
*/ - this->call(sliced_mask, params, context); + this->call(sliced_mask, params, sub_context); return; } const int64_t input_slice_start = sliced_mask[0]; @@ -127,7 +128,6 @@ void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context } } - MFContextBuilder sub_context{&context.allocator().local()}; this->call(offset_mask, offset_params, sub_context); }); } diff --git a/source/blender/nodes/geometry/node_geometry_util.hh b/source/blender/nodes/geometry/node_geometry_util.hh index ce6b4cd6cfe..cef97633d85 100644 --- a/source/blender/nodes/geometry/node_geometry_util.hh +++ b/source/blender/nodes/geometry/node_geometry_util.hh @@ -82,6 +82,7 @@ void separate_geometry(GeometrySet &geometry_set, GeometryNodeDeleteGeometryMode mode, const Field &selection_field, const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator, bool &r_is_error); void get_closest_in_bvhtree(BVHTreeFromMesh &tree_data, diff --git a/source/blender/nodes/geometry/nodes/node_geo_delete_geometry.cc b/source/blender/nodes/geometry/nodes/node_geo_delete_geometry.cc index e92fe1a613d..4513216ae5d 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_delete_geometry.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_delete_geometry.cc @@ -307,14 +307,15 @@ static void copy_masked_polys_to_new_mesh(const Mesh &src_mesh, static void delete_curves_selection(GeometrySet &geometry_set, const Field &selection_field, const eAttrDomain selection_domain, - const bke::AnonymousAttributePropagationInfo &propagation_info) + const bke::AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { const Curves &src_curves_id = *geometry_set.get_curves_for_read(); const bke::CurvesGeometry &src_curves = bke::CurvesGeometry::wrap(src_curves_id.geometry); const int domain_size = src_curves.attributes().domain_size(selection_domain); bke::CurvesFieldContext field_context{src_curves, selection_domain}; - fn::FieldEvaluator 
evaluator{field_context, domain_size}; + fn::FieldEvaluator evaluator{field_context, domain_size, &allocator}; evaluator.set_selection(selection_field); evaluator.evaluate(); const IndexMask selection = evaluator.get_evaluated_selection_as_mask(); @@ -341,12 +342,13 @@ static void delete_curves_selection(GeometrySet &geometry_set, static void separate_point_cloud_selection( GeometrySet &geometry_set, const Field &selection_field, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { const PointCloud &src_pointcloud = *geometry_set.get_pointcloud_for_read(); bke::PointCloudFieldContext field_context{src_pointcloud}; - fn::FieldEvaluator evaluator{field_context, src_pointcloud.totpoint}; + fn::FieldEvaluator evaluator{field_context, src_pointcloud.totpoint, &allocator}; evaluator.set_selection(selection_field); evaluator.evaluate(); const IndexMask selection = evaluator.get_evaluated_selection_as_mask(); @@ -374,12 +376,13 @@ static void separate_point_cloud_selection( static void delete_selected_instances(GeometrySet &geometry_set, const Field &selection_field, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { bke::Instances &instances = *geometry_set.get_instances_for_write(); bke::InstancesFieldContext field_context{instances}; - fn::FieldEvaluator evaluator{field_context, instances.instances_num()}; + fn::FieldEvaluator evaluator{field_context, instances.instances_num(), &allocator}; evaluator.set_selection(selection_field); evaluator.evaluate(); const IndexMask selection = evaluator.get_evaluated_selection_as_mask(); @@ -1096,6 +1099,7 @@ void separate_geometry(GeometrySet &geometry_set, const GeometryNodeDeleteGeometryMode mode, const Field &selection_field, const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator, bool &r_is_error) 
{ namespace file_ns = blender::nodes::node_geo_delete_geometry_cc; @@ -1103,7 +1107,8 @@ void separate_geometry(GeometrySet &geometry_set, bool some_valid_domain = false; if (geometry_set.has_pointcloud()) { if (domain == ATTR_DOMAIN_POINT) { - file_ns::separate_point_cloud_selection(geometry_set, selection_field, propagation_info); + file_ns::separate_point_cloud_selection( + geometry_set, selection_field, propagation_info, allocator); some_valid_domain = true; } } @@ -1116,14 +1121,18 @@ void separate_geometry(GeometrySet &geometry_set, } if (geometry_set.has_curves()) { if (ELEM(domain, ATTR_DOMAIN_POINT, ATTR_DOMAIN_CURVE)) { - file_ns::delete_curves_selection( - geometry_set, fn::invert_boolean_field(selection_field), domain, propagation_info); + file_ns::delete_curves_selection(geometry_set, + fn::invert_boolean_field(selection_field), + domain, + propagation_info, + allocator); some_valid_domain = true; } } if (geometry_set.has_instances()) { if (domain == ATTR_DOMAIN_INSTANCE) { - file_ns::delete_selected_instances(geometry_set, selection_field, propagation_info); + file_ns::delete_selected_instances( + geometry_set, selection_field, propagation_info, allocator); some_valid_domain = true; } } @@ -1188,13 +1197,20 @@ static void node_geo_exec(GeoNodeExecParams params) if (domain == ATTR_DOMAIN_INSTANCE) { bool is_error; - separate_geometry(geometry_set, domain, mode, selection, propagation_info, is_error); + separate_geometry( + geometry_set, domain, mode, selection, propagation_info, params.allocator(), is_error); } else { geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { bool is_error; /* Invert here because we want to keep the things not in the selection. 
*/ - separate_geometry(geometry_set, domain, mode, selection, propagation_info, is_error); + separate_geometry(geometry_set, + domain, + mode, + selection, + propagation_info, + params.allocator().local(), + is_error); }); } diff --git a/source/blender/nodes/geometry/nodes/node_geo_distribute_points_on_faces.cc b/source/blender/nodes/geometry/nodes/node_geo_distribute_points_on_faces.cc index 91fa215d117..d646f47fc38 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_distribute_points_on_faces.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_distribute_points_on_faces.cc @@ -385,14 +385,15 @@ BLI_NOINLINE static void compute_attribute_outputs(const Mesh &mesh, static Array calc_full_density_factors_with_selection(const Mesh &mesh, const Field &density_field, - const Field &selection_field) + const Field &selection_field, + LocalAllocator &allocator) { const eAttrDomain domain = ATTR_DOMAIN_CORNER; const int domain_size = mesh.attributes().domain_size(domain); Array densities(domain_size, 0.0f); bke::MeshFieldContext field_context{mesh, domain}; - fn::FieldEvaluator evaluator{field_context, domain_size}; + fn::FieldEvaluator evaluator{field_context, domain_size, &allocator}; evaluator.set_selection(selection_field); evaluator.add_with_destination(density_field, densities.as_mutable_span()); evaluator.evaluate(); @@ -403,12 +404,13 @@ static void distribute_points_random(const Mesh &mesh, const Field &density_field, const Field &selection_field, const int seed, + LocalAllocator &allocator, Vector &positions, Vector &bary_coords, Vector &looptri_indices) { const Array densities = calc_full_density_factors_with_selection( - mesh, density_field, selection_field); + mesh, density_field, selection_field, allocator); sample_mesh_surface(mesh, 1.0f, densities, seed, positions, bary_coords, looptri_indices); } @@ -418,6 +420,7 @@ static void distribute_points_poisson_disk(const Mesh &mesh, const Field &density_factor_field, const Field &selection_field, const int 
seed, + LocalAllocator &allocator, Vector &positions, Vector &bary_coords, Vector &looptri_indices) @@ -428,7 +431,7 @@ static void distribute_points_poisson_disk(const Mesh &mesh, update_elimination_mask_for_close_points(positions, minimum_distance, elimination_mask); const Array density_factors = calc_full_density_factors_with_selection( - mesh, density_factor_field, selection_field); + mesh, density_factor_field, selection_field, allocator); update_elimination_mask_based_on_density_factors( mesh, density_factors, bary_coords, looptri_indices, elimination_mask.as_mutable_span()); @@ -442,7 +445,8 @@ static void point_distribution_calculate(GeometrySet &geometry_set, const GeometryNodeDistributePointsOnFacesMode method, const int seed, const AttributeOutputs &attribute_outputs, - const GeoNodeExecParams &params) + const GeoNodeExecParams &params, + LocalAllocator &allocator) { if (!geometry_set.has_mesh()) { return; @@ -457,8 +461,14 @@ static void point_distribution_calculate(GeometrySet &geometry_set, switch (method) { case GEO_NODE_POINT_DISTRIBUTE_POINTS_ON_FACES_RANDOM: { const Field density_field = params.get_input>("Density"); - distribute_points_random( - mesh, density_field, selection_field, seed, positions, bary_coords, looptri_indices); + distribute_points_random(mesh, + density_field, + selection_field, + seed, + allocator, + positions, + bary_coords, + looptri_indices); break; } case GEO_NODE_POINT_DISTRIBUTE_POINTS_ON_FACES_POISSON: { @@ -471,6 +481,7 @@ static void point_distribution_calculate(GeometrySet &geometry_set, density_factors_field, selection_field, seed, + allocator, positions, bary_coords, looptri_indices); @@ -527,8 +538,13 @@ static void node_geo_exec(GeoNodeExecParams params) lazy_threading::send_hint(); geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { - point_distribution_calculate( - geometry_set, selection_field, method, seed, attribute_outputs, params); + point_distribution_calculate(geometry_set, + selection_field, +
method, + seed, + attribute_outputs, + params, + params.allocator().local()); /* Keep instances because the original geometry set may contain instances that are processed as * well. */ geometry_set.keep_only_during_modify({GEO_COMPONENT_TYPE_POINT_CLOUD}); diff --git a/source/blender/nodes/geometry/nodes/node_geo_duplicate_elements.cc b/source/blender/nodes/geometry/nodes/node_geo_duplicate_elements.cc index cd191fa8498..92814d700ce 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_duplicate_elements.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_duplicate_elements.cc @@ -324,7 +324,8 @@ static void duplicate_curves(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { if (!geometry_set.has_curves()) { geometry_set.remove_geometry_during_modify(); @@ -337,7 +338,7 @@ static void duplicate_curves(GeometrySet &geometry_set, const bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry); bke::CurvesFieldContext field_context{curves, ATTR_DOMAIN_CURVE}; - FieldEvaluator evaluator{field_context, curves.curves_num()}; + FieldEvaluator evaluator{field_context, curves.curves_num(), &allocator}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -516,7 +517,8 @@ static void duplicate_faces(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { if (!geometry_set.has_mesh()) { geometry_set.remove_geometry_during_modify(); @@ -531,7 +533,7 @@ static void duplicate_faces(GeometrySet &geometry_set, const Span loops = mesh.loops(); bke::MeshFieldContext 
field_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator evaluator(field_context, polys.size()); + FieldEvaluator evaluator(field_context, polys.size(), &allocator); evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -718,7 +720,8 @@ static void duplicate_edges(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { if (!geometry_set.has_mesh()) { geometry_set.remove_geometry_during_modify(); @@ -728,7 +731,7 @@ static void duplicate_edges(GeometrySet &geometry_set, const Span edges = mesh.edges(); bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator evaluator{field_context, edges.size()}; + FieldEvaluator evaluator{field_context, edges.size(), &allocator}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -798,7 +801,8 @@ static void duplicate_points_curve(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { const Curves &src_curves_id = *geometry_set.get_curves_for_read(); const bke::CurvesGeometry &src_curves = bke::CurvesGeometry::wrap(src_curves_id.geometry); @@ -807,7 +811,7 @@ static void duplicate_points_curve(GeometrySet &geometry_set, } bke::CurvesFieldContext field_context{src_curves, ATTR_DOMAIN_POINT}; - FieldEvaluator evaluator{field_context, src_curves.points_num()}; + FieldEvaluator evaluator{field_context, src_curves.points_num(), &allocator}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -902,13 +906,14 @@ static void 
duplicate_points_mesh(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { const Mesh &mesh = *geometry_set.get_mesh_for_read(); const Span src_verts = mesh.verts(); bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_POINT}; - FieldEvaluator evaluator{field_context, src_verts.size()}; + FieldEvaluator evaluator{field_context, src_verts.size(), &allocator}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -954,12 +959,13 @@ static void duplicate_points_pointcloud(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { const PointCloud &src_points = *geometry_set.get_pointcloud_for_read(); bke::PointCloudFieldContext field_context{src_points}; - FieldEvaluator evaluator{field_context, src_points.totpoint}; + FieldEvaluator evaluator{field_context, src_points.totpoint, &allocator}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -1001,27 +1007,40 @@ static void duplicate_points(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { Vector component_types = geometry_set.gather_component_types(true, true); for (const GeometryComponentType component_type : component_types) { switch (component_type) { case GEO_COMPONENT_TYPE_POINT_CLOUD: if (geometry_set.has_pointcloud()) { - duplicate_points_pointcloud( - geometry_set, 
count_field, selection_field, attribute_outputs, propagation_info); + duplicate_points_pointcloud(geometry_set, + count_field, + selection_field, + attribute_outputs, + propagation_info, + allocator); } break; case GEO_COMPONENT_TYPE_MESH: if (geometry_set.has_mesh()) { - duplicate_points_mesh( - geometry_set, count_field, selection_field, attribute_outputs, propagation_info); + duplicate_points_mesh(geometry_set, + count_field, + selection_field, + attribute_outputs, + propagation_info, + allocator); } break; case GEO_COMPONENT_TYPE_CURVE: if (geometry_set.has_curves()) { - duplicate_points_curve( - geometry_set, count_field, selection_field, attribute_outputs, propagation_info); + duplicate_points_curve(geometry_set, + count_field, + selection_field, + attribute_outputs, + propagation_info, + allocator); } break; default: @@ -1042,7 +1061,8 @@ static void duplicate_instances(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { if (!geometry_set.has_instances()) { geometry_set.clear(); @@ -1052,7 +1072,7 @@ static void duplicate_instances(GeometrySet &geometry_set, const bke::Instances &src_instances = *geometry_set.get_instances_for_read(); bke::InstancesFieldContext field_context{src_instances}; - FieldEvaluator evaluator{field_context, src_instances.instances_num()}; + FieldEvaluator evaluator{field_context, src_instances.instances_num(), &allocator}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -1124,27 +1144,48 @@ static void node_geo_exec(GeoNodeExecParams params) "Geometry"); if (duplicate_domain == ATTR_DOMAIN_INSTANCE) { - duplicate_instances( - geometry_set, count_field, selection_field, attribute_outputs, propagation_info); + duplicate_instances(geometry_set, + count_field, + 
selection_field, + attribute_outputs, + propagation_info, + params.allocator()); } else { geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { + LocalAllocator &allocator = params.allocator().local(); switch (duplicate_domain) { case ATTR_DOMAIN_CURVE: - duplicate_curves( - geometry_set, count_field, selection_field, attribute_outputs, propagation_info); + duplicate_curves(geometry_set, + count_field, + selection_field, + attribute_outputs, + propagation_info, + allocator); break; case ATTR_DOMAIN_FACE: - duplicate_faces( - geometry_set, count_field, selection_field, attribute_outputs, propagation_info); + duplicate_faces(geometry_set, + count_field, + selection_field, + attribute_outputs, + propagation_info, + allocator); break; case ATTR_DOMAIN_EDGE: - duplicate_edges( - geometry_set, count_field, selection_field, attribute_outputs, propagation_info); + duplicate_edges(geometry_set, + count_field, + selection_field, + attribute_outputs, + propagation_info, + allocator); break; case ATTR_DOMAIN_POINT: - duplicate_points( - geometry_set, count_field, selection_field, attribute_outputs, propagation_info); + duplicate_points(geometry_set, + count_field, + selection_field, + attribute_outputs, + propagation_info, + allocator); break; default: BLI_assert_unreachable(); diff --git a/source/blender/nodes/geometry/nodes/node_geo_edge_split.cc b/source/blender/nodes/geometry/nodes/node_geo_edge_split.cc index 2948713852b..f5c34a6476a 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_edge_split.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_edge_split.cc @@ -25,7 +25,8 @@ static void node_geo_exec(GeoNodeExecParams params) if (const Mesh *mesh = geometry_set.get_mesh_for_read()) { bke::MeshFieldContext field_context{*mesh, ATTR_DOMAIN_EDGE}; - fn::FieldEvaluator selection_evaluator{field_context, mesh->totedge}; + fn::FieldEvaluator selection_evaluator{ + field_context, mesh->totedge, &params.allocator().local()};
selection_evaluator.set_selection(selection_field); selection_evaluator.evaluate(); const IndexMask mask = selection_evaluator.get_evaluated_selection_as_mask(); diff --git a/source/blender/nodes/geometry/nodes/node_geo_extrude_mesh.cc b/source/blender/nodes/geometry/nodes/node_geo_extrude_mesh.cc index 27f34db2f9f..c4a1d9e7d29 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_extrude_mesh.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_extrude_mesh.cc @@ -200,13 +200,14 @@ static Array> create_vert_to_edge_map(const int vert_size, static void extrude_mesh_vertices(Mesh &mesh, const Field &selection_field, const Field &offset_field, - const AttributeOutputs &attribute_outputs) + const AttributeOutputs &attribute_outputs, + LocalAllocator &allocator) { const int orig_vert_size = mesh.totvert; const int orig_edge_size = mesh.totedge; const bke::MeshFieldContext context{mesh, ATTR_DOMAIN_POINT}; - FieldEvaluator evaluator{context, mesh.totvert}; + FieldEvaluator evaluator{context, mesh.totvert, &allocator}; evaluator.add(offset_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -368,7 +369,8 @@ static VectorSet vert_indices_from_edges(const Mesh &mesh, const Span ed static void extrude_mesh_edges(Mesh &mesh, const Field &selection_field, const Field &offset_field, - const AttributeOutputs &attribute_outputs) + const AttributeOutputs &attribute_outputs, + LocalAllocator &allocator) { const int orig_vert_size = mesh.totvert; const Span orig_edges = mesh.edges(); @@ -376,7 +378,7 @@ static void extrude_mesh_edges(Mesh &mesh, const int orig_loop_size = mesh.totloop; const bke::MeshFieldContext edge_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator edge_evaluator{edge_context, mesh.totedge}; + FieldEvaluator edge_evaluator{edge_context, mesh.totedge, &allocator}; edge_evaluator.set_selection(selection_field); edge_evaluator.add(offset_field); edge_evaluator.evaluate(); @@ -647,7 +649,8 @@ static void extrude_mesh_edges(Mesh &mesh, 
static void extrude_mesh_face_regions(Mesh &mesh, const Field &selection_field, const Field &offset_field, - const AttributeOutputs &attribute_outputs) + const AttributeOutputs &attribute_outputs, + LocalAllocator &allocator) { const int orig_vert_size = mesh.totvert; const Span orig_edges = mesh.edges(); @@ -655,7 +658,7 @@ static void extrude_mesh_face_regions(Mesh &mesh, const Span orig_loops = mesh.loops(); const bke::MeshFieldContext poly_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator poly_evaluator{poly_context, mesh.totpoly}; + FieldEvaluator poly_evaluator{poly_context, mesh.totpoly, &allocator}; poly_evaluator.set_selection(selection_field); poly_evaluator.add(offset_field); poly_evaluator.evaluate(); @@ -1050,7 +1053,8 @@ static IndexRange selected_corner_range(Span offsets, const int index) static void extrude_individual_mesh_faces(Mesh &mesh, const Field &selection_field, const Field &offset_field, - const AttributeOutputs &attribute_outputs) + const AttributeOutputs &attribute_outputs, + LocalAllocator &allocator) { const int orig_vert_size = mesh.totvert; const int orig_edge_size = mesh.totedge; @@ -1061,7 +1065,7 @@ static void extrude_individual_mesh_faces(Mesh &mesh, * the vertices are moved, and the evaluated result might reference an attribute. 
*/ Array poly_offset(orig_polys.size()); const bke::MeshFieldContext poly_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator poly_evaluator{poly_context, mesh.totpoly}; + FieldEvaluator poly_evaluator{poly_context, mesh.totpoly, &allocator}; poly_evaluator.set_selection(selection_field); poly_evaluator.add_with_destination(offset_field, poly_offset.as_mutable_span()); poly_evaluator.evaluate(); @@ -1342,19 +1346,22 @@ static void node_geo_exec(GeoNodeExecParams params) geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { if (Mesh *mesh = geometry_set.get_mesh_for_write()) { + LocalAllocator &allocator = params.allocator().local(); switch (mode) { case GEO_NODE_EXTRUDE_MESH_VERTICES: - extrude_mesh_vertices(*mesh, selection, final_offset, attribute_outputs); + extrude_mesh_vertices(*mesh, selection, final_offset, attribute_outputs, allocator); break; case GEO_NODE_EXTRUDE_MESH_EDGES: - extrude_mesh_edges(*mesh, selection, final_offset, attribute_outputs); + extrude_mesh_edges(*mesh, selection, final_offset, attribute_outputs, allocator); break; case GEO_NODE_EXTRUDE_MESH_FACES: { if (extrude_individual) { - extrude_individual_mesh_faces(*mesh, selection, final_offset, attribute_outputs); + extrude_individual_mesh_faces( + *mesh, selection, final_offset, attribute_outputs, allocator); } else { - extrude_mesh_face_regions(*mesh, selection, final_offset, attribute_outputs); + extrude_mesh_face_regions( + *mesh, selection, final_offset, attribute_outputs, allocator); } break; } diff --git a/source/blender/nodes/geometry/nodes/node_geo_points.cc b/source/blender/nodes/geometry/nodes/node_geo_points.cc index dcbe176b384..c6da51e9682 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_points.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_points.cc @@ -79,7 +79,7 @@ static void node_geo_exec(GeoNodeExecParams params) "radius", ATTR_DOMAIN_POINT); PointsFieldContext context{count}; - fn::FieldEvaluator evaluator{context, count}; + fn::FieldEvaluator 
evaluator{context, count, &params.allocator()}; evaluator.add_with_destination(position_field, output_position.varray); evaluator.add_with_destination(radius_field, output_radii.varray); evaluator.evaluate(); diff --git a/source/blender/nodes/geometry/nodes/node_geo_points_to_vertices.cc b/source/blender/nodes/geometry/nodes/node_geo_points_to_vertices.cc index 5cd5bbe690e..130846a462b 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_points_to_vertices.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_points_to_vertices.cc @@ -24,7 +24,8 @@ static void node_declare(NodeDeclarationBuilder &b) static void geometry_set_points_to_vertices( GeometrySet &geometry_set, Field &selection_field, - const AnonymousAttributePropagationInfo &propagation_info) + const AnonymousAttributePropagationInfo &propagation_info, + LocalAllocator &allocator) { const PointCloud *points = geometry_set.get_pointcloud_for_read(); if (points == nullptr) { @@ -37,7 +38,7 @@ static void geometry_set_points_to_vertices( } bke::PointCloudFieldContext field_context{*points}; - fn::FieldEvaluator selection_evaluator{field_context, points->totpoint}; + fn::FieldEvaluator selection_evaluator{field_context, points->totpoint, &allocator}; selection_evaluator.add(selection_field); selection_evaluator.evaluate(); const IndexMask selection = selection_evaluator.get_evaluated_as_mask(0); @@ -78,8 +79,10 @@ static void node_geo_exec(GeoNodeExecParams params) Field selection_field = params.extract_input>("Selection"); geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { - geometry_set_points_to_vertices( - geometry_set, selection_field, params.get_output_propagation_info("Mesh")); + geometry_set_points_to_vertices(geometry_set, + selection_field, + params.get_output_propagation_info("Mesh"), + params.allocator().local()); }); params.set_output("Mesh", std::move(geometry_set)); diff --git a/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc
b/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc index da9b04c06c0..5950b5ee7e4 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc @@ -307,10 +307,12 @@ static AxisScaleParams evaluate_axis_scale_fields(FieldEvaluator &evaluator, return out; } -static void scale_faces_on_axis(Mesh &mesh, const AxisScaleFields &fields) +static void scale_faces_on_axis(Mesh &mesh, + const AxisScaleFields &fields, + LocalAllocator &allocator) { bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator evaluator{field_context, mesh.totpoly}; + FieldEvaluator evaluator{field_context, mesh.totpoly, &allocator}; AxisScaleParams params = evaluate_axis_scale_fields(evaluator, fields); Vector island = prepare_face_islands(mesh, params.selection); @@ -329,10 +331,12 @@ static UniformScaleParams evaluate_uniform_scale_fields(FieldEvaluator &evaluato return out; } -static void scale_faces_uniformly(Mesh &mesh, const UniformScaleFields &fields) +static void scale_faces_uniformly(Mesh &mesh, + const UniformScaleFields &fields, + LocalAllocator &allocator) { bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator evaluator{field_context, mesh.totpoly}; + FieldEvaluator evaluator{field_context, mesh.totpoly, &allocator}; UniformScaleParams params = evaluate_uniform_scale_fields(evaluator, fields); Vector island = prepare_face_islands(mesh, params.selection); @@ -381,20 +385,24 @@ static void get_edge_verts(const Span edges, r_vertex_indices.add(edge.v2); } -static void scale_edges_uniformly(Mesh &mesh, const UniformScaleFields &fields) +static void scale_edges_uniformly(Mesh &mesh, + const UniformScaleFields &fields, + LocalAllocator &allocator) { bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator evaluator{field_context, mesh.totedge}; + FieldEvaluator evaluator{field_context, mesh.totedge, &allocator}; UniformScaleParams 
params = evaluate_uniform_scale_fields(evaluator, fields); Vector island = prepare_edge_islands(mesh, params.selection); scale_vertex_islands_uniformly(mesh, island, params, get_edge_verts); } -static void scale_edges_on_axis(Mesh &mesh, const AxisScaleFields &fields) +static void scale_edges_on_axis(Mesh &mesh, + const AxisScaleFields &fields, + LocalAllocator &allocator) { bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator evaluator{field_context, mesh.totedge}; + FieldEvaluator evaluator{field_context, mesh.totedge, &allocator}; AxisScaleParams params = evaluate_axis_scale_fields(evaluator, fields); Vector island = prepare_edge_islands(mesh, params.selection); @@ -419,15 +427,18 @@ static void node_geo_exec(GeoNodeExecParams params) geometry.modify_geometry_sets([&](GeometrySet &geometry) { if (Mesh *mesh = geometry.get_mesh_for_write()) { + LocalAllocator &allocator = params.allocator().local(); switch (domain) { case ATTR_DOMAIN_FACE: { switch (scale_mode) { case GEO_NODE_SCALE_ELEMENTS_UNIFORM: { - scale_faces_uniformly(*mesh, {selection_field, scale_field, center_field}); + scale_faces_uniformly( + *mesh, {selection_field, scale_field, center_field}, allocator); break; } case GEO_NODE_SCALE_ELEMENTS_SINGLE_AXIS: { - scale_faces_on_axis(*mesh, {selection_field, scale_field, center_field, axis_field}); + scale_faces_on_axis( + *mesh, {selection_field, scale_field, center_field, axis_field}, allocator); break; } } @@ -436,11 +447,13 @@ static void node_geo_exec(GeoNodeExecParams params) case ATTR_DOMAIN_EDGE: { switch (scale_mode) { case GEO_NODE_SCALE_ELEMENTS_UNIFORM: { - scale_edges_uniformly(*mesh, {selection_field, scale_field, center_field}); + scale_edges_uniformly( + *mesh, {selection_field, scale_field, center_field}, allocator); break; } case GEO_NODE_SCALE_ELEMENTS_SINGLE_AXIS: { - scale_edges_on_axis(*mesh, {selection_field, scale_field, center_field, axis_field}); + scale_edges_on_axis( + *mesh, {selection_field, 
scale_field, center_field, axis_field}, allocator); break; } } diff --git a/source/blender/nodes/geometry/nodes/node_geo_scale_instances.cc b/source/blender/nodes/geometry/nodes/node_geo_scale_instances.cc index 95604fdcb61..737fb5474c1 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_scale_instances.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_scale_instances.cc @@ -21,7 +21,7 @@ static void node_declare(NodeDeclarationBuilder &b) static void scale_instances(GeoNodeExecParams &params, bke::Instances &instances) { const bke::InstancesFieldContext context{instances}; - fn::FieldEvaluator evaluator{context, instances.instances_num()}; + fn::FieldEvaluator evaluator{context, instances.instances_num(), &params.allocator()}; evaluator.set_selection(params.extract_input>("Selection")); evaluator.add(params.extract_input>("Scale")); evaluator.add(params.extract_input>("Center")); diff --git a/source/blender/nodes/geometry/nodes/node_geo_separate_geometry.cc b/source/blender/nodes/geometry/nodes/node_geo_separate_geometry.cc index 28c0bf84160..f9b2faf8607 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_separate_geometry.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_separate_geometry.cc @@ -59,6 +59,7 @@ static void node_geo_exec(GeoNodeExecParams params) GEO_NODE_DELETE_GEOMETRY_MODE_ALL, selection, propagation_info, + params.allocator(), is_error); } else { @@ -68,6 +69,7 @@ static void node_geo_exec(GeoNodeExecParams params) GEO_NODE_DELETE_GEOMETRY_MODE_ALL, selection, propagation_info, + params.allocator().local(), is_error); }); } diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_curve_radius.cc b/source/blender/nodes/geometry/nodes/node_geo_set_curve_radius.cc index f03f5aa0413..94d9ca562af 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_curve_radius.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_curve_radius.cc @@ -20,7 +20,8 @@ static void node_declare(NodeDeclarationBuilder &b) static void
set_radius(bke::CurvesGeometry &curves, const Field &selection_field, - const Field &radius_field) + const Field &radius_field, + LocalAllocator &allocator) { if (curves.points_num() == 0) { return; @@ -30,7 +31,7 @@ static void set_radius(bke::CurvesGeometry &curves, ATTR_DOMAIN_POINT); bke::CurvesFieldContext field_context{curves, ATTR_DOMAIN_POINT}; - fn::FieldEvaluator evaluator{field_context, curves.points_num()}; + fn::FieldEvaluator evaluator{field_context, curves.points_num(), &allocator}; evaluator.set_selection(selection_field); evaluator.add_with_destination(radius_field, radii.varray); evaluator.evaluate(); @@ -46,7 +47,10 @@ static void node_geo_exec(GeoNodeExecParams params) geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { if (Curves *curves_id = geometry_set.get_curves_for_write()) { - set_radius(bke::CurvesGeometry::wrap(curves_id->geometry), selection_field, radii_field); + set_radius(bke::CurvesGeometry::wrap(curves_id->geometry), + selection_field, + radii_field, + params.allocator().local()); } }); diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_curve_tilt.cc b/source/blender/nodes/geometry/nodes/node_geo_set_curve_tilt.cc index 2887800995f..0013b2cee72 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_curve_tilt.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_curve_tilt.cc @@ -16,7 +16,8 @@ static void node_declare(NodeDeclarationBuilder &b) static void set_tilt(bke::CurvesGeometry &curves, const Field &selection_field, - const Field &tilt_field) + const Field &tilt_field, + LocalAllocator &allocator) { if (curves.points_num() == 0) { return; @@ -26,7 +27,7 @@ static void set_tilt(bke::CurvesGeometry &curves, ATTR_DOMAIN_POINT); bke::CurvesFieldContext field_context{curves, ATTR_DOMAIN_POINT}; - fn::FieldEvaluator evaluator{field_context, curves.points_num()}; + fn::FieldEvaluator evaluator{field_context, curves.points_num(), &allocator}; evaluator.set_selection(selection_field); 
evaluator.add_with_destination(tilt_field, tilts.varray); evaluator.evaluate(); @@ -42,7 +43,10 @@ static void node_geo_exec(GeoNodeExecParams params) geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { if (Curves *curves_id = geometry_set.get_curves_for_write()) { - set_tilt(bke::CurvesGeometry::wrap(curves_id->geometry), selection_field, tilt_field); + set_tilt(bke::CurvesGeometry::wrap(curves_id->geometry), + selection_field, + tilt_field, + params.allocator().local()); } }); diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_material.cc b/source/blender/nodes/geometry/nodes/node_geo_set_material.cc index e6e3eadff03..64b6947924f 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_material.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_material.cc @@ -75,7 +75,8 @@ static void node_geo_exec(GeoNodeExecParams params) Mesh &mesh = *mesh_component.get_for_write(); bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_FACE}; - fn::FieldEvaluator selection_evaluator{field_context, mesh.totpoly}; + fn::FieldEvaluator selection_evaluator{ + field_context, mesh.totpoly, &params.allocator().local()}; selection_evaluator.add(selection_field); selection_evaluator.evaluate(); const IndexMask selection = selection_evaluator.get_evaluated_as_mask(0); diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_point_radius.cc b/source/blender/nodes/geometry/nodes/node_geo_set_point_radius.cc index 0034fc4a292..92e2ef83ba6 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_point_radius.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_point_radius.cc @@ -20,7 +20,8 @@ static void node_declare(NodeDeclarationBuilder &b) static void set_radius_in_component(PointCloud &pointcloud, const Field &selection_field, - const Field &radius_field) + const Field &radius_field, + LocalAllocator &allocator) { if (pointcloud.totpoint == 0) { return; @@ -30,7 +31,7 @@ static void set_radius_in_component(PointCloud &pointcloud,
ATTR_DOMAIN_POINT); bke::PointCloudFieldContext field_context{pointcloud}; - fn::FieldEvaluator evaluator{field_context, pointcloud.totpoint}; + fn::FieldEvaluator evaluator{field_context, pointcloud.totpoint, &allocator}; evaluator.set_selection(selection_field); evaluator.add_with_destination(radius_field, radii.varray); evaluator.evaluate(); @@ -46,7 +47,8 @@ static void node_geo_exec(GeoNodeExecParams params) geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { if (PointCloud *pointcloud = geometry_set.get_pointcloud_for_write()) { - set_radius_in_component(*pointcloud, selection_field, radii_field); + set_radius_in_component( + *pointcloud, selection_field, radii_field, params.allocator().local()); } }); diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_position.cc b/source/blender/nodes/geometry/nodes/node_geo_set_position.cc index 4a76e230af7..d079e2f4798 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_position.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_position.cc @@ -147,7 +147,8 @@ static void set_computed_position_and_offset(GeometryComponent &component, static void set_position_in_component(GeometryComponent &component, const Field &selection_field, const Field &position_field, - const Field &offset_field) + const Field &offset_field, + LocalAllocator &allocator) { eAttrDomain domain = component.type() == GEO_COMPONENT_TYPE_INSTANCES ? 
ATTR_DOMAIN_INSTANCE : ATTR_DOMAIN_POINT; @@ -157,7 +158,7 @@ static void set_position_in_component(GeometryComponent &component, return; } - fn::FieldEvaluator evaluator{field_context, domain_size}; + fn::FieldEvaluator evaluator{field_context, domain_size, &allocator}; evaluator.set_selection(selection_field); evaluator.add(position_field); evaluator.add(offset_field); @@ -182,8 +183,11 @@ static void node_geo_exec(GeoNodeExecParams params) GEO_COMPONENT_TYPE_CURVE, GEO_COMPONENT_TYPE_INSTANCES}) { if (geometry.has(type)) { - set_position_in_component( - geometry.get_component_for_write(type), selection_field, position_field, offset_field); + set_position_in_component(geometry.get_component_for_write(type), + selection_field, + position_field, + offset_field, + params.allocator()); } } diff --git a/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc b/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc index 2b94d6aff5c..5497b16cf5c 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc @@ -162,7 +162,7 @@ static void node_geo_exec(GeoNodeExecParams params) if (geometry_set.has(type)) { GeometryComponent &component = geometry_set.get_component_for_write(type); if (!bke::try_capture_field_on_geometry( - component, name, domain, field, &params.allocator())) { + component, name, domain, field, &params.allocator().local())) { if (component.attribute_domain_size(domain) != 0) { failure.store(true); } diff --git a/source/blender/nodes/geometry/nodes/node_geo_subdivision_surface.cc b/source/blender/nodes/geometry/nodes/node_geo_subdivision_surface.cc index 40b49055949..cb6c796e34b 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_subdivision_surface.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_subdivision_surface.cc @@ -124,14 +124,16 @@ static void node_geo_exec(GeoNodeExecParams params) return; } + LocalAllocator
&allocator = params.allocator().local(); + bke::MeshFieldContext point_context{mesh, ATTR_DOMAIN_POINT}; - FieldEvaluator point_evaluator(point_context, mesh.totvert); + FieldEvaluator point_evaluator(point_context, mesh.totvert, &allocator); point_evaluator.add(vertex_crease_field); point_evaluator.evaluate(); const VArray vertex_creases = point_evaluator.get_evaluated(0); bke::MeshFieldContext edge_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator edge_evaluator(edge_context, mesh.totedge); + FieldEvaluator edge_evaluator(edge_context, mesh.totedge, &allocator); edge_evaluator.add(edge_crease_field); edge_evaluator.evaluate(); const VArray edge_creases = edge_evaluator.get_evaluated(0); diff --git a/source/blender/nodes/geometry/nodes/node_geo_translate_instances.cc b/source/blender/nodes/geometry/nodes/node_geo_translate_instances.cc index 5a278ac8547..ff9708405d4 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_translate_instances.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_translate_instances.cc @@ -20,7 +20,7 @@ static void node_declare(NodeDeclarationBuilder &b) static void translate_instances(GeoNodeExecParams &params, bke::Instances &instances) { const bke::InstancesFieldContext context{instances}; - fn::FieldEvaluator evaluator{context, instances.instances_num()}; + fn::FieldEvaluator evaluator{context, instances.instances_num(), &params.allocator()}; evaluator.set_selection(params.extract_input>("Selection")); evaluator.add(params.extract_input>("Translation")); evaluator.add(params.extract_input>("Local Space")); diff --git a/source/blender/nodes/geometry/nodes/node_geo_triangulate.cc b/source/blender/nodes/geometry/nodes/node_geo_triangulate.cc index 52c7dbf0605..9e60c10d834 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_triangulate.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_triangulate.cc @@ -78,7 +78,7 @@ static void node_geo_exec(GeoNodeExecParams params) const Mesh &mesh_in = *geometry_set.get_mesh_for_read();
bke::MeshFieldContext context{mesh_in, ATTR_DOMAIN_FACE}; - FieldEvaluator evaluator{context, mesh_in.totpoly}; + FieldEvaluator evaluator{context, mesh_in.totpoly, &params.allocator().local()}; evaluator.add(selection_field); evaluator.evaluate(); const IndexMask selection = evaluator.get_evaluated_as_mask(0); diff --git a/source/blender/nodes/geometry/nodes/node_geo_volume_cube.cc b/source/blender/nodes/geometry/nodes/node_geo_volume_cube.cc index 7d439309380..aae2cd73578 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_volume_cube.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_volume_cube.cc @@ -148,7 +148,7 @@ static void node_geo_exec(GeoNodeExecParams params) /* Evaluate input field on a 3D grid. */ Grid3DFieldContext context(resolution, bounds_min, bounds_max); - FieldEvaluator evaluator(context, context.points_num()); + FieldEvaluator evaluator(context, context.points_num(), &params.allocator()); Array densities(context.points_num()); evaluator.add_with_destination(std::move(input_field), densities.as_mutable_span()); evaluator.evaluate(); -- 2.30.2 From 0dcb5ab278c4aae2d8df96f3a99c51309fa91780 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 21:03:06 +0100 Subject: [PATCH 28/34] fix --- .../blender/nodes/geometry/nodes/node_geo_attribute_capture.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc b/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc index e06b741f00a..9d50b7afcf9 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc @@ -194,7 +194,7 @@ static void node_geo_exec(GeoNodeExecParams params) if (geometry_set.has(type)) { GeometryComponent &component = geometry_set.get_component_for_write(type); bke::try_capture_field_on_geometry( - component, *attribute_id, domain, field, &params.allocator()); + component, *attribute_id, domain,
field, &params.allocator().local()); } } }); -- 2.30.2 From 7aba5d0c7f5a619557b933c54122a4a23c7cb58a Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 21:28:21 +0100 Subject: [PATCH 29/34] progress --- .../intern/lazy_function_graph_executor.cc | 50 ++++++++++--------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index 05657ac99f2..d9a42a40ac7 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -272,7 +272,9 @@ class Executor { for (const int node_index : range) { const Node &node = *self_.graph_.nodes()[node_index]; NodeState &node_state = node_states_[node_index]; - this->destruct_node_state(node, node_state, local_allocator); + if (!node_state.node_has_finished) { + this->destruct_node_state(node, node_state, local_allocator); + } } }); allocator.destruct_free_array(node_states_); @@ -559,7 +561,6 @@ class Executor { const Node &node = socket.node(); const int index_in_node = socket.index(); NodeState &node_state = node_states_[node.index_in_graph()]; - OutputState &output_state = node_state.outputs[index_in_node]; /* The notified output socket might be an input of the entire graph. In this case, notify the * caller that the input is required.
*/ @@ -584,6 +585,10 @@ class Executor { BLI_assert(node.is_function()); this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { + if (node_state.node_has_finished) { + return; + } + OutputState &output_state = node_state.outputs[index_in_node]; if (output_state.usage == ValueUsage::Used) { return; } @@ -597,9 +602,12 @@ class Executor { const Node &node = socket.node(); const int index_in_node = socket.index(); NodeState &node_state = node_states_[node.index_in_graph()]; - OutputState &output_state = node_state.outputs[index_in_node]; this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { + if (node_state.node_has_finished) { + return; + } + OutputState &output_state = node_state.outputs[index_in_node]; output_state.potential_target_sockets -= 1; if (output_state.potential_target_sockets == 0) { BLI_assert(output_state.usage != ValueUsage::Unused); @@ -867,18 +875,9 @@ class Executor { if (input_state.usage == ValueUsage::Maybe) { this->set_input_unused(locked_node, input_socket, allocator); } - else if (input_state.usage == ValueUsage::Used) { - this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator); - } } - if (node_state.storage != nullptr) { - if (node.is_function()) { - const FunctionNode &fn_node = static_cast(node); - fn_node.function().destruct_storage(node_state.storage, allocator); - } - node_state.storage = nullptr; - } + this->destruct_node_state(node, node_state, allocator); } void destruct_input_value_if_exists(InputState &input_state, @@ -989,17 +988,7 @@ class Executor { const Node &target_node = target_socket->node(); NodeState &node_state = node_states_[target_node.index_in_graph()]; const int input_index = target_socket->index(); - InputState &input_state = node_state.inputs[input_index]; const bool is_last_target = target_socket == targets.last(); -#ifdef DEBUG - if (input_state.value != nullptr) { - if (self_.logger_ != nullptr) { - 
self_.logger_->dump_when_input_is_set_twice(*target_socket, from_socket, *context_); - } - BLI_assert_unreachable(); - } -#endif - BLI_assert(!input_state.was_ready_for_execution); BLI_assert(target_socket->type() == type); BLI_assert(target_socket->origin() == &from_socket); @@ -1023,6 +1012,21 @@ class Executor { continue; } this->with_locked_node(target_node, node_state, current_task, [&](LockedNode &locked_node) { + if (node_state.node_has_finished) { + return; + } + InputState &input_state = node_state.inputs[input_index]; + +#ifdef DEBUG + if (input_state.value != nullptr) { + if (self_.logger_ != nullptr) { + self_.logger_->dump_when_input_is_set_twice(*target_socket, from_socket, *context_); + } + BLI_assert_unreachable(); + } +#endif + BLI_assert(!input_state.was_ready_for_execution); + if (input_state.usage == ValueUsage::Unused) { return; } -- 2.30.2 From 90333ba8fcbfe79cb8a648e2b6912ea465ae45d1 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 21:30:27 +0100 Subject: [PATCH 30/34] add small buffer --- source/blender/blenlib/BLI_local_allocator.hh | 1 + source/blender/blenlib/intern/local_allocator.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh index dd7470fc1ee..a149bbb2507 100644 --- a/source/blender/blenlib/BLI_local_allocator.hh +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -35,6 +35,7 @@ class LocalAllocator : NonCopyable, NonMovable { private: static constexpr int64_t s_alignment = 64; LocalAllocatorSet &owner_set_; + AlignedBuffer<256, 64> initial_buffer_; LinearAllocator<> linear_allocator_; struct Head { diff --git a/source/blender/blenlib/intern/local_allocator.cc b/source/blender/blenlib/intern/local_allocator.cc index 1d7e6ecd915..73015d3cff5 100644 --- a/source/blender/blenlib/intern/local_allocator.cc +++ b/source/blender/blenlib/intern/local_allocator.cc @@ -13,6 +13,7 @@ LocalAllocatorSet::~LocalAllocatorSet() = 
default; LocalAllocator::LocalAllocator(LocalAllocatorSet &owner_set) : owner_set_(owner_set) { + linear_allocator_.provide_buffer(initial_buffer_); for (const int64_t i : IndexRange(small_buffer_pools_.size())) { LocalAllocatorPool &pool = small_buffer_pools_[i]; pool.element_size = 8 * (i + 1); -- 2.30.2 From 12e76d1f83fcd8bbcbde3d1ba5e90bae01e3bbc3 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 22:11:13 +0100 Subject: [PATCH 31/34] allocate all node state data in one chunk --- .../FN_lazy_function_graph_executor.hh | 15 +++ .../intern/lazy_function_graph_executor.cc | 109 ++++++++++-------- .../functions/tests/FN_lazy_function_test.cc | 11 +- source/blender/modifiers/intern/MOD_nodes.cc | 8 +- .../nodes/NOD_geometry_nodes_lazy_function.hh | 1 + .../intern/geometry_nodes_lazy_function.cc | 2 + 6 files changed, 93 insertions(+), 53 deletions(-) diff --git a/source/blender/functions/FN_lazy_function_graph_executor.hh b/source/blender/functions/FN_lazy_function_graph_executor.hh index 14f79970425..a13ca907c21 100644 --- a/source/blender/functions/FN_lazy_function_graph_executor.hh +++ b/source/blender/functions/FN_lazy_function_graph_executor.hh @@ -59,11 +59,23 @@ class GraphExecutor : public LazyFunction { using Logger = GraphExecutorLogger; using SideEffectProvider = GraphExecutorSideEffectProvider; + struct NodeBufferOffsets { + int node; + int inputs; + int outputs; + }; + + struct PreprocessData { + Array offsets; + int node_state_buffer_size; + }; + private: /** * The graph that is evaluated. */ const Graph &graph_; + const PreprocessData &preprocess_data_; /** * Input and output sockets of the entire graph. 
*/ @@ -85,12 +97,15 @@ class GraphExecutor : public LazyFunction { GraphExecutor(const Graph &graph, Span graph_inputs, Span graph_outputs, + const PreprocessData &preprocess_data, const Logger *logger, const SideEffectProvider *side_effect_provider); void *init_storage(LocalAllocator &allocator) const override; void destruct_storage(void *storage, LocalAllocator &allocator) const override; + static void preprocess(const Graph &graph, PreprocessData &r_preprocess_data); + private: void execute_impl(Params &params, const Context &context) const override; }; diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index d9a42a40ac7..3bebcbf4318 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -75,7 +75,7 @@ enum class NodeScheduleState { RunningAndRescheduled, }; -struct InputState { +struct alignas(8) InputState { /** * Value of this input socket. By default, the value is empty. When other nodes are done * computing their outputs, the computed values will be forwarded to linked input sockets. The @@ -97,7 +97,7 @@ struct InputState { bool was_ready_for_execution = false; }; -struct OutputState { +struct alignas(8) OutputState { /** * Keeps track of how the output value is used. If a connected input becomes used, this output * has to become used as well. The output becomes unused when it is used by no input socket @@ -127,7 +127,7 @@ struct OutputState { void *value = nullptr; }; -struct NodeState { +struct alignas(8) NodeState { /** * Needs to be locked when any data in this state is accessed that is not explicitly marked as * not needing the lock. @@ -233,7 +233,7 @@ class Executor { /** * State of every node, indexed by #Node::index_in_graph. */ - MutableSpan node_states_; + MutableSpan node_states_; /** * Parameters provided by the caller.
This is always non-null, while a node is running. */ @@ -271,12 +271,15 @@ class Executor { LocalAllocator &local_allocator = allocator.local(); for (const int node_index : range) { const Node &node = *self_.graph_.nodes()[node_index]; - NodeState &node_state = node_states_[node_index]; + NodeState &node_state = *node_states_[node_index]; if (!node_state.node_has_finished) { - this->destruct_node_state(node, node_state, local_allocator); + this->destruct_node_data(node, node_state, local_allocator); } + std::destroy_at(&node_state); } }); + allocator.deallocate( + node_states_[0], self_.preprocess_data_.node_state_buffer_size, alignof(NodeState)); allocator.destruct_free_array(node_states_); } @@ -317,7 +320,7 @@ class Executor { side_effect_nodes = self_.side_effect_provider_->get_nodes_with_side_effects(*context_); for (const FunctionNode *node : side_effect_nodes) { const int node_index = node->index_in_graph(); - NodeState &node_state = node_states_[node_index]; + NodeState &node_state = *node_states_[node_index]; node_state.has_side_effects = true; } } @@ -340,30 +343,29 @@ class Executor { void initialize_node_states() { Span nodes = self_.graph_.nodes(); - node_states_ = context_->allocator->allocate_new_array(nodes.size()); + node_states_ = context_->allocator->allocate_array(nodes.size()); - /* Construct all node states in parallel. 
*/ - threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) { - LocalAllocator &local_allocator = context_->allocator->local(); - for (const int i : range) { - const Node &node = *nodes[i]; - this->construct_initial_node_state(local_allocator, node, node_states_[i]); - } - }); + void *node_states_buffer = context_->allocator->allocate( + self_.preprocess_data_.node_state_buffer_size, alignof(NodeState)); + + for (const int i : nodes.index_range()) { + const Node &node = *nodes[i]; + const GraphExecutor::NodeBufferOffsets &node_offsets = self_.preprocess_data_.offsets[i]; + void *state_buffer = POINTER_OFFSET(node_states_buffer, node_offsets.node); + NodeState *node_state = new (state_buffer) NodeState(); + node_state->inputs = { + static_cast(POINTER_OFFSET(node_states_buffer, node_offsets.inputs)), + node.inputs().size()}; + node_state->outputs = { + static_cast(POINTER_OFFSET(node_states_buffer, node_offsets.outputs)), + node.outputs().size()}; + default_construct_n(node_state->inputs.data(), node_state->inputs.size()); + default_construct_n(node_state->outputs.data(), node_state->outputs.size()); + node_states_[i] = node_state; + } } - void construct_initial_node_state(LocalAllocator &allocator, - const Node &node, - NodeState &node_state) - { - const Span node_inputs = node.inputs(); - const Span node_outputs = node.outputs(); - - node_state.inputs = allocator.allocate_new_array(node_inputs.size()); - node_state.outputs = allocator.allocate_new_array(node_outputs.size()); - } - - void destruct_node_state(const Node &node, NodeState &node_state, LocalAllocator &allocator) + void destruct_node_data(const Node &node, NodeState &node_state, LocalAllocator &allocator) { if (node.is_function()) { const LazyFunction &fn = static_cast(node).function(); @@ -376,8 +378,6 @@ class Executor { const InputSocket &input_socket = node.input(i); this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator); } - 
allocator.destruct_free_array(node_state.inputs); - allocator.destruct_free_array(node_state.outputs); } void schedule_newly_requested_outputs(CurrentTask &current_task) @@ -391,7 +391,7 @@ class Executor { } const InputSocket &socket = *self_.graph_outputs_[graph_output_index]; const Node &node = socket.node(); - NodeState &node_state = node_states_[node.index_in_graph()]; + NodeState &node_state = *node_states_[node.index_in_graph()]; this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { this->set_input_required(locked_node, socket); }); @@ -424,7 +424,7 @@ class Executor { for (const int i : self_.graph_inputs_.index_range()) { const OutputSocket &socket = *self_.graph_inputs_[i]; const Node &node = socket.node(); - const NodeState &node_state = node_states_[node.index_in_graph()]; + const NodeState &node_state = *node_states_[node.index_in_graph()]; const OutputState &output_state = node_state.outputs[socket.index()]; if (output_state.usage == ValueUsage::Unused) { params_->set_input_unused(i); @@ -483,7 +483,7 @@ class Executor { for (const int node_index : reachable_node_flags.index_range()) { const Node &node = *all_nodes[node_index]; - NodeState &node_state = node_states_[node_index]; + NodeState &node_state = *node_states_[node_index]; const bool node_is_reachable = reachable_node_flags[node_index]; if (node_is_reachable) { for (const int output_index : node.outputs().index_range()) { @@ -517,7 +517,7 @@ class Executor { CurrentTask &current_task) { for (const FunctionNode *node : side_effect_nodes) { - NodeState &node_state = node_states_[node->index_in_graph()]; + NodeState &node_state = *node_states_[node->index_in_graph()]; this->with_locked_node(*node, node_state, current_task, [&](LockedNode &locked_node) { this->schedule_node(locked_node, current_task); }); @@ -560,7 +560,7 @@ class Executor { { const Node &node = socket.node(); const int index_in_node = socket.index(); - NodeState &node_state =
node_states_[node.index_in_graph()]; + NodeState &node_state = *node_states_[node.index_in_graph()]; /* The notified output socket might be an input of the entire graph. In this case, notify the * caller that the input is required. */ @@ -585,9 +585,6 @@ class Executor { BLI_assert(node.is_function()); this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { - if (node_state.node_has_finished) { - return; - } OutputState &output_state = node_state.outputs[index_in_node]; if (output_state.usage == ValueUsage::Used) { return; @@ -601,12 +598,9 @@ class Executor { { const Node &node = socket.node(); const int index_in_node = socket.index(); - NodeState &node_state = node_states_[node.index_in_graph()]; + NodeState &node_state = *node_states_[node.index_in_graph()]; this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) { - if (node_state.node_has_finished) { - return; - } OutputState &output_state = node_state.outputs[index_in_node]; output_state.potential_target_sockets -= 1; if (output_state.potential_target_sockets == 0) { @@ -660,7 +654,7 @@ class Executor { CurrentTask &current_task, const FunctionRef f) { - BLI_assert(&node_state == &node_states_[node.index_in_graph()]); + BLI_assert(&node_state == node_states_[node.index_in_graph()]); LockedNode locked_node{node, node_state}; if (this->use_multi_threading()) { @@ -704,7 +698,7 @@ class Executor { void run_node_task(const FunctionNode &node, CurrentTask &current_task) { - NodeState &node_state = node_states_[node.index_in_graph()]; + NodeState &node_state = *node_states_[node.index_in_graph()]; LocalAllocator &allocator = this->get_local_allocator(); const LazyFunction &fn = node.function(); @@ -877,7 +871,7 @@ class Executor { } } - this->destruct_node_state(node, node_state, allocator); + this->destruct_node_data(node, node_state, allocator); } void destruct_input_value_if_exists(InputState &input_state, @@ -986,7 +980,7 @@ class Executor { const Span targets =
from_socket.targets(); for (const InputSocket *target_socket : targets) { const Node &target_node = target_socket->node(); - NodeState &node_state = node_states_[target_node.index_in_graph()]; + NodeState &node_state = *node_states_[target_node.index_in_graph()]; const int input_index = target_socket->index(); const bool is_last_target = target_socket == targets.last(); BLI_assert(target_socket->type() == type); @@ -1012,9 +1006,6 @@ class Executor { continue; } this->with_locked_node(target_node, node_state, current_task, [&](LockedNode &locked_node) { - if (node_state.node_has_finished) { - return; - } InputState &input_state = node_state.inputs[input_index]; #ifdef DEBUG @@ -1278,12 +1269,32 @@ inline void Executor::execute_node(const FunctionNode &node, } } +void GraphExecutor::preprocess(const Graph &graph, PreprocessData &r_preprocess_data) +{ + const Span nodes = graph.nodes(); + r_preprocess_data.offsets.reinitialize(nodes.size()); + int offset = 0; + for (const int i : nodes.index_range()) { + const Node &node = *nodes[i]; + NodeBufferOffsets &node_offsets = r_preprocess_data.offsets[i]; + node_offsets.node = offset; + offset += sizeof(NodeState); + node_offsets.inputs = offset; + offset += sizeof(InputState) * node.inputs().size(); + node_offsets.outputs = offset; + offset += sizeof(OutputState) * node.outputs().size(); + } + r_preprocess_data.node_state_buffer_size = offset; +} + GraphExecutor::GraphExecutor(const Graph &graph, const Span graph_inputs, const Span graph_outputs, + const PreprocessData &preprocess_data, const Logger *logger, const SideEffectProvider *side_effect_provider) : graph_(graph), + preprocess_data_(preprocess_data), graph_inputs_(graph_inputs), graph_outputs_(graph_outputs), logger_(logger), diff --git a/source/blender/functions/tests/FN_lazy_function_test.cc b/source/blender/functions/tests/FN_lazy_function_test.cc index 54e1df00cdf..9776a6b4d77 100644 --- a/source/blender/functions/tests/FN_lazy_function_test.cc +++ 
b/source/blender/functions/tests/FN_lazy_function_test.cc @@ -105,7 +105,11 @@ TEST(lazy_function, SideEffects) SimpleSideEffectProvider side_effect_provider{{&store_node}}; - GraphExecutor executor_fn{graph, {&input_node.output(0)}, {}, nullptr, &side_effect_provider}; + GraphExecutor::PreprocessData preprocess_data; + GraphExecutor::preprocess(graph, preprocess_data); + + GraphExecutor executor_fn{ + graph, {&input_node.output(0)}, {}, preprocess_data, nullptr, &side_effect_provider}; execute_lazy_function_eagerly(executor_fn, nullptr, std::make_tuple(5), std::make_tuple()); EXPECT_EQ(dst1, 15); @@ -167,8 +171,11 @@ TEST(lazy_function, GraphWithCycle) graph.update_node_indices(); + GraphExecutor::PreprocessData preprocess_data; + GraphExecutor::preprocess(graph, preprocess_data); + GraphExecutor executor_fn{ - graph, {&input_node.output(0)}, {&output_node.input(0)}, nullptr, nullptr}; + graph, {&input_node.output(0)}, {&output_node.input(0)}, preprocess_data, nullptr, nullptr}; int result = 0; execute_lazy_function_eagerly( executor_fn, nullptr, std::make_tuple(10), std::make_tuple(&result)); diff --git a/source/blender/modifiers/intern/MOD_nodes.cc b/source/blender/modifiers/intern/MOD_nodes.cc index 3aa225f9a15..a2c01922525 100644 --- a/source/blender/modifiers/intern/MOD_nodes.cc +++ b/source/blender/modifiers/intern/MOD_nodes.cc @@ -1139,8 +1139,12 @@ static GeometrySet compute_geometry( blender::nodes::GeometryNodesLazyFunctionLogger lf_logger(lf_graph_info); blender::nodes::GeometryNodesLazyFunctionSideEffectProvider lf_side_effect_provider; - lf::GraphExecutor graph_executor{ - lf_graph_info.graph, graph_inputs, graph_outputs, &lf_logger, &lf_side_effect_provider}; + lf::GraphExecutor graph_executor{lf_graph_info.graph, + graph_inputs, + graph_outputs, + lf_graph_info.graph_preprocess_data, + &lf_logger, + &lf_side_effect_provider}; blender::nodes::GeoNodesModifierData geo_nodes_modifier_data; geo_nodes_modifier_data.depsgraph = ctx->depsgraph; diff --git 
a/source/blender/nodes/NOD_geometry_nodes_lazy_function.hh b/source/blender/nodes/NOD_geometry_nodes_lazy_function.hh index 7f49d067061..6744599a38e 100644 --- a/source/blender/nodes/NOD_geometry_nodes_lazy_function.hh +++ b/source/blender/nodes/NOD_geometry_nodes_lazy_function.hh @@ -187,6 +187,7 @@ struct GeometryNodesLazyFunctionGraphInfo { * Mappings between the lazy-function graph and the #bNodeTree. */ GeometryNodeLazyFunctionGraphMapping mapping; + lf::GraphExecutor::PreprocessData graph_preprocess_data; /** * Approximate number of nodes in the graph if all sub-graphs were inlined. * This can be used as a simple heuristic for the complexity of the node group. diff --git a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc index ae21b83d604..b41d18d914e 100644 --- a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc +++ b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc @@ -781,6 +781,7 @@ class LazyFunctionForGroupNode : public LazyFunction { graph_executor_.emplace(lf_graph_info.graph, std::move(graph_inputs), std::move(graph_outputs), + lf_graph_info.graph_preprocess_data, &*lf_logger_, &*lf_side_effect_provider_); } @@ -1228,6 +1229,7 @@ struct GeometryNodesLazyFunctionGraphBuilder { lf_graph_->update_node_indices(); lf_graph_info_->num_inline_nodes_approximate += lf_graph_->nodes().size(); + lf::GraphExecutor::preprocess(*lf_graph_, lf_graph_info_->graph_preprocess_data); } private: -- 2.30.2 From 16724c4328631da8cf9d994c14acd5d93d019ef0 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Thu, 5 Jan 2023 22:18:24 +0100 Subject: [PATCH 32/34] fall back to global allocation for very large allocations --- source/blender/blenlib/BLI_local_allocator.hh | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/source/blender/blenlib/BLI_local_allocator.hh b/source/blender/blenlib/BLI_local_allocator.hh index a149bbb2507..efc2c7f616d 100644 --- 
a/source/blender/blenlib/BLI_local_allocator.hh +++ b/source/blender/blenlib/BLI_local_allocator.hh @@ -34,6 +34,7 @@ class LocalAllocatorPool : NonCopyable, NonMovable { class LocalAllocator : NonCopyable, NonMovable { private: static constexpr int64_t s_alignment = 64; + static constexpr int64_t s_global_allocation_threshold = 5 * 1024 * 1024; LocalAllocatorSet &owner_set_; AlignedBuffer<256, 64> initial_buffer_; LinearAllocator<> linear_allocator_; @@ -186,9 +187,12 @@ inline void *LocalAllocator::allocate(LocalAllocatorPool &pool) buffer = pool.buffers.pop(); BLI_asan_unpoison(buffer, pool.element_size); } - else { + else if (pool.element_size < s_global_allocation_threshold) { buffer = linear_allocator_.allocate(pool.element_size, pool.alignment); } + else { + buffer = MEM_mallocN(pool.element_size, __func__); + } #ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES { @@ -226,9 +230,13 @@ inline void LocalAllocator::deallocate(const void *buffer, LocalAllocatorPool &p memset(const_cast(buffer), -1, pool.element_size); #endif - BLI_asan_poison(buffer, pool.element_size); - - pool.buffers.push(const_cast(buffer)); + if (pool.element_size < s_global_allocation_threshold) { + BLI_asan_poison(buffer, pool.element_size); + pool.buffers.push(const_cast(buffer)); + } + else { + MEM_freeN(const_cast(buffer)); + } } inline LocalAllocatorPool &LocalAllocator::get_pool(const int64_t size, const int64_t alignment) -- 2.30.2 From 2a8b965af80738c3cabd3e354e01bf868c715498 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Fri, 6 Jan 2023 16:48:32 +0100 Subject: [PATCH 33/34] remove changes to multi function --- .../blender/blenkernel/BKE_geometry_fields.hh | 3 +- .../blenkernel/intern/geometry_fields.cc | 5 +- source/blender/functions/FN_field.hh | 13 +- .../functions/FN_multi_function_context.hh | 22 ++-- source/blender/functions/intern/field.cc | 17 +-- .../functions/intern/multi_function.cc | 5 +- .../multi_function_procedure_executor.cc | 123 +++++++++--------- 
.../blender/functions/tests/FN_field_test.cc | 2 +- source/blender/nodes/NOD_geometry_exec.hh | 5 - .../nodes/geometry/node_geometry_util.hh | 1 - .../nodes/node_geo_attribute_capture.cc | 6 +- .../nodes/node_geo_delete_geometry.cc | 40 ++---- .../node_geo_distribute_points_on_faces.cc | 34 ++--- .../nodes/node_geo_duplicate_elements.cc | 103 +++++---------- .../geometry/nodes/node_geo_edge_split.cc | 3 +- .../geometry/nodes/node_geo_extrude_mesh.cc | 31 ++--- .../nodes/geometry/nodes/node_geo_points.cc | 2 +- .../nodes/node_geo_points_to_vertices.cc | 11 +- .../geometry/nodes/node_geo_scale_elements.cc | 37 ++---- .../nodes/node_geo_scale_instances.cc | 2 +- .../nodes/node_geo_separate_geometry.cc | 2 - .../nodes/node_geo_set_curve_radius.cc | 10 +- .../geometry/nodes/node_geo_set_curve_tilt.cc | 10 +- .../geometry/nodes/node_geo_set_material.cc | 3 +- .../nodes/node_geo_set_point_radius.cc | 8 +- .../geometry/nodes/node_geo_set_position.cc | 12 +- .../nodes/node_geo_store_named_attribute.cc | 6 +- .../nodes/node_geo_subdivision_surface.cc | 6 +- .../nodes/node_geo_translate_instances.cc | 2 +- .../geometry/nodes/node_geo_triangulate.cc | 2 +- .../geometry/nodes/node_geo_volume_cube.cc | 2 +- .../intern/geometry_nodes_lazy_function.cc | 29 ++--- 32 files changed, 201 insertions(+), 356 deletions(-) diff --git a/source/blender/blenkernel/BKE_geometry_fields.hh b/source/blender/blenkernel/BKE_geometry_fields.hh index b724c64105a..967bb912cc6 100644 --- a/source/blender/blenkernel/BKE_geometry_fields.hh +++ b/source/blender/blenkernel/BKE_geometry_fields.hh @@ -313,8 +313,7 @@ class CurveLengthFieldInput final : public CurvesFieldInput { bool try_capture_field_on_geometry(GeometryComponent &component, const AttributeIDRef &attribute_id, const eAttrDomain domain, - const fn::GField &field, - LocalAllocator *allocator = nullptr); + const fn::GField &field); /** * Try to find the geometry domain that the field should be evaluated on. 
If it is not obvious diff --git a/source/blender/blenkernel/intern/geometry_fields.cc b/source/blender/blenkernel/intern/geometry_fields.cc index 373291aa18a..9c691cb5870 100644 --- a/source/blender/blenkernel/intern/geometry_fields.cc +++ b/source/blender/blenkernel/intern/geometry_fields.cc @@ -408,8 +408,7 @@ bool NormalFieldInput::is_equal_to(const fn::FieldNode &other) const bool try_capture_field_on_geometry(GeometryComponent &component, const AttributeIDRef &attribute_id, const eAttrDomain domain, - const fn::GField &field, - LocalAllocator *allocator) + const fn::GField &field) { MutableAttributeAccessor attributes = *component.attributes_for_write(); const int domain_size = attributes.domain_size(domain); @@ -429,7 +428,7 @@ bool try_capture_field_on_geometry(GeometryComponent &component, * - The field does not depend on that attribute (we can't easily check for that yet). */ void *buffer = MEM_mallocN(type.size() * domain_size, __func__); - fn::FieldEvaluator evaluator{field_context, &mask, allocator}; + fn::FieldEvaluator evaluator{field_context, &mask}; evaluator.add_with_destination(validator.validate_field_if_necessary(field), GMutableSpan{type, buffer, domain_size}); evaluator.evaluate(); diff --git a/source/blender/functions/FN_field.hh b/source/blender/functions/FN_field.hh index 2e3a4245c02..7f940294113 100644 --- a/source/blender/functions/FN_field.hh +++ b/source/blender/functions/FN_field.hh @@ -334,7 +334,6 @@ class FieldEvaluator : NonMovable, NonCopyable { ResourceScope scope_; const FieldContext &context_; const IndexMask mask_; - LocalAllocator *allocator_ = nullptr; Vector fields_to_evaluate_; Vector dst_varrays_; Vector evaluated_varrays_; @@ -346,18 +345,13 @@ class FieldEvaluator : NonMovable, NonCopyable { public: /** Takes #mask by pointer because the mask has to live longer than the evaluator. 
*/ - FieldEvaluator(const FieldContext &context, - const IndexMask *mask, - LocalAllocator *allocator = nullptr) - : context_(context), mask_(*mask), allocator_(allocator) + FieldEvaluator(const FieldContext &context, const IndexMask *mask) + : context_(context), mask_(*mask) { } /** Construct a field evaluator for all indices less than #size. */ - FieldEvaluator(const FieldContext &context, - const int64_t size, - LocalAllocator *allocator = nullptr) - : context_(context), mask_(size), allocator_(allocator) + FieldEvaluator(const FieldContext &context, const int64_t size) : context_(context), mask_(size) { } @@ -480,7 +474,6 @@ class FieldEvaluator : NonMovable, NonCopyable { * provided virtual arrays are returned. */ Vector evaluate_fields(ResourceScope &scope, - LocalAllocator *allocator, Span fields_to_evaluate, IndexMask mask, const FieldContext &context, diff --git a/source/blender/functions/FN_multi_function_context.hh b/source/blender/functions/FN_multi_function_context.hh index 0be768d7df5..af5efb4cf88 100644 --- a/source/blender/functions/FN_multi_function_context.hh +++ b/source/blender/functions/FN_multi_function_context.hh @@ -12,30 +12,24 @@ * - Pass cached data to called functions. 
*/ -#include "BLI_local_allocator.hh" #include "BLI_utildefines.h" +#include "BLI_map.hh" + namespace blender::fn { class MFContext; class MFContextBuilder { private: - std::unique_ptr allocator_set_; - LocalAllocator *allocator_; + Map global_contexts_; friend MFContext; public: - MFContextBuilder(LocalAllocator *allocator = nullptr) + template void add_global_context(std::string name, const T *context) { - if (allocator) { - allocator_ = allocator; - } - else { - allocator_set_ = std::make_unique(); - allocator_ = &allocator_set_->local(); - } + global_contexts_.add_new(std::move(name), static_cast(context)); } }; @@ -48,9 +42,11 @@ class MFContext { { } - LocalAllocator &allocator() + template const T *get_global_context(StringRef name) const { - return *builder_.allocator_; + const void *context = builder_.global_contexts_.lookup_default_as(name, nullptr); + /* TODO: Implement type checking. */ + return static_cast(context); } }; diff --git a/source/blender/functions/intern/field.cc b/source/blender/functions/intern/field.cc index 7a3dba95f13..a9d26fa09f1 100644 --- a/source/blender/functions/intern/field.cc +++ b/source/blender/functions/intern/field.cc @@ -277,7 +277,6 @@ static void build_multi_function_procedure_for_fields(MFProcedure &procedure, } Vector evaluate_fields(ResourceScope &scope, - LocalAllocator *allocator, Span fields_to_evaluate, IndexMask mask, const FieldContext &context, @@ -373,7 +372,7 @@ Vector evaluate_fields(ResourceScope &scope, MFProcedureExecutor procedure_executor{procedure}; MFParamsBuilder mf_params{procedure_executor, &mask}; - MFContextBuilder mf_context{allocator}; + MFContextBuilder mf_context; /* Provide inputs to the procedure executor. 
*/ for (const GVArray &varray : field_context_inputs) { @@ -424,7 +423,7 @@ Vector evaluate_fields(ResourceScope &scope, procedure, scope, field_tree_info, constant_fields_to_evaluate); MFProcedureExecutor procedure_executor{procedure}; MFParamsBuilder mf_params{procedure_executor, 1}; - MFContextBuilder mf_context{allocator}; + MFContextBuilder mf_context; /* Provide inputs to the procedure executor. */ for (const GVArray &varray : field_context_inputs) { @@ -501,7 +500,7 @@ void evaluate_constant_field(const GField &field, void *r_value) ResourceScope scope; FieldContext context; - Vector varrays = evaluate_fields(scope, nullptr, {field}, IndexRange(1), context); + Vector varrays = evaluate_fields(scope, {field}, IndexRange(1), context); varrays[0].get_to_uninitialized(0, r_value); } @@ -772,12 +771,11 @@ int FieldEvaluator::add(GField field) static IndexMask evaluate_selection(const Field &selection_field, const FieldContext &context, IndexMask full_mask, - ResourceScope &scope, - LocalAllocator *allocator) + ResourceScope &scope) { if (selection_field) { VArray selection = - evaluate_fields(scope, allocator, {selection_field}, full_mask, context)[0].typed(); + evaluate_fields(scope, {selection_field}, full_mask, context)[0].typed(); return index_mask_from_selection(full_mask, selection, scope); } return full_mask; @@ -787,14 +785,13 @@ void FieldEvaluator::evaluate() { BLI_assert_msg(!is_evaluated_, "Cannot evaluate fields twice."); - selection_mask_ = evaluate_selection(selection_field_, context_, mask_, scope_, allocator_); + selection_mask_ = evaluate_selection(selection_field_, context_, mask_, scope_); Array fields(fields_to_evaluate_.size()); for (const int i : fields_to_evaluate_.index_range()) { fields[i] = fields_to_evaluate_[i]; } - evaluated_varrays_ = evaluate_fields( - scope_, allocator_, fields, selection_mask_, context_, dst_varrays_); + evaluated_varrays_ = evaluate_fields(scope_, fields, selection_mask_, context_, dst_varrays_); 
BLI_assert(fields_to_evaluate_.size() == evaluated_varrays_.size()); for (const int i : fields_to_evaluate_.index_range()) { OutputPointerInfo &info = output_pointer_infos_[i]; diff --git a/source/blender/functions/intern/multi_function.cc b/source/blender/functions/intern/multi_function.cc index b6322e81277..c05087a4c2d 100644 --- a/source/blender/functions/intern/multi_function.cc +++ b/source/blender/functions/intern/multi_function.cc @@ -73,7 +73,6 @@ void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context threading::parallel_for(mask.index_range(), grain_size, [&](const IndexRange sub_range) { const IndexMask sliced_mask = mask.slice(sub_range); - MFContextBuilder sub_context{&context.allocator().local()}; if (!hints.allocates_array) { /* There is no benefit to changing indices in this case. */ this->call(sliced_mask, params, context); @@ -81,7 +80,7 @@ void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context } if (sliced_mask[0] < grain_size) { /* The indices are low, no need to offset them. 
*/ - this->call(sliced_mask, params, sub_context); + this->call(sliced_mask, params, context); return; } const int64_t input_slice_start = sliced_mask[0]; @@ -128,7 +127,7 @@ void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context } } - this->call(offset_mask, offset_params, sub_context); + this->call(offset_mask, offset_params, context); }); } diff --git a/source/blender/functions/intern/multi_function_procedure_executor.cc b/source/blender/functions/intern/multi_function_procedure_executor.cc index aac84a6aa55..4fe3c27ea27 100644 --- a/source/blender/functions/intern/multi_function_procedure_executor.cc +++ b/source/blender/functions/intern/multi_function_procedure_executor.cc @@ -117,21 +117,6 @@ static_assert(std::is_trivially_destructible_v); static_assert(std::is_trivially_destructible_v); static_assert(std::is_trivially_destructible_v); -static constexpr int64_t max_variable_value_size = int64_t( - std::max({sizeof(VariableValue_GVArray), - sizeof(VariableValue_Span), - sizeof(VariableValue_GVVectorArray), - sizeof(VariableValue_GVectorArray), - sizeof(VariableValue_OneSingle), - sizeof(VariableValue_OneVector)})); -static constexpr int64_t max_variable_value_alignment = int64_t( - std::max({alignof(VariableValue_GVArray), - alignof(VariableValue_Span), - alignof(VariableValue_GVVectorArray), - alignof(VariableValue_GVectorArray), - alignof(VariableValue_OneSingle), - alignof(VariableValue_OneVector)})); - class VariableState; /** @@ -144,40 +129,33 @@ class ValueAllocator : NonCopyable, NonMovable { * Allocate with 64 byte alignment for better reusability of buffers and improved cache * performance. */ - static constexpr inline int s_span_alignment = 64; + static constexpr inline int min_alignment = 64; /** All buffers in the free-lists below have been allocated with this allocator. 
*/ - LocalAllocator &local_allocator_; - - int array_size_; + LinearAllocator<> &linear_allocator_; /** * Use stacks so that the most recently used buffers are reused first. This improves cache * efficiency. */ - LocalAllocatorPool *variable_value_pool_; + std::array, tot_variable_value_types> variable_value_free_lists_; /** * The integer key is the size of one element (e.g. 4 for an integer buffer). All buffers are * aligned to #min_alignment bytes. */ - LocalAllocatorPool *small_span_buffers_pool_; + Stack small_span_buffers_free_list_; + Map> span_buffers_free_lists_; /** Cache buffers for single values of different types. */ static constexpr inline int small_value_max_size = 16; static constexpr inline int small_value_max_alignment = 8; - LocalAllocatorPool *small_values_pool_; + Stack small_single_value_free_list_; + Map> single_value_free_lists_; public: - ValueAllocator(LocalAllocator &local_allocator, const int array_size) - : local_allocator_(local_allocator), array_size_(array_size) + ValueAllocator(LinearAllocator<> &linear_allocator) : linear_allocator_(linear_allocator) { - variable_value_pool_ = &local_allocator.get_pool(max_variable_value_size, - max_variable_value_alignment); - small_span_buffers_pool_ = &local_allocator.get_pool( - std::max(s_span_alignment, small_value_max_size * array_size), s_span_alignment); - small_values_pool_ = &local_allocator.get_pool(small_value_max_size, - small_value_max_alignment); } VariableValue_GVArray *obtain_GVArray(const GVArray &varray) @@ -195,17 +173,30 @@ class ValueAllocator : NonCopyable, NonMovable { return this->obtain(buffer, false); } - VariableValue_Span *obtain_Span(const CPPType &type) + VariableValue_Span *obtain_Span(const CPPType &type, int size) { - const bool is_small = type.can_exist_in_buffer(small_value_max_size, - small_value_max_alignment); - void *buffer = nullptr; - if (is_small) { - buffer = local_allocator_.allocate(*small_span_buffers_pool_); + + const int64_t element_size = 
type.size(); + const int64_t alignment = type.alignment(); + + if (alignment > min_alignment) { + /* In this rare case we fallback to not reusing existing buffers. */ + buffer = linear_allocator_.allocate(element_size * size, alignment); } else { - buffer = local_allocator_.allocate(type.size() * array_size_, type.alignment()); + Stack *stack = type.can_exist_in_buffer(small_value_max_size, + small_value_max_alignment) ? + &small_span_buffers_free_list_ : + span_buffers_free_lists_.lookup_ptr(element_size); + if (stack == nullptr || stack->is_empty()) { + buffer = linear_allocator_.allocate( + std::max(element_size, small_value_max_size) * size, min_alignment); + } + else { + /* Reuse existing buffer. */ + buffer = stack->pop(); + } } return this->obtain(buffer, true); @@ -216,9 +207,9 @@ class ValueAllocator : NonCopyable, NonMovable { return this->obtain(data, false); } - VariableValue_GVectorArray *obtain_GVectorArray(const CPPType &type) + VariableValue_GVectorArray *obtain_GVectorArray(const CPPType &type, int size) { - GVectorArray *vector_array = new GVectorArray(type, array_size_); + GVectorArray *vector_array = new GVectorArray(type, size); return this->obtain(*vector_array, true); } @@ -226,12 +217,16 @@ class ValueAllocator : NonCopyable, NonMovable { { const bool is_small = type.can_exist_in_buffer(small_value_max_size, small_value_max_alignment); + Stack &stack = is_small ? 
small_single_value_free_list_ : + single_value_free_lists_.lookup_or_add_default(&type); void *buffer; - if (is_small) { - buffer = local_allocator_.allocate(*small_values_pool_); + if (stack.is_empty()) { + buffer = linear_allocator_.allocate( + std::max(small_value_max_size, type.size()), + std::max(small_value_max_alignment, type.alignment())); } else { - buffer = local_allocator_.allocate(type.size(), type.alignment()); + buffer = stack.pop(); } return this->obtain(buffer); } @@ -252,16 +247,12 @@ class ValueAllocator : NonCopyable, NonMovable { auto *value_typed = static_cast(value); if (value_typed->owned) { const CPPType &type = data_type.single_type(); - const bool is_small = type.can_exist_in_buffer(small_value_max_size, - small_value_max_alignment); /* Assumes all values in the buffer are uninitialized already. */ - if (is_small) { - local_allocator_.deallocate(value_typed->data, *small_span_buffers_pool_); - } - else { - local_allocator_.deallocate( - value_typed->data, type.size() * array_size_, type.alignment()); - } + Stack &buffers = type.can_exist_in_buffer(small_value_max_size, + small_value_max_alignment) ? 
+ small_span_buffers_free_list_ : + span_buffers_free_lists_.lookup_or_add_default(type.size()); + buffers.push(value_typed->data); } break; } @@ -284,10 +275,10 @@ class ValueAllocator : NonCopyable, NonMovable { const bool is_small = type.can_exist_in_buffer(small_value_max_size, small_value_max_alignment); if (is_small) { - local_allocator_.deallocate(value_typed->data, *small_values_pool_); + small_single_value_free_list_.push(value_typed->data); } else { - local_allocator_.deallocate(value_typed->data, type.size(), type.alignment()); + single_value_free_lists_.lookup_or_add_default(&type).push(value_typed->data); } break; } @@ -298,15 +289,20 @@ class ValueAllocator : NonCopyable, NonMovable { } } - local_allocator_.deallocate(value, *variable_value_pool_); + Stack &stack = variable_value_free_lists_[int(value->type)]; + stack.push(value); } private: template T *obtain(Args &&...args) { static_assert(std::is_base_of_v); - void *buffer = static_cast(local_allocator_.allocate(*variable_value_pool_)); - return new (buffer) T(std::forward(args)...); + Stack &stack = variable_value_free_lists_[int(T::static_type)]; + if (stack.is_empty()) { + void *buffer = linear_allocator_.allocate(sizeof(T), alignof(T)); + return new (buffer) T(std::forward(args)...); + } + return new (stack.pop()) T(std::forward(args)...); } }; @@ -418,7 +414,7 @@ class VariableState : NonCopyable, NonMovable { const CPPType &type = data_type.single_type(); VariableValue_Span *new_value = nullptr; if (caller_provided_storage_ == nullptr) { - new_value = value_allocator.obtain_Span(type); + new_value = value_allocator.obtain_Span(type, array_size); } else { /* Reuse the storage provided caller when possible. 
*/ @@ -449,7 +445,7 @@ class VariableState : NonCopyable, NonMovable { const CPPType &type = data_type.vector_base_type(); VariableValue_GVectorArray *new_value = nullptr; if (caller_provided_storage_ == nullptr) { - new_value = value_allocator.obtain_GVectorArray(type); + new_value = value_allocator.obtain_GVectorArray(type, array_size); } else { new_value = value_allocator.obtain_GVectorArray_not_owned( @@ -833,10 +829,10 @@ class VariableStates { IndexMask full_mask_; public: - VariableStates(LocalAllocator &local_allocator, + VariableStates(LinearAllocator<> &linear_allocator, const MFProcedure &procedure, IndexMask full_mask) - : value_allocator_(local_allocator, full_mask.min_array_size()), + : value_allocator_(linear_allocator), procedure_(procedure), variable_states_(procedure.variables().size()), full_mask_(full_mask) @@ -1182,8 +1178,11 @@ void MFProcedureExecutor::call(IndexMask full_mask, MFParams params, MFContext c { BLI_assert(procedure_.validate()); - LocalAllocator &local_allocator = context.allocator(); - VariableStates variable_states{local_allocator, procedure_, full_mask}; + AlignedBuffer<512, 64> local_buffer; + LinearAllocator<> linear_allocator; + linear_allocator.provide_buffer(local_buffer); + + VariableStates variable_states{linear_allocator, procedure_, full_mask}; variable_states.add_initial_variable_states(*this, procedure_, params); InstructionScheduler scheduler; diff --git a/source/blender/functions/tests/FN_field_test.cc b/source/blender/functions/tests/FN_field_test.cc index 3396539fa12..8c5cc817174 100644 --- a/source/blender/functions/tests/FN_field_test.cc +++ b/source/blender/functions/tests/FN_field_test.cc @@ -263,7 +263,7 @@ TEST(field, SameFieldTwice) IndexMask mask{IndexRange(2)}; ResourceScope scope; Vector results = evaluate_fields( - scope, nullptr, {constant_field, constant_field}, mask, field_context); + scope, {constant_field, constant_field}, mask, field_context); VArray varray1 = results[0].typed(); VArray varray2 
= results[1].typed(); diff --git a/source/blender/nodes/NOD_geometry_exec.hh b/source/blender/nodes/NOD_geometry_exec.hh index bef0435240e..60f58f4c215 100644 --- a/source/blender/nodes/NOD_geometry_exec.hh +++ b/source/blender/nodes/NOD_geometry_exec.hh @@ -257,11 +257,6 @@ class GeoNodeExecParams { return dynamic_cast(lf_context_.user_data); } - LocalAllocator &allocator() - { - return *lf_context_.allocator; - } - /** * Add an error message displayed at the top of the node when displaying the node tree, * and potentially elsewhere in Blender. diff --git a/source/blender/nodes/geometry/node_geometry_util.hh b/source/blender/nodes/geometry/node_geometry_util.hh index cef97633d85..ce6b4cd6cfe 100644 --- a/source/blender/nodes/geometry/node_geometry_util.hh +++ b/source/blender/nodes/geometry/node_geometry_util.hh @@ -82,7 +82,6 @@ void separate_geometry(GeometrySet &geometry_set, GeometryNodeDeleteGeometryMode mode, const Field &selection_field, const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator, bool &r_is_error); void get_closest_in_bvhtree(BVHTreeFromMesh &tree_data, diff --git a/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc b/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc index 9d50b7afcf9..a07cd1437d6 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_attribute_capture.cc @@ -181,8 +181,7 @@ static void node_geo_exec(GeoNodeExecParams params) if (geometry_set.has_instances()) { GeometryComponent &component = geometry_set.get_component_for_write( GEO_COMPONENT_TYPE_INSTANCES); - bke::try_capture_field_on_geometry( - component, *attribute_id, domain, field, ¶ms.allocator()); + bke::try_capture_field_on_geometry(component, *attribute_id, domain, field); } } else { @@ -193,8 +192,7 @@ static void node_geo_exec(GeoNodeExecParams params) for (const GeometryComponentType type : types) { if (geometry_set.has(type)) { 
GeometryComponent &component = geometry_set.get_component_for_write(type); - bke::try_capture_field_on_geometry( - component, *attribute_id, domain, field, ¶ms.allocator().local()); + bke::try_capture_field_on_geometry(component, *attribute_id, domain, field); } } }); diff --git a/source/blender/nodes/geometry/nodes/node_geo_delete_geometry.cc b/source/blender/nodes/geometry/nodes/node_geo_delete_geometry.cc index 4513216ae5d..e92fe1a613d 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_delete_geometry.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_delete_geometry.cc @@ -307,15 +307,14 @@ static void copy_masked_polys_to_new_mesh(const Mesh &src_mesh, static void delete_curves_selection(GeometrySet &geometry_set, const Field &selection_field, const eAttrDomain selection_domain, - const bke::AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const bke::AnonymousAttributePropagationInfo &propagation_info) { const Curves &src_curves_id = *geometry_set.get_curves_for_read(); const bke::CurvesGeometry &src_curves = bke::CurvesGeometry::wrap(src_curves_id.geometry); const int domain_size = src_curves.attributes().domain_size(selection_domain); bke::CurvesFieldContext field_context{src_curves, selection_domain}; - fn::FieldEvaluator evaluator{field_context, domain_size, &allocator}; + fn::FieldEvaluator evaluator{field_context, domain_size}; evaluator.set_selection(selection_field); evaluator.evaluate(); const IndexMask selection = evaluator.get_evaluated_selection_as_mask(); @@ -342,13 +341,12 @@ static void delete_curves_selection(GeometrySet &geometry_set, static void separate_point_cloud_selection( GeometrySet &geometry_set, const Field &selection_field, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { const PointCloud &src_pointcloud = *geometry_set.get_pointcloud_for_read(); bke::PointCloudFieldContext 
field_context{src_pointcloud}; - fn::FieldEvaluator evaluator{field_context, src_pointcloud.totpoint, &allocator}; + fn::FieldEvaluator evaluator{field_context, src_pointcloud.totpoint}; evaluator.set_selection(selection_field); evaluator.evaluate(); const IndexMask selection = evaluator.get_evaluated_selection_as_mask(); @@ -376,13 +374,12 @@ static void separate_point_cloud_selection( static void delete_selected_instances(GeometrySet &geometry_set, const Field &selection_field, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { bke::Instances &instances = *geometry_set.get_instances_for_write(); bke::InstancesFieldContext field_context{instances}; - fn::FieldEvaluator evaluator{field_context, instances.instances_num(), &allocator}; + fn::FieldEvaluator evaluator{field_context, instances.instances_num()}; evaluator.set_selection(selection_field); evaluator.evaluate(); const IndexMask selection = evaluator.get_evaluated_selection_as_mask(); @@ -1099,7 +1096,6 @@ void separate_geometry(GeometrySet &geometry_set, const GeometryNodeDeleteGeometryMode mode, const Field &selection_field, const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator, bool &r_is_error) { namespace file_ns = blender::nodes::node_geo_delete_geometry_cc; @@ -1107,8 +1103,7 @@ void separate_geometry(GeometrySet &geometry_set, bool some_valid_domain = false; if (geometry_set.has_pointcloud()) { if (domain == ATTR_DOMAIN_POINT) { - file_ns::separate_point_cloud_selection( - geometry_set, selection_field, propagation_info, allocator); + file_ns::separate_point_cloud_selection(geometry_set, selection_field, propagation_info); some_valid_domain = true; } } @@ -1121,18 +1116,14 @@ void separate_geometry(GeometrySet &geometry_set, } if (geometry_set.has_curves()) { if (ELEM(domain, ATTR_DOMAIN_POINT, ATTR_DOMAIN_CURVE)) { - file_ns::delete_curves_selection(geometry_set, - 
fn::invert_boolean_field(selection_field), - domain, - propagation_info, - allocator); + file_ns::delete_curves_selection( + geometry_set, fn::invert_boolean_field(selection_field), domain, propagation_info); some_valid_domain = true; } } if (geometry_set.has_instances()) { if (domain == ATTR_DOMAIN_INSTANCE) { - file_ns::delete_selected_instances( - geometry_set, selection_field, propagation_info, allocator); + file_ns::delete_selected_instances(geometry_set, selection_field, propagation_info); some_valid_domain = true; } } @@ -1197,20 +1188,13 @@ static void node_geo_exec(GeoNodeExecParams params) if (domain == ATTR_DOMAIN_INSTANCE) { bool is_error; - separate_geometry( - geometry_set, domain, mode, selection, propagation_info, params.allocator(), is_error); + separate_geometry(geometry_set, domain, mode, selection, propagation_info, is_error); } else { geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { bool is_error; /* Invert here because we want to keep the things not in the selection. 
*/ - separate_geometry(geometry_set, - domain, - mode, - selection, - propagation_info, - params.allocator().local(), - is_error); + separate_geometry(geometry_set, domain, mode, selection, propagation_info, is_error); }); } diff --git a/source/blender/nodes/geometry/nodes/node_geo_distribute_points_on_faces.cc b/source/blender/nodes/geometry/nodes/node_geo_distribute_points_on_faces.cc index d646f47fc38..91fa215d117 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_distribute_points_on_faces.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_distribute_points_on_faces.cc @@ -385,15 +385,14 @@ BLI_NOINLINE static void compute_attribute_outputs(const Mesh &mesh, static Array calc_full_density_factors_with_selection(const Mesh &mesh, const Field &density_field, - const Field &selection_field, - LocalAllocator &allocator) + const Field &selection_field) { const eAttrDomain domain = ATTR_DOMAIN_CORNER; const int domain_size = mesh.attributes().domain_size(domain); Array densities(domain_size, 0.0f); bke::MeshFieldContext field_context{mesh, domain}; - fn::FieldEvaluator evaluator{field_context, domain_size, &allocator}; + fn::FieldEvaluator evaluator{field_context, domain_size}; evaluator.set_selection(selection_field); evaluator.add_with_destination(density_field, densities.as_mutable_span()); evaluator.evaluate(); @@ -404,13 +403,12 @@ static void distribute_points_random(const Mesh &mesh, const Field &density_field, const Field &selection_field, const int seed, - LocalAllocator &allocator, Vector &positions, Vector &bary_coords, Vector &looptri_indices) { const Array densities = calc_full_density_factors_with_selection( - mesh, density_field, selection_field, allocator); + mesh, density_field, selection_field); sample_mesh_surface(mesh, 1.0f, densities, seed, positions, bary_coords, looptri_indices); } @@ -420,7 +418,6 @@ static void distribute_points_poisson_disk(const Mesh &mesh, const Field &density_factor_field, const Field &selection_field, const int 
seed, - LocalAllocator &allocator, Vector &positions, Vector &bary_coords, Vector &looptri_indices) @@ -431,7 +428,7 @@ static void distribute_points_poisson_disk(const Mesh &mesh, update_elimination_mask_for_close_points(positions, minimum_distance, elimination_mask); const Array density_factors = calc_full_density_factors_with_selection( - mesh, density_factor_field, selection_field, allocator); + mesh, density_factor_field, selection_field); update_elimination_mask_based_on_density_factors( mesh, density_factors, bary_coords, looptri_indices, elimination_mask.as_mutable_span()); @@ -445,8 +442,7 @@ static void point_distribution_calculate(GeometrySet &geometry_set, const GeometryNodeDistributePointsOnFacesMode method, const int seed, const AttributeOutputs &attribute_outputs, - const GeoNodeExecParams ¶ms, - LocalAllocator &allocator) + const GeoNodeExecParams ¶ms) { if (!geometry_set.has_mesh()) { return; @@ -461,14 +457,8 @@ static void point_distribution_calculate(GeometrySet &geometry_set, switch (method) { case GEO_NODE_POINT_DISTRIBUTE_POINTS_ON_FACES_RANDOM: { const Field density_field = params.get_input>("Density"); - distribute_points_random(mesh, - density_field, - selection_field, - seed, - allocator, - positions, - bary_coords, - looptri_indices); + distribute_points_random( + mesh, density_field, selection_field, seed, positions, bary_coords, looptri_indices); break; } case GEO_NODE_POINT_DISTRIBUTE_POINTS_ON_FACES_POISSON: { @@ -481,7 +471,6 @@ static void point_distribution_calculate(GeometrySet &geometry_set, density_factors_field, selection_field, seed, - allocator, positions, bary_coords, looptri_indices); @@ -538,13 +527,8 @@ static void node_geo_exec(GeoNodeExecParams params) lazy_threading::send_hint(); geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { - point_distribution_calculate(geometry_set, - selection_field, - method, - seed, - attribute_outputs, - params, - params.allocator().local()); + point_distribution_calculate( 
+ geometry_set, selection_field, method, seed, attribute_outputs, params); /* Keep instances because the original geometry set may contain instances that are processed as * well. */ geometry_set.keep_only_during_modify({GEO_COMPONENT_TYPE_POINT_CLOUD}); diff --git a/source/blender/nodes/geometry/nodes/node_geo_duplicate_elements.cc b/source/blender/nodes/geometry/nodes/node_geo_duplicate_elements.cc index 92814d700ce..cd191fa8498 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_duplicate_elements.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_duplicate_elements.cc @@ -324,8 +324,7 @@ static void duplicate_curves(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { if (!geometry_set.has_curves()) { geometry_set.remove_geometry_during_modify(); @@ -338,7 +337,7 @@ static void duplicate_curves(GeometrySet &geometry_set, const bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry); bke::CurvesFieldContext field_context{curves, ATTR_DOMAIN_CURVE}; - FieldEvaluator evaluator{field_context, curves.curves_num(), &allocator}; + FieldEvaluator evaluator{field_context, curves.curves_num()}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -517,8 +516,7 @@ static void duplicate_faces(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { if (!geometry_set.has_mesh()) { geometry_set.remove_geometry_during_modify(); @@ -533,7 +531,7 @@ static void duplicate_faces(GeometrySet &geometry_set, const Span loops = mesh.loops(); bke::MeshFieldContext 
field_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator evaluator(field_context, polys.size(), &allocator); + FieldEvaluator evaluator(field_context, polys.size()); evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -720,8 +718,7 @@ static void duplicate_edges(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { if (!geometry_set.has_mesh()) { geometry_set.remove_geometry_during_modify(); @@ -731,7 +728,7 @@ static void duplicate_edges(GeometrySet &geometry_set, const Span edges = mesh.edges(); bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator evaluator{field_context, edges.size(), &allocator}; + FieldEvaluator evaluator{field_context, edges.size()}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -801,8 +798,7 @@ static void duplicate_points_curve(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { const Curves &src_curves_id = *geometry_set.get_curves_for_read(); const bke::CurvesGeometry &src_curves = bke::CurvesGeometry::wrap(src_curves_id.geometry); @@ -811,7 +807,7 @@ static void duplicate_points_curve(GeometrySet &geometry_set, } bke::CurvesFieldContext field_context{src_curves, ATTR_DOMAIN_POINT}; - FieldEvaluator evaluator{field_context, src_curves.points_num(), &allocator}; + FieldEvaluator evaluator{field_context, src_curves.points_num()}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -906,14 +902,13 @@ static void 
duplicate_points_mesh(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { const Mesh &mesh = *geometry_set.get_mesh_for_read(); const Span src_verts = mesh.verts(); bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_POINT}; - FieldEvaluator evaluator{field_context, src_verts.size(), &allocator}; + FieldEvaluator evaluator{field_context, src_verts.size()}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -959,13 +954,12 @@ static void duplicate_points_pointcloud(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { const PointCloud &src_points = *geometry_set.get_pointcloud_for_read(); bke::PointCloudFieldContext field_context{src_points}; - FieldEvaluator evaluator{field_context, src_points.totpoint, &allocator}; + FieldEvaluator evaluator{field_context, src_points.totpoint}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -1007,40 +1001,27 @@ static void duplicate_points(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { Vector component_types = geometry_set.gather_component_types(true, true); for (const GeometryComponentType component_type : component_types) { switch (component_type) { case GEO_COMPONENT_TYPE_POINT_CLOUD: if (geometry_set.has_pointcloud()) { - duplicate_points_pointcloud(geometry_set, - 
count_field, - selection_field, - attribute_outputs, - propagation_info, - allocator); + duplicate_points_pointcloud( + geometry_set, count_field, selection_field, attribute_outputs, propagation_info); } break; case GEO_COMPONENT_TYPE_MESH: if (geometry_set.has_mesh()) { - duplicate_points_mesh(geometry_set, - count_field, - selection_field, - attribute_outputs, - propagation_info, - allocator); + duplicate_points_mesh( + geometry_set, count_field, selection_field, attribute_outputs, propagation_info); } break; case GEO_COMPONENT_TYPE_CURVE: if (geometry_set.has_curves()) { - duplicate_points_curve(geometry_set, - count_field, - selection_field, - attribute_outputs, - propagation_info, - allocator); + duplicate_points_curve( + geometry_set, count_field, selection_field, attribute_outputs, propagation_info); } break; default: @@ -1061,8 +1042,7 @@ static void duplicate_instances(GeometrySet &geometry_set, const Field &count_field, const Field &selection_field, const IndexAttributes &attribute_outputs, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { if (!geometry_set.has_instances()) { geometry_set.clear(); @@ -1072,7 +1052,7 @@ static void duplicate_instances(GeometrySet &geometry_set, const bke::Instances &src_instances = *geometry_set.get_instances_for_read(); bke::InstancesFieldContext field_context{src_instances}; - FieldEvaluator evaluator{field_context, src_instances.instances_num(), &allocator}; + FieldEvaluator evaluator{field_context, src_instances.instances_num()}; evaluator.add(count_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -1144,48 +1124,27 @@ static void node_geo_exec(GeoNodeExecParams params) "Geometry"); if (duplicate_domain == ATTR_DOMAIN_INSTANCE) { - duplicate_instances(geometry_set, - count_field, - selection_field, - attribute_outputs, - propagation_info, - params.allocator()); + duplicate_instances( + 
geometry_set, count_field, selection_field, attribute_outputs, propagation_info); } else { geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { - LocalAllocator &allocator = params.allocator().local(); switch (duplicate_domain) { case ATTR_DOMAIN_CURVE: - duplicate_curves(geometry_set, - count_field, - selection_field, - attribute_outputs, - propagation_info, - allocator); + duplicate_curves( + geometry_set, count_field, selection_field, attribute_outputs, propagation_info); break; case ATTR_DOMAIN_FACE: - duplicate_faces(geometry_set, - count_field, - selection_field, - attribute_outputs, - propagation_info, - allocator); + duplicate_faces( + geometry_set, count_field, selection_field, attribute_outputs, propagation_info); break; case ATTR_DOMAIN_EDGE: - duplicate_edges(geometry_set, - count_field, - selection_field, - attribute_outputs, - propagation_info, - allocator); + duplicate_edges( + geometry_set, count_field, selection_field, attribute_outputs, propagation_info); break; case ATTR_DOMAIN_POINT: - duplicate_points(geometry_set, - count_field, - selection_field, - attribute_outputs, - propagation_info, - allocator); + duplicate_points( + geometry_set, count_field, selection_field, attribute_outputs, propagation_info); break; default: BLI_assert_unreachable(); diff --git a/source/blender/nodes/geometry/nodes/node_geo_edge_split.cc b/source/blender/nodes/geometry/nodes/node_geo_edge_split.cc index f5c34a6476a..2948713852b 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_edge_split.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_edge_split.cc @@ -25,8 +25,7 @@ static void node_geo_exec(GeoNodeExecParams params) if (const Mesh *mesh = geometry_set.get_mesh_for_read()) { bke::MeshFieldContext field_context{*mesh, ATTR_DOMAIN_EDGE}; - fn::FieldEvaluator selection_evaluator{ - field_context, mesh->totedge, ¶ms.allocator().local()}; + fn::FieldEvaluator selection_evaluator{field_context, mesh->totedge}; 
selection_evaluator.set_selection(selection_field); selection_evaluator.evaluate(); const IndexMask mask = selection_evaluator.get_evaluated_selection_as_mask(); diff --git a/source/blender/nodes/geometry/nodes/node_geo_extrude_mesh.cc b/source/blender/nodes/geometry/nodes/node_geo_extrude_mesh.cc index c4a1d9e7d29..27f34db2f9f 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_extrude_mesh.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_extrude_mesh.cc @@ -200,14 +200,13 @@ static Array> create_vert_to_edge_map(const int vert_size, static void extrude_mesh_vertices(Mesh &mesh, const Field &selection_field, const Field &offset_field, - const AttributeOutputs &attribute_outputs, - LocalAllocator &allocator) + const AttributeOutputs &attribute_outputs) { const int orig_vert_size = mesh.totvert; const int orig_edge_size = mesh.totedge; const bke::MeshFieldContext context{mesh, ATTR_DOMAIN_POINT}; - FieldEvaluator evaluator{context, mesh.totvert, &allocator}; + FieldEvaluator evaluator{context, mesh.totvert}; evaluator.add(offset_field); evaluator.set_selection(selection_field); evaluator.evaluate(); @@ -369,8 +368,7 @@ static VectorSet vert_indices_from_edges(const Mesh &mesh, const Span ed static void extrude_mesh_edges(Mesh &mesh, const Field &selection_field, const Field &offset_field, - const AttributeOutputs &attribute_outputs, - LocalAllocator &allocator) + const AttributeOutputs &attribute_outputs) { const int orig_vert_size = mesh.totvert; const Span orig_edges = mesh.edges(); @@ -378,7 +376,7 @@ static void extrude_mesh_edges(Mesh &mesh, const int orig_loop_size = mesh.totloop; const bke::MeshFieldContext edge_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator edge_evaluator{edge_context, mesh.totedge, &allocator}; + FieldEvaluator edge_evaluator{edge_context, mesh.totedge}; edge_evaluator.set_selection(selection_field); edge_evaluator.add(offset_field); edge_evaluator.evaluate(); @@ -649,8 +647,7 @@ static void extrude_mesh_edges(Mesh &mesh, 
static void extrude_mesh_face_regions(Mesh &mesh, const Field &selection_field, const Field &offset_field, - const AttributeOutputs &attribute_outputs, - LocalAllocator &allocator) + const AttributeOutputs &attribute_outputs) { const int orig_vert_size = mesh.totvert; const Span orig_edges = mesh.edges(); @@ -658,7 +655,7 @@ static void extrude_mesh_face_regions(Mesh &mesh, const Span orig_loops = mesh.loops(); const bke::MeshFieldContext poly_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator poly_evaluator{poly_context, mesh.totpoly, &allocator}; + FieldEvaluator poly_evaluator{poly_context, mesh.totpoly}; poly_evaluator.set_selection(selection_field); poly_evaluator.add(offset_field); poly_evaluator.evaluate(); @@ -1053,8 +1050,7 @@ static IndexRange selected_corner_range(Span offsets, const int index) static void extrude_individual_mesh_faces(Mesh &mesh, const Field &selection_field, const Field &offset_field, - const AttributeOutputs &attribute_outputs, - LocalAllocator &allocator) + const AttributeOutputs &attribute_outputs) { const int orig_vert_size = mesh.totvert; const int orig_edge_size = mesh.totedge; @@ -1065,7 +1061,7 @@ static void extrude_individual_mesh_faces(Mesh &mesh, * the vertices are moved, and the evaluated result might reference an attribute. 
*/ Array poly_offset(orig_polys.size()); const bke::MeshFieldContext poly_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator poly_evaluator{poly_context, mesh.totpoly, &allocator}; + FieldEvaluator poly_evaluator{poly_context, mesh.totpoly}; poly_evaluator.set_selection(selection_field); poly_evaluator.add_with_destination(offset_field, poly_offset.as_mutable_span()); poly_evaluator.evaluate(); @@ -1346,22 +1342,19 @@ static void node_geo_exec(GeoNodeExecParams params) geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { if (Mesh *mesh = geometry_set.get_mesh_for_write()) { - LocalAllocator &allocator = params.allocator().local(); switch (mode) { case GEO_NODE_EXTRUDE_MESH_VERTICES: - extrude_mesh_vertices(*mesh, selection, final_offset, attribute_outputs, allocator); + extrude_mesh_vertices(*mesh, selection, final_offset, attribute_outputs); break; case GEO_NODE_EXTRUDE_MESH_EDGES: - extrude_mesh_edges(*mesh, selection, final_offset, attribute_outputs, allocator); + extrude_mesh_edges(*mesh, selection, final_offset, attribute_outputs); break; case GEO_NODE_EXTRUDE_MESH_FACES: { if (extrude_individual) { - extrude_individual_mesh_faces( - *mesh, selection, final_offset, attribute_outputs, allocator); + extrude_individual_mesh_faces(*mesh, selection, final_offset, attribute_outputs); } else { - extrude_mesh_face_regions( - *mesh, selection, final_offset, attribute_outputs, allocator); + extrude_mesh_face_regions(*mesh, selection, final_offset, attribute_outputs); } break; } diff --git a/source/blender/nodes/geometry/nodes/node_geo_points.cc b/source/blender/nodes/geometry/nodes/node_geo_points.cc index c6da51e9682..dcbe176b384 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_points.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_points.cc @@ -79,7 +79,7 @@ static void node_geo_exec(GeoNodeExecParams params) "radius", ATTR_DOMAIN_POINT); PointsFieldContext context{count}; - fn::FieldEvaluator evaluator{context, count, ¶ms.allocator()}; + 
fn::FieldEvaluator evaluator{context, count}; evaluator.add_with_destination(position_field, output_position.varray); evaluator.add_with_destination(radius_field, output_radii.varray); evaluator.evaluate(); diff --git a/source/blender/nodes/geometry/nodes/node_geo_points_to_vertices.cc b/source/blender/nodes/geometry/nodes/node_geo_points_to_vertices.cc index 130846a462b..5cd5bbe690e 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_points_to_vertices.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_points_to_vertices.cc @@ -24,8 +24,7 @@ static void node_declare(NodeDeclarationBuilder &b) static void geometry_set_points_to_vertices( GeometrySet &geometry_set, Field &selection_field, - const AnonymousAttributePropagationInfo &propagation_info, - LocalAllocator &allocator) + const AnonymousAttributePropagationInfo &propagation_info) { const PointCloud *points = geometry_set.get_pointcloud_for_read(); if (points == nullptr) { @@ -38,7 +37,7 @@ static void geometry_set_points_to_vertices( } bke::PointCloudFieldContext field_context{*points}; - fn::FieldEvaluator selection_evaluator{field_context, points->totpoint, &allocator}; + fn::FieldEvaluator selection_evaluator{field_context, points->totpoint}; selection_evaluator.add(selection_field); selection_evaluator.evaluate(); const IndexMask selection = selection_evaluator.get_evaluated_as_mask(0); @@ -79,10 +78,8 @@ static void node_geo_exec(GeoNodeExecParams params) Field selection_field = params.extract_input>("Selection"); geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { - geometry_set_points_to_vertices(geometry_set, - selection_field, - params.get_output_propagation_info("Mesh"), - params.allocator().local()); + geometry_set_points_to_vertices( + geometry_set, selection_field, params.get_output_propagation_info("Mesh")); }); params.set_output("Mesh", std::move(geometry_set)); diff --git a/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc 
b/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc index 5950b5ee7e4..da9b04c06c0 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_scale_elements.cc @@ -307,12 +307,10 @@ static AxisScaleParams evaluate_axis_scale_fields(FieldEvaluator &evaluator, return out; } -static void scale_faces_on_axis(Mesh &mesh, - const AxisScaleFields &fields, - LocalAllocator &allocator) +static void scale_faces_on_axis(Mesh &mesh, const AxisScaleFields &fields) { bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator evaluator{field_context, mesh.totpoly, &allocator}; + FieldEvaluator evaluator{field_context, mesh.totpoly}; AxisScaleParams params = evaluate_axis_scale_fields(evaluator, fields); Vector island = prepare_face_islands(mesh, params.selection); @@ -331,12 +329,10 @@ static UniformScaleParams evaluate_uniform_scale_fields(FieldEvaluator &evaluato return out; } -static void scale_faces_uniformly(Mesh &mesh, - const UniformScaleFields &fields, - LocalAllocator &allocator) +static void scale_faces_uniformly(Mesh &mesh, const UniformScaleFields &fields) { bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_FACE}; - FieldEvaluator evaluator{field_context, mesh.totpoly, &allocator}; + FieldEvaluator evaluator{field_context, mesh.totpoly}; UniformScaleParams params = evaluate_uniform_scale_fields(evaluator, fields); Vector island = prepare_face_islands(mesh, params.selection); @@ -385,24 +381,20 @@ static void get_edge_verts(const Span edges, r_vertex_indices.add(edge.v2); } -static void scale_edges_uniformly(Mesh &mesh, - const UniformScaleFields &fields, - LocalAllocator &allocator) +static void scale_edges_uniformly(Mesh &mesh, const UniformScaleFields &fields) { bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator evaluator{field_context, mesh.totedge, &allocator}; + FieldEvaluator evaluator{field_context, mesh.totedge}; UniformScaleParams 
params = evaluate_uniform_scale_fields(evaluator, fields); Vector island = prepare_edge_islands(mesh, params.selection); scale_vertex_islands_uniformly(mesh, island, params, get_edge_verts); } -static void scale_edges_on_axis(Mesh &mesh, - const AxisScaleFields &fields, - LocalAllocator &allocator) +static void scale_edges_on_axis(Mesh &mesh, const AxisScaleFields &fields) { bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator evaluator{field_context, mesh.totedge, &allocator}; + FieldEvaluator evaluator{field_context, mesh.totedge}; AxisScaleParams params = evaluate_axis_scale_fields(evaluator, fields); Vector island = prepare_edge_islands(mesh, params.selection); @@ -427,18 +419,15 @@ static void node_geo_exec(GeoNodeExecParams params) geometry.modify_geometry_sets([&](GeometrySet &geometry) { if (Mesh *mesh = geometry.get_mesh_for_write()) { - LocalAllocator &allocator = params.allocator().local(); switch (domain) { case ATTR_DOMAIN_FACE: { switch (scale_mode) { case GEO_NODE_SCALE_ELEMENTS_UNIFORM: { - scale_faces_uniformly( - *mesh, {selection_field, scale_field, center_field}, allocator); + scale_faces_uniformly(*mesh, {selection_field, scale_field, center_field}); break; } case GEO_NODE_SCALE_ELEMENTS_SINGLE_AXIS: { - scale_faces_on_axis( - *mesh, {selection_field, scale_field, center_field, axis_field}, allocator); + scale_faces_on_axis(*mesh, {selection_field, scale_field, center_field, axis_field}); break; } } @@ -447,13 +436,11 @@ static void node_geo_exec(GeoNodeExecParams params) case ATTR_DOMAIN_EDGE: { switch (scale_mode) { case GEO_NODE_SCALE_ELEMENTS_UNIFORM: { - scale_edges_uniformly( - *mesh, {selection_field, scale_field, center_field}, allocator); + scale_edges_uniformly(*mesh, {selection_field, scale_field, center_field}); break; } case GEO_NODE_SCALE_ELEMENTS_SINGLE_AXIS: { - scale_edges_on_axis( - *mesh, {selection_field, scale_field, center_field, axis_field}, allocator); + scale_edges_on_axis(*mesh, 
{selection_field, scale_field, center_field, axis_field}); break; } } diff --git a/source/blender/nodes/geometry/nodes/node_geo_scale_instances.cc b/source/blender/nodes/geometry/nodes/node_geo_scale_instances.cc index 737fb5474c1..95604fdcb61 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_scale_instances.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_scale_instances.cc @@ -21,7 +21,7 @@ static void node_declare(NodeDeclarationBuilder &b) static void scale_instances(GeoNodeExecParams &params, bke::Instances &instances) { const bke::InstancesFieldContext context{instances}; - fn::FieldEvaluator evaluator{context, instances.instances_num(), &params.allocator()}; + fn::FieldEvaluator evaluator{context, instances.instances_num()}; evaluator.set_selection(params.extract_input>("Selection")); evaluator.add(params.extract_input>("Scale")); evaluator.add(params.extract_input>("Center")); diff --git a/source/blender/nodes/geometry/nodes/node_geo_separate_geometry.cc b/source/blender/nodes/geometry/nodes/node_geo_separate_geometry.cc index f9b2faf8607..28c0bf84160 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_separate_geometry.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_separate_geometry.cc @@ -59,7 +59,6 @@ static void node_geo_exec(GeoNodeExecParams params) GEO_NODE_DELETE_GEOMETRY_MODE_ALL, selection, propagation_info, - params.allocator(), is_error); } else { @@ -69,7 +68,6 @@ static void node_geo_exec(GeoNodeExecParams params) GEO_NODE_DELETE_GEOMETRY_MODE_ALL, selection, propagation_info, - params.allocator().local(), is_error); }); } diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_curve_radius.cc b/source/blender/nodes/geometry/nodes/node_geo_set_curve_radius.cc index 94d9ca562af..f03f5aa0413 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_curve_radius.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_curve_radius.cc @@ -20,8 +20,7 @@ static void node_declare(NodeDeclarationBuilder &b) static void
set_radius(bke::CurvesGeometry &curves, const Field &selection_field, - const Field &radius_field, - LocalAllocator &allocator) + const Field &radius_field) { if (curves.points_num() == 0) { return; @@ -31,7 +30,7 @@ static void set_radius(bke::CurvesGeometry &curves, ATTR_DOMAIN_POINT); bke::CurvesFieldContext field_context{curves, ATTR_DOMAIN_POINT}; - fn::FieldEvaluator evaluator{field_context, curves.points_num(), &allocator}; + fn::FieldEvaluator evaluator{field_context, curves.points_num()}; evaluator.set_selection(selection_field); evaluator.add_with_destination(radius_field, radii.varray); evaluator.evaluate(); @@ -47,10 +46,7 @@ static void node_geo_exec(GeoNodeExecParams params) geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { if (Curves *curves_id = geometry_set.get_curves_for_write()) { - set_radius(bke::CurvesGeometry::wrap(curves_id->geometry), - selection_field, - radii_field, - params.allocator().local()); + set_radius(bke::CurvesGeometry::wrap(curves_id->geometry), selection_field, radii_field); } }); diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_curve_tilt.cc b/source/blender/nodes/geometry/nodes/node_geo_set_curve_tilt.cc index 0013b2cee72..2887800995f 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_curve_tilt.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_curve_tilt.cc @@ -16,8 +16,7 @@ static void node_declare(NodeDeclarationBuilder &b) static void set_tilt(bke::CurvesGeometry &curves, const Field &selection_field, - const Field &tilt_field, - LocalAllocator &allocator) + const Field &tilt_field) { if (curves.points_num() == 0) { return; @@ -27,7 +26,7 @@ static void set_tilt(bke::CurvesGeometry &curves, ATTR_DOMAIN_POINT); bke::CurvesFieldContext field_context{curves, ATTR_DOMAIN_POINT}; - fn::FieldEvaluator evaluator{field_context, curves.points_num(), &allocator}; + fn::FieldEvaluator evaluator{field_context, curves.points_num()}; evaluator.set_selection(selection_field); 
evaluator.add_with_destination(tilt_field, tilts.varray); evaluator.evaluate(); @@ -43,10 +42,7 @@ static void node_geo_exec(GeoNodeExecParams params) geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { if (Curves *curves_id = geometry_set.get_curves_for_write()) { - set_tilt(bke::CurvesGeometry::wrap(curves_id->geometry), - selection_field, - tilt_field, - params.allocator().local()); + set_tilt(bke::CurvesGeometry::wrap(curves_id->geometry), selection_field, tilt_field); } }); diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_material.cc b/source/blender/nodes/geometry/nodes/node_geo_set_material.cc index 64b6947924f..e6e3eadff03 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_material.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_material.cc @@ -75,8 +75,7 @@ static void node_geo_exec(GeoNodeExecParams params) Mesh &mesh = *mesh_component.get_for_write(); bke::MeshFieldContext field_context{mesh, ATTR_DOMAIN_FACE}; - fn::FieldEvaluator selection_evaluator{ - field_context, mesh.totpoly, &params.allocator().local()}; + fn::FieldEvaluator selection_evaluator{field_context, mesh.totpoly}; selection_evaluator.add(selection_field); selection_evaluator.evaluate(); const IndexMask selection = selection_evaluator.get_evaluated_as_mask(0); diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_point_radius.cc b/source/blender/nodes/geometry/nodes/node_geo_set_point_radius.cc index 92e2ef83ba6..0034fc4a292 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_point_radius.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_point_radius.cc @@ -20,8 +20,7 @@ static void node_declare(NodeDeclarationBuilder &b) static void set_radius_in_component(PointCloud &pointcloud, const Field &selection_field, - const Field &radius_field, - LocalAllocator &allocator) + const Field &radius_field) { if (pointcloud.totpoint == 0) { return; @@ -31,7 +30,7 @@ static void set_radius_in_component(PointCloud &pointcloud,
ATTR_DOMAIN_POINT); bke::PointCloudFieldContext field_context{pointcloud}; - fn::FieldEvaluator evaluator{field_context, pointcloud.totpoint, &allocator}; + fn::FieldEvaluator evaluator{field_context, pointcloud.totpoint}; evaluator.set_selection(selection_field); evaluator.add_with_destination(radius_field, radii.varray); evaluator.evaluate(); @@ -47,8 +46,7 @@ static void node_geo_exec(GeoNodeExecParams params) geometry_set.modify_geometry_sets([&](GeometrySet &geometry_set) { if (PointCloud *pointcloud = geometry_set.get_pointcloud_for_write()) { - set_radius_in_component( - *pointcloud, selection_field, radii_field, params.allocator().local()); + set_radius_in_component(*pointcloud, selection_field, radii_field); } }); diff --git a/source/blender/nodes/geometry/nodes/node_geo_set_position.cc b/source/blender/nodes/geometry/nodes/node_geo_set_position.cc index d079e2f4798..4a76e230af7 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_set_position.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_set_position.cc @@ -147,8 +147,7 @@ static void set_computed_position_and_offset(GeometryComponent &component, static void set_position_in_component(GeometryComponent &component, const Field &selection_field, const Field &position_field, - const Field &offset_field, - LocalAllocator &allocator) + const Field &offset_field) { eAttrDomain domain = component.type() == GEO_COMPONENT_TYPE_INSTANCES ? 
ATTR_DOMAIN_INSTANCE : ATTR_DOMAIN_POINT; @@ -158,7 +157,7 @@ static void set_position_in_component(GeometryComponent &component, return; } - fn::FieldEvaluator evaluator{field_context, domain_size, &allocator}; + fn::FieldEvaluator evaluator{field_context, domain_size}; evaluator.set_selection(selection_field); evaluator.add(position_field); evaluator.add(offset_field); @@ -183,11 +182,8 @@ static void node_geo_exec(GeoNodeExecParams params) GEO_COMPONENT_TYPE_CURVE, GEO_COMPONENT_TYPE_INSTANCES}) { if (geometry.has(type)) { - set_position_in_component(geometry.get_component_for_write(type), - selection_field, - position_field, - offset_field, - params.allocator()); + set_position_in_component( + geometry.get_component_for_write(type), selection_field, position_field, offset_field); } } diff --git a/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc b/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc index 5497b16cf5c..d42793d474f 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_store_named_attribute.cc @@ -147,8 +147,7 @@ static void node_geo_exec(GeoNodeExecParams params) if (geometry_set.has_instances()) { GeometryComponent &component = geometry_set.get_component_for_write( GEO_COMPONENT_TYPE_INSTANCES); - if (!bke::try_capture_field_on_geometry( - component, name, domain, field, &params.allocator())) { + if (!bke::try_capture_field_on_geometry(component, name, domain, field)) { if (component.attribute_domain_size(domain) != 0) { failure.store(true); } @@ -161,8 +160,7 @@ static void node_geo_exec(GeoNodeExecParams params) {GEO_COMPONENT_TYPE_MESH, GEO_COMPONENT_TYPE_POINT_CLOUD, GEO_COMPONENT_TYPE_CURVE}) { if (geometry_set.has(type)) { GeometryComponent &component = geometry_set.get_component_for_write(type); - if (!bke::try_capture_field_on_geometry( - component, name, domain, field, &params.allocator().local())) { + if
(!bke::try_capture_field_on_geometry(component, name, domain, field)) { if (component.attribute_domain_size(domain) != 0) { failure.store(true); } diff --git a/source/blender/nodes/geometry/nodes/node_geo_subdivision_surface.cc b/source/blender/nodes/geometry/nodes/node_geo_subdivision_surface.cc index cb6c796e34b..40b49055949 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_subdivision_surface.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_subdivision_surface.cc @@ -124,16 +124,14 @@ static void node_geo_exec(GeoNodeExecParams params) return; } - LocalAllocator &allocator = params.allocator().local(); - bke::MeshFieldContext point_context{mesh, ATTR_DOMAIN_POINT}; - FieldEvaluator point_evaluator(point_context, mesh.totvert, &allocator); + FieldEvaluator point_evaluator(point_context, mesh.totvert); point_evaluator.add(vertex_crease_field); point_evaluator.evaluate(); const VArray vertex_creases = point_evaluator.get_evaluated(0); bke::MeshFieldContext edge_context{mesh, ATTR_DOMAIN_EDGE}; - FieldEvaluator edge_evaluator(edge_context, mesh.totedge, &allocator); + FieldEvaluator edge_evaluator(edge_context, mesh.totedge); edge_evaluator.add(edge_crease_field); edge_evaluator.evaluate(); const VArray edge_creases = edge_evaluator.get_evaluated(0); diff --git a/source/blender/nodes/geometry/nodes/node_geo_translate_instances.cc b/source/blender/nodes/geometry/nodes/node_geo_translate_instances.cc index ff9708405d4..5a278ac8547 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_translate_instances.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_translate_instances.cc @@ -20,7 +20,7 @@ static void node_declare(NodeDeclarationBuilder &b) static void translate_instances(GeoNodeExecParams &params, bke::Instances &instances) { const bke::InstancesFieldContext context{instances}; - fn::FieldEvaluator evaluator{context, instances.instances_num(), &params.allocator()}; + fn::FieldEvaluator evaluator{context, instances.instances_num()};
evaluator.set_selection(params.extract_input>("Selection")); evaluator.add(params.extract_input>("Translation")); evaluator.add(params.extract_input>("Local Space")); diff --git a/source/blender/nodes/geometry/nodes/node_geo_triangulate.cc b/source/blender/nodes/geometry/nodes/node_geo_triangulate.cc index 9e60c10d834..52c7dbf0605 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_triangulate.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_triangulate.cc @@ -78,7 +78,7 @@ static void node_geo_exec(GeoNodeExecParams params) const Mesh &mesh_in = *geometry_set.get_mesh_for_read(); bke::MeshFieldContext context{mesh_in, ATTR_DOMAIN_FACE}; - FieldEvaluator evaluator{context, mesh_in.totpoly, &params.allocator().local()}; + FieldEvaluator evaluator{context, mesh_in.totpoly}; evaluator.add(selection_field); evaluator.evaluate(); const IndexMask selection = evaluator.get_evaluated_as_mask(0); diff --git a/source/blender/nodes/geometry/nodes/node_geo_volume_cube.cc b/source/blender/nodes/geometry/nodes/node_geo_volume_cube.cc index aae2cd73578..7d439309380 100644 --- a/source/blender/nodes/geometry/nodes/node_geo_volume_cube.cc +++ b/source/blender/nodes/geometry/nodes/node_geo_volume_cube.cc @@ -148,7 +148,7 @@ static void node_geo_exec(GeoNodeExecParams params) /* Evaluate input field on a 3D grid.
*/ Grid3DFieldContext context(resolution, bounds_min, bounds_max); - FieldEvaluator evaluator(context, context.points_num(), &params.allocator()); + FieldEvaluator evaluator(context, context.points_num()); Array densities(context.points_num()); evaluator.add_with_destination(std::move(input_field), densities.as_mutable_span()); evaluator.evaluate(); diff --git a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc index 61f59a43fd5..fde5460e083 100644 --- a/source/blender/nodes/intern/geometry_nodes_lazy_function.cc +++ b/source/blender/nodes/intern/geometry_nodes_lazy_function.cc @@ -304,7 +304,6 @@ class LazyFunctionForUndefinedNode : public LazyFunction { * values. If any input is a field, the outputs will also be fields.
*/ MFParamsBuilder params{fn, 1}; - MFContextBuilder context{&allocator}; + MFContextBuilder context; for (const int i : input_types.index_range()) { const ValueOrFieldCPPType &type = *input_types[i]; @@ -413,7 +412,7 @@ class LazyFunctionForMutedNode : public LazyFunction { } } - void execute_impl(lf::Params &params, const lf::Context &context) const override + void execute_impl(lf::Params &params, const lf::Context & /*context*/) const override { for (const int output_i : outputs_.index_range()) { if (params.output_was_set(output_i)) { @@ -447,13 +446,8 @@ class LazyFunctionForMutedNode : public LazyFunction { if (conversions.is_convertible(from_type->value, to_type->value)) { const MultiFunction &multi_fn = *conversions.get_conversion_multi_function( MFDataType::ForSingle(from_type->value), MFDataType::ForSingle(to_type->value)); - execute_multi_function_on_value_or_field(*context.allocator, - multi_fn, - {}, - {from_type}, - {to_type}, - {input_value}, - {output_value}); + execute_multi_function_on_value_or_field( + multi_fn, {}, {from_type}, {to_type}, {input_value}, {output_value}); } params.output_set(output_i); continue; @@ -486,7 +480,7 @@ class LazyFunctionForMultiFunctionConversion : public LazyFunction { outputs_.append({"To", to.self}); } - void execute_impl(lf::Params &params, const lf::Context &context) const override + void execute_impl(lf::Params &params, const lf::Context & /*context*/) const override { const void *from_value = params.try_get_input_data_ptr(0); void *to_value = params.get_output_data_ptr(0); @@ -494,7 +488,7 @@ class LazyFunctionForMultiFunctionConversion : public LazyFunction { BLI_assert(to_value != nullptr); execute_multi_function_on_value_or_field( - *context.allocator, fn_, {}, {&from_type_}, {&to_type_}, {from_value}, {to_value}); + fn_, {}, {&from_type_}, {&to_type_}, {from_value}, {to_value}); params.output_set(0); } @@ -527,7 +521,7 @@ class LazyFunctionForMultiFunctionNode : public LazyFunction { } } - void execute_impl(lf::Params &params,
const lf::Context &context) const override + void execute_impl(lf::Params &params, const lf::Context & /*context*/) const override { Vector input_values(inputs_.size()); Vector output_values(outputs_.size()); @@ -537,13 +531,8 @@ class LazyFunctionForMultiFunctionNode : public LazyFunction { for (const int i : outputs_.index_range()) { output_values[i] = params.get_output_data_ptr(i); } - execute_multi_function_on_value_or_field(*context.allocator, - *fn_item_.fn, - fn_item_.owned_fn, - input_types_, - output_types_, - input_values, - output_values); + execute_multi_function_on_value_or_field( + *fn_item_.fn, fn_item_.owned_fn, input_types_, output_types_, input_values, output_values); for (const int i : outputs_.index_range()) { params.output_set(i); } -- 2.30.2 From 6331490a846bdc7a21140abc69a0e602151a1118 Mon Sep 17 00:00:00 2001 From: Jacques Lucke Date: Sat, 14 Jan 2023 15:58:22 +0100 Subject: [PATCH 34/34] fix --- source/blender/functions/intern/lazy_function_graph_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/blender/functions/intern/lazy_function_graph_executor.cc b/source/blender/functions/intern/lazy_function_graph_executor.cc index 2506df8d8af..55efecab4a9 100644 --- a/source/blender/functions/intern/lazy_function_graph_executor.cc +++ b/source/blender/functions/intern/lazy_function_graph_executor.cc @@ -439,7 +439,7 @@ class Executor { this->set_input_required(locked_node, socket); } else { - this->set_input_unused(locked_node, socket); + this->set_input_unused(locked_node, socket, this->get_local_allocator()); } }); } -- 2.30.2