WIP: Functions: new local allocator for better memory reuse and performance #104630

source/blender/blenlib/BLI_local_allocator.hh:
@@ -0,0 +1,331 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */

#pragma once

#include <array>
#include <cstddef>
#include <memory>

#include "BLI_allocator.hh"
#include "BLI_asan.h"
#include "BLI_enumerable_thread_specific.hh"
#include "BLI_linear_allocator.hh"
#include "BLI_map.hh"
#include "BLI_math_bits.h"
#include "BLI_stack.hh"
#include "BLI_utility_mixins.hh"
#include "BLI_vector.hh"

// #define BLI_LOCAL_ALLOCATOR_USE_GUARDED
// #define BLI_LOCAL_ALLOCATOR_DEBUG_SIZES

namespace blender {

class LocalAllocatorSet;
class LocalAllocator;
class LocalAllocatorPool;

class LocalAllocatorPool : NonCopyable, NonMovable {
 private:
  Stack<void *> buffers;
  int64_t element_size = -1;
  int64_t alignment = -1;

  friend LocalAllocator;
};

class LocalAllocator : NonCopyable, NonMovable {
 private:
  static constexpr int64_t s_alignment = 64;
  static constexpr int64_t s_global_allocation_threshold = 5 * 1024 * 1024;

  LocalAllocatorSet &owner_set_;
  AlignedBuffer<256, 64> initial_buffer_;
  LinearAllocator<> linear_allocator_;

  struct Head {
    int64_t buffer_size;
    int64_t buffer_alignment;
  };
  static_assert(is_power_of_2_constexpr(sizeof(Head)));

  std::array<LocalAllocatorPool, 8> small_buffer_pools_;
  Map<int, std::unique_ptr<LocalAllocatorPool>> large_buffer_pools_;

  friend LocalAllocatorSet;

  LocalAllocator(LocalAllocatorSet &owner_set);

 public:
  ~LocalAllocator();

  bool is_local() const;
  LocalAllocator &local();
  LocalAllocatorSet &owner_set();

  void *allocate(int64_t size, int64_t alignment);
  void deallocate(const void *buffer, int64_t size, int64_t alignment);

  void *allocate(LocalAllocatorPool &pool);
  void deallocate(const void *buffer, LocalAllocatorPool &pool);

  void *allocate_with_head(int64_t size, int64_t alignment);
  void deallocate_with_head(const void *buffer);

  LocalAllocatorPool &get_pool(int64_t size, int64_t alignment);

  template<typename T, typename... Args> T &allocate_new(Args &&...args);
  template<typename T> void destruct_free(const T *value);
  template<typename T> MutableSpan<T> allocate_array(int64_t size);
  template<typename T, typename... Args>
  MutableSpan<T> allocate_new_array(int64_t size, Args &&...args);
  template<typename T> void destruct_free_array(Span<T> data);
  template<typename T> void destruct_free_array(MutableSpan<T> data);
};

class LocalAllocatorSet : NonCopyable, NonMovable {
 private:
  threading::EnumerableThreadSpecific<LocalAllocator> allocator_by_thread_;

#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
  std::mutex debug_sizes_mutex_;
  Map<const void *, int64_t> debug_sizes_;
#endif

  friend LocalAllocator;

 public:
  LocalAllocatorSet();
  ~LocalAllocatorSet();

  LocalAllocator &local();
};

class ThreadedLocalAllocatorRef {
 private:
  LocalAllocatorSet &allocator_set_;

 public:
  ThreadedLocalAllocatorRef(LocalAllocator &allocator) : allocator_set_(allocator.owner_set())
  {
  }

  void *allocate(const size_t size, const size_t alignment, const char * /*name*/)
  {
    LocalAllocator &allocator = allocator_set_.local();
    return allocator.allocate_with_head(size, alignment);
  }

  void deallocate(void *ptr)
  {
    LocalAllocator &allocator = allocator_set_.local();
    allocator.deallocate_with_head(ptr);
  }
};

class LocalAllocatorRef {
 private:
  LocalAllocator &allocator_;

 public:
  LocalAllocatorRef(LocalAllocator &allocator) : allocator_(allocator)
  {
  }

  void *allocate(const size_t size, const size_t alignment, const char * /*name*/)
  {
    return allocator_.allocate_with_head(size, alignment);
  }

  void deallocate(void *ptr)
  {
    allocator_.deallocate_with_head(ptr);
  }
};

inline bool LocalAllocator::is_local() const
{
  return this == &owner_set_.local();
}

inline LocalAllocator &LocalAllocator::local()
{
  return owner_set_.local();
}

inline LocalAllocatorSet &LocalAllocator::owner_set()
{
  return owner_set_;
}

inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment)
{
  LocalAllocatorPool &pool = this->get_pool(size, alignment);
  BLI_assert(pool.element_size >= size);
  BLI_assert(pool.alignment >= alignment);

  return this->allocate(pool);
}

inline void LocalAllocator::deallocate(const void *buffer,
                                       const int64_t size,
                                       const int64_t alignment)
{
  LocalAllocatorPool &pool = this->get_pool(size, alignment);
  BLI_assert(pool.element_size >= size);
  BLI_assert(pool.alignment >= alignment);

  this->deallocate(buffer, pool);
}

inline void *LocalAllocator::allocate(LocalAllocatorPool &pool)
{
  BLI_assert(this->is_local());

#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED
  return MEM_mallocN_aligned(pool.element_size, pool.alignment, __func__);
#endif

  void *buffer;
  if (!pool.buffers.is_empty()) {
    buffer = pool.buffers.pop();
    BLI_asan_unpoison(buffer, pool.element_size);
  }
  else if (pool.element_size < s_global_allocation_threshold) {
    buffer = linear_allocator_.allocate(pool.element_size, pool.alignment);
  }
  else {
    buffer = MEM_mallocN(pool.element_size, __func__);
  }

#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
  {
    std::lock_guard lock{owner_set_.debug_sizes_mutex_};
    owner_set_.debug_sizes_.add_new(buffer, pool.element_size);
  }
#endif

  return buffer;
}

inline void LocalAllocator::deallocate(const void *buffer, LocalAllocatorPool &pool)
{
  BLI_assert(this->is_local());

#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED
  MEM_freeN(const_cast<void *>(buffer));
  return;
#endif

#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
  {
    std::lock_guard lock{owner_set_.debug_sizes_mutex_};
    /* The size recorded at allocation time must match the pool this buffer is returned to. */
    const int64_t last_size = owner_set_.debug_sizes_.pop(buffer);
    if (last_size != pool.element_size) {
      BLI_assert_unreachable();
    }
  }
#endif

#ifdef DEBUG
  memset(const_cast<void *>(buffer), -1, pool.element_size);
#endif

  if (pool.element_size < s_global_allocation_threshold) {
    BLI_asan_poison(buffer, pool.element_size);
    pool.buffers.push(const_cast<void *>(buffer));
  }
  else {
    MEM_freeN(const_cast<void *>(buffer));
  }
}

inline LocalAllocatorPool &LocalAllocator::get_pool(const int64_t size, const int64_t alignment)
{
  BLI_assert(size > 0);
  BLI_assert(alignment <= size);
  BLI_assert(alignment <= s_alignment);
  BLI_assert(is_power_of_2_i(alignment));
  UNUSED_VARS_NDEBUG(alignment);

  BLI_assert(this->is_local());
  /* Sizes up to 64 bytes are served from eight pools with 8-byte granularity. */
  if (size <= 64) {
    return small_buffer_pools_[(size - 1) >> 3];
  }
  /* Larger sizes share power-of-two pools: `key` is the number of leading zero bits of `size`,
   * so the pool's element size is the smallest power of two larger than `size`. */
  const int key = bitscan_reverse_uint64(uint64_t(size));
  return *large_buffer_pools_.lookup_or_add_cb(key, [&]() {
    auto pool = std::make_unique<LocalAllocatorPool>();
    pool->element_size = int64_t(1) << (8 * sizeof(int64_t) - key);
    pool->alignment = s_alignment;
    return pool;
  });
}

inline void *LocalAllocator::allocate_with_head(int64_t size, int64_t alignment)
{
  const int64_t buffer_size = size + std::max<int64_t>(alignment, sizeof(Head));
  const int64_t buffer_alignment = std::max<int64_t>(alignment, alignof(Head));
  void *buffer = this->allocate(buffer_size, buffer_alignment);
  Head *head = new (buffer) Head;
  head->buffer_size = buffer_size;
  head->buffer_alignment = buffer_alignment;
  return head + 1;
}

inline void LocalAllocator::deallocate_with_head(const void *buffer)
{
  const Head *head = static_cast<const Head *>(buffer) - 1;
  this->deallocate(head, head->buffer_size, head->buffer_alignment);
}

template<typename T, typename... Args> inline T &LocalAllocator::allocate_new(Args &&...args)
{
  void *buffer = this->allocate(sizeof(T), alignof(T));
  T *value = new (buffer) T(std::forward<Args>(args)...);
  return *value;
}

template<typename T> inline void LocalAllocator::destruct_free(const T *value)
{
  std::destroy_at(value);
  this->deallocate(value, sizeof(T), alignof(T));
}

template<typename T> inline MutableSpan<T> LocalAllocator::allocate_array(const int64_t size)
{
  if (size == 0) {
    return {};
  }
  void *buffer = this->allocate(size * sizeof(T), alignof(T));
  return {static_cast<T *>(buffer), size};
}

template<typename T, typename... Args>
inline MutableSpan<T> LocalAllocator::allocate_new_array(const int64_t size, Args &&...args)
{
  MutableSpan<T> array = this->allocate_array<T>(size);
  for (const int64_t i : IndexRange(size)) {
    new (&array[i]) T(std::forward<Args>(args)...);
  }
  return array;
}

template<typename T> inline void LocalAllocator::destruct_free_array(Span<T> data)
{
  if (data.is_empty()) {
    return;
  }
  destruct_n(const_cast<T *>(data.data()), data.size());
  this->deallocate(data.data(), data.size_in_bytes(), alignof(T));
}

template<typename T> inline void LocalAllocator::destruct_free_array(MutableSpan<T> data)
{
  this->destruct_free_array(data.as_span());
}

inline LocalAllocator &LocalAllocatorSet::local()
{
  return allocator_by_thread_.local();
}

}  // namespace blender
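
For orientation, this is how the new allocator is meant to be used; a minimal sketch based on the API above, not part of the patch (the example function is hypothetical):

/* Minimal usage sketch for LocalAllocator; `local_allocator_example` is made up. */
#include "BLI_local_allocator.hh"

static void local_allocator_example()
{
  blender::LocalAllocatorSet allocator_set;
  /* Every thread retrieves its own allocator from the shared set. */
  blender::LocalAllocator &allocator = allocator_set.local();

  /* Single values come from the small per-size-class pools. */
  int &value = allocator.allocate_new<int>(42);

  /* Arrays use the pool whose element size fits the whole span. */
  blender::MutableSpan<int> array = allocator.allocate_new_array<int>(16, 0);

  /* Freed buffers go back to their pool and are handed out again for later
   * allocations of the same size class instead of hitting the heap. */
  allocator.destruct_free_array(array);
  allocator.destruct_free(&value);
}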

source/blender/blenlib/CMakeLists.txt:
@@ -91,6 +91,7 @@ set(SRC
  intern/lazy_threading.cc
  intern/length_parameterize.cc
  intern/listbase.cc
  intern/local_allocator.cc
  intern/math_base.c
  intern/math_base_inline.c
  intern/math_base_safe_inline.c

@@ -256,6 +257,7 @@ set(SRC
  BLI_linklist_stack.h
  BLI_listbase.h
  BLI_listbase_wrapper.hh
  BLI_local_allocator.hh
  BLI_map.hh
  BLI_map_slots.hh
  BLI_math.h

@@ -484,6 +486,7 @@ if(WITH_GTESTS)
  tests/BLI_linear_allocator_test.cc
  tests/BLI_linklist_lockfree_test.cc
  tests/BLI_listbase_test.cc
  tests/BLI_local_allocator_test.cc
  tests/BLI_map_test.cc
  tests/BLI_math_base_safe_test.cc
  tests/BLI_math_base_test.cc

source/blender/blenlib/intern/local_allocator.cc:
@@ -0,0 +1,26 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */

#include "BLI_local_allocator.hh"

namespace blender {

LocalAllocatorSet::LocalAllocatorSet()
    : allocator_by_thread_([this]() { return LocalAllocator{*this}; })
{
}

LocalAllocatorSet::~LocalAllocatorSet() = default;

LocalAllocator::LocalAllocator(LocalAllocatorSet &owner_set) : owner_set_(owner_set)
{
  linear_allocator_.provide_buffer(initial_buffer_);
  /* The small pools serve element sizes 8, 16, ..., 64. */
  for (const int64_t i : IndexRange(small_buffer_pools_.size())) {
    LocalAllocatorPool &pool = small_buffer_pools_[i];
    pool.element_size = 8 * (i + 1);
    pool.alignment = power_of_2_min_u(pool.element_size);
  }
}

LocalAllocator::~LocalAllocator() = default;

}  // namespace blender

source/blender/blenlib/tests/BLI_local_allocator_test.cc:
@@ -0,0 +1,10 @@
/* SPDX-License-Identifier: Apache-2.0 */

#include "BLI_local_allocator.hh"
#include "BLI_strict_flags.h"

#include "testing/testing.h"

namespace blender::tests {

}  // namespace blender::tests
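
The test file is still empty at this WIP stage. A first smoke test could look roughly like this (a sketch, not part of the patch; the reuse assertion assumes the default build where BLI_LOCAL_ALLOCATOR_USE_GUARDED is disabled):

TEST(local_allocator, AllocateReuse)
{
  LocalAllocatorSet allocator_set;
  LocalAllocator &allocator = allocator_set.local();

  void *a = allocator.allocate(24, 8);
  allocator.deallocate(a, 24, 8);
  /* The freed buffer should be served again from the same size-class pool. */
  void *b = allocator.allocate(24, 8);
  EXPECT_EQ(a, b);
  allocator.deallocate(b, 24, 8);
}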

source/blender/functions/FN_lazy_function.hh:
@@ -42,6 +42,7 @@
#include "BLI_function_ref.hh"
#include "BLI_generic_pointer.hh"
#include "BLI_linear_allocator.hh"
#include "BLI_local_allocator.hh"
#include "BLI_vector.hh"

#include <atomic>

@@ -98,6 +99,8 @@ struct Context {
   * Custom user data that can be used in the function.
   */
  UserData *user_data;

  /* Allocator used while the function is evaluated. */
  LocalAllocator *allocator;
};

/**

@@ -276,12 +279,12 @@ class LazyFunction {
   * Allocates storage for this function. The storage will be passed to every call to #execute.
   * If the function does not keep track of any state, this does not have to be implemented.
   */
  virtual void *init_storage(LinearAllocator<> &allocator) const;
  virtual void *init_storage(LocalAllocator &allocator) const;

  /**
   * Destruct the storage created in #init_storage.
   */
  virtual void destruct_storage(void *storage) const;
  virtual void destruct_storage(void *storage, LocalAllocator &allocator) const;

  /**
   * Calls `fn` with the input indices that the given `output_index` may depend on. By default
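
A stateful function would use the changed interface roughly like this (a sketch; `MyFunction` and `MyState` are hypothetical, and `lf` is the `blender::fn::lazy_function` namespace alias used elsewhere in the patch):

class MyFunction : public lf::LazyFunction {
  struct MyState {
    int calls = 0;
  };

  void *init_storage(LocalAllocator &allocator) const override
  {
    /* Storage is now taken from the caller-provided local allocator. */
    return &allocator.allocate_new<MyState>();
  }

  void destruct_storage(void *storage, LocalAllocator &allocator) const override
  {
    /* The allocator is passed back in, so the buffer returns to its pool. */
    allocator.destruct_free(static_cast<MyState *>(storage));
  }

  void execute_impl(lf::Params &params, const lf::Context &context) const override
  {
    MyState &state = *static_cast<MyState *>(context.storage);
    state.calls++;
    /* Temporary allocations during execution can use context.allocator. */
    UNUSED_VARS(params);
  }
};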

source/blender/functions/FN_lazy_function_execute.hh:
@@ -85,14 +85,16 @@ inline void execute_lazy_function_eagerly_impl(
      ...);
  output_usages.fill(ValueUsage::Used);
  set_outputs.fill(false);
  LinearAllocator<> allocator;
  LocalAllocatorSet allocator_set;
  LocalAllocator &allocator = allocator_set.local();
  Context context;
  context.user_data = user_data;
  context.storage = fn.init_storage(allocator);
  context.allocator = &allocator;
  BasicParams params{
      fn, input_pointers, output_pointers, input_usages, output_usages, set_outputs};
  fn.execute(params, context);
  fn.destruct_storage(context.storage);
  fn.destruct_storage(context.storage, allocator);

  /* Make sure all outputs have been computed. */
  BLI_assert(!Span<bool>(set_outputs).contains(false));

source/blender/functions/FN_lazy_function_graph_executor.hh:
@@ -59,11 +59,23 @@ class GraphExecutor : public LazyFunction {
  using Logger = GraphExecutorLogger;
  using SideEffectProvider = GraphExecutorSideEffectProvider;

  struct NodeBufferOffsets {
    int node;
    int inputs;
    int outputs;
  };

  struct PreprocessData {
    Array<NodeBufferOffsets> offsets;
    int node_state_buffer_size;
  };

 private:
  /**
   * The graph that is evaluated.
   */
  const Graph &graph_;
  const PreprocessData &preprocess_data_;
  /**
   * Input and output sockets of the entire graph.
   */

@@ -85,11 +97,14 @@ class GraphExecutor : public LazyFunction {
  GraphExecutor(const Graph &graph,
                Span<const OutputSocket *> graph_inputs,
                Span<const InputSocket *> graph_outputs,
                const PreprocessData &preprocess_data,
                const Logger *logger,
                const SideEffectProvider *side_effect_provider);

  void *init_storage(LinearAllocator<> &allocator) const override;
  void destruct_storage(void *storage) const override;
  void *init_storage(LocalAllocator &allocator) const override;
  void destruct_storage(void *storage, LocalAllocator &allocator) const override;

  static void preprocess(const Graph &graph, PreprocessData &r_preprocess_data);

 private:
  void execute_impl(Params &params, const Context &context) const override;
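
PreprocessData packs every node's NodeState together with its per-socket state arrays into one contiguous buffer. For a hypothetical two-node graph the offsets computed by GraphExecutor::preprocess (see the implementation further below) work out roughly like this:

/* Sketch of the offset layout; the node/socket counts are illustrative.
 * Node 0: 2 inputs, 1 output.  Node 1: 1 input, 1 output.
 *
 *   offsets[0].node    = 0
 *   offsets[0].inputs  = sizeof(NodeState)
 *   offsets[0].outputs = sizeof(NodeState) + 2 * sizeof(InputState)
 *   offsets[1].node    = offsets[0].outputs + 1 * sizeof(OutputState)
 *   ...
 *   node_state_buffer_size = running total; one allocation serves the graph.
 *
 * Because NodeState, InputState and OutputState are all alignas(8) (see the
 * executor changes below), every offset stays a multiple of 8, so no extra
 * padding logic is needed. */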

source/blender/functions/intern/lazy_function.cc:
@@ -25,12 +25,12 @@ std::string LazyFunction::output_name(int index) const
  return outputs_[index].debug_name;
}

void *LazyFunction::init_storage(LinearAllocator<> & /*allocator*/) const
void *LazyFunction::init_storage(LocalAllocator & /*allocator*/) const
{
  return nullptr;
}

void LazyFunction::destruct_storage(void *storage) const
void LazyFunction::destruct_storage(void *storage, LocalAllocator & /*allocator*/) const
{
  BLI_assert(storage == nullptr);
  UNUSED_VARS_NDEBUG(storage);

source/blender/functions/intern/lazy_function_graph_executor.cc:
@@ -75,7 +75,7 @@ enum class NodeScheduleState {
  RunningAndRescheduled,
};

struct InputState {
struct alignas(8) InputState {
  /**
   * Value of this input socket. By default, the value is empty. When other nodes are done
   * computing their outputs, the computed values will be forwarded to linked input sockets. The

@@ -97,7 +97,7 @@ struct InputState {
  bool was_ready_for_execution = false;
};

struct OutputState {
struct alignas(8) OutputState {
  /**
   * Keeps track of how the output value is used. If a connected input becomes used, this output
   * has to become used as well. The output becomes unused when it is used by no input socket

@@ -127,7 +127,7 @@ struct OutputState {
  void *value = nullptr;
};

struct NodeState {
struct alignas(8) NodeState {
  /**
   * Needs to be locked when any data in this state is accessed that is not explicitly marked as
   * not needing the lock.

@@ -271,7 +271,7 @@ class Executor {
  /**
   * State of every node, indexed by #Node::index_in_graph.
   */
  Array<NodeState *> node_states_;
  MutableSpan<NodeState *> node_states_;
  /**
   * Parameters provided by the caller. This is always non-null, while a node is running.
   */

@@ -285,15 +285,7 @@ class Executor {
#ifdef FN_LAZY_FUNCTION_DEBUG_THREADS
  std::thread::id current_main_thread_;
#endif
  /**
   * A separate linear allocator for every thread. We could potentially reuse some memory, but that
   * doesn't seem worth it yet.
   */
  struct ThreadLocalData {
    LinearAllocator<> allocator;
  };
  std::unique_ptr<threading::EnumerableThreadSpecific<ThreadLocalData>> thread_locals_;
  LinearAllocator<> main_allocator_;

  /**
   * Set to false when the first execution ends.
   */

@@ -308,18 +300,25 @@ class Executor {
    BLI_assert(self_.graph_.node_indices_are_valid());
  }

  ~Executor()
  void destruct_state(LocalAllocator &allocator)
  {
    if (TaskPool *task_pool = task_pool_.load()) {
      BLI_task_pool_free(task_pool);
    }
    threading::parallel_for(node_states_.index_range(), 1024, [&](const IndexRange range) {
      LocalAllocator &local_allocator = allocator.local();
      for (const int node_index : range) {
        const Node &node = *self_.graph_.nodes()[node_index];
        NodeState &node_state = *node_states_[node_index];
        this->destruct_node_state(node, node_state);
        if (!node_state.node_has_finished) {
          this->destruct_node_data(node, node_state, local_allocator);
        }
        std::destroy_at(&node_state);
      }
    });
    allocator.deallocate(
        node_states_[0], self_.preprocess_data_.node_state_buffer_size, alignof(NodeState));
    allocator.destruct_free_array(node_states_);
  }

  /**

@@ -364,7 +363,7 @@ class Executor {
      }
    }

    this->initialize_static_value_usages(side_effect_nodes);
    this->initialize_static_value_usages(side_effect_nodes, this->get_local_allocator());
    this->schedule_side_effect_nodes(side_effect_nodes, current_task);
  }

@@ -382,54 +381,41 @@ class Executor {
  void initialize_node_states()
  {
    Span<const Node *> nodes = self_.graph_.nodes();
    node_states_.reinitialize(nodes.size());
    node_states_ = context_->allocator->allocate_array<NodeState *>(nodes.size());

    auto construct_node_range = [&](const IndexRange range, LinearAllocator<> &allocator) {
      for (const int i : range) {
        const Node &node = *nodes[i];
        NodeState &node_state = *allocator.construct<NodeState>().release();
        node_states_[i] = &node_state;
        this->construct_initial_node_state(allocator, node, node_state);
      }
    };
    if (nodes.size() <= 256) {
      construct_node_range(nodes.index_range(), main_allocator_);
    }
    else {
      this->ensure_thread_locals();
      /* Construct all node states in parallel. */
      threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) {
        LinearAllocator<> &allocator = thread_locals_->local().allocator;
        construct_node_range(range, allocator);
      });
    void *node_states_buffer = context_->allocator->allocate(
        self_.preprocess_data_.node_state_buffer_size, alignof(NodeState));

    for (const int i : nodes.index_range()) {
      const Node &node = *nodes[i];
      const GraphExecutor::NodeBufferOffsets &node_offsets = self_.preprocess_data_.offsets[i];
      void *state_buffer = POINTER_OFFSET(node_states_buffer, node_offsets.node);
      NodeState *node_state = new (state_buffer) NodeState();
      node_state->inputs = {
          static_cast<InputState *>(POINTER_OFFSET(node_states_buffer, node_offsets.inputs)),
          node.inputs().size()};
      node_state->outputs = {
          static_cast<OutputState *>(POINTER_OFFSET(node_states_buffer, node_offsets.outputs)),
          node.outputs().size()};
      default_construct_n(node_state->inputs.data(), node_state->inputs.size());
      default_construct_n(node_state->outputs.data(), node_state->outputs.size());
      node_states_[i] = node_state;
    }
  }

  void construct_initial_node_state(LinearAllocator<> &allocator,
                                    const Node &node,
                                    NodeState &node_state)
  {
    const Span<const InputSocket *> node_inputs = node.inputs();
    const Span<const OutputSocket *> node_outputs = node.outputs();

    node_state.inputs = allocator.construct_array<InputState>(node_inputs.size());
    node_state.outputs = allocator.construct_array<OutputState>(node_outputs.size());
  }

  void destruct_node_state(const Node &node, NodeState &node_state)
  void destruct_node_data(const Node &node, NodeState &node_state, LocalAllocator &allocator)
  {
    if (node.is_function()) {
      const LazyFunction &fn = static_cast<const FunctionNode &>(node).function();
      if (node_state.storage != nullptr) {
        fn.destruct_storage(node_state.storage);
        fn.destruct_storage(node_state.storage, allocator);
      }
    }
    for (const int i : node.inputs().index_range()) {
      InputState &input_state = node_state.inputs[i];
      const InputSocket &input_socket = node.input(i);
      this->destruct_input_value_if_exists(input_state, input_socket.type());
      this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator);
    }
    std::destroy_at(&node_state);
  }

  /**

@@ -453,7 +439,7 @@ class Executor {
        this->set_input_required(locked_node, socket);
      }
      else {
        this->set_input_unused(locked_node, socket);
        this->set_input_unused(locked_node, socket, this->get_local_allocator());
      }
    });
  }

@@ -500,13 +486,14 @@ class Executor {
   * Most importantly, this function initializes `InputState.usage` and
   * `OutputState.potential_target_sockets`.
   */
  void initialize_static_value_usages(const Span<const FunctionNode *> side_effect_nodes)
  void initialize_static_value_usages(const Span<const FunctionNode *> side_effect_nodes,
                                      LocalAllocator &allocator)
  {
    const Span<const Node *> all_nodes = self_.graph_.nodes();

    /* Used for a search through all nodes that outputs depend on. */
    Stack<const Node *> reachable_nodes_to_check;
    Array<bool> reachable_node_flags(all_nodes.size(), false);
    Stack<const Node *, 16, LocalAllocatorRef> reachable_nodes_to_check{allocator};
    Array<bool, 16, LocalAllocatorRef> reachable_node_flags{all_nodes.size(), false, allocator};

    /* Graph outputs are always reachable. */
    for (const InputSocket *socket : self_.graph_outputs_) {

@@ -586,7 +573,7 @@ class Executor {

  void forward_newly_provided_inputs(CurrentTask &current_task)
  {
    LinearAllocator<> &allocator = this->get_main_or_local_allocator();
    LocalAllocator &allocator = this->get_local_allocator();
    for (const int graph_input_index : self_.graph_inputs_.index_range()) {
      std::atomic<uint8_t> &was_loaded = loaded_inputs_[graph_input_index];
      if (was_loaded.load()) {

@@ -605,7 +592,7 @@ class Executor {
  }

  void forward_newly_provided_input(CurrentTask &current_task,
                                    LinearAllocator<> &allocator,
                                    LocalAllocator &allocator,
                                    const int graph_input_index,
                                    void *input_data)
  {

@@ -621,7 +608,6 @@ class Executor {
    const Node &node = socket.node();
    const int index_in_node = socket.index();
    NodeState &node_state = *node_states_[node.index_in_graph()];
    OutputState &output_state = node_state.outputs[index_in_node];

    /* The notified output socket might be an input of the entire graph. In this case, notify the
     * caller that the input is required. */

@@ -640,12 +626,13 @@ class Executor {
      return;
    }
    this->forward_newly_provided_input(
        current_task, this->get_main_or_local_allocator(), graph_input_index, input_data);
        current_task, this->get_local_allocator(), graph_input_index, input_data);
    return;
  }

  BLI_assert(node.is_function());
  this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
    OutputState &output_state = node_state.outputs[index_in_node];
    if (output_state.usage == ValueUsage::Used) {
      return;
    }

@@ -659,9 +646,9 @@ class Executor {
    const Node &node = socket.node();
    const int index_in_node = socket.index();
    NodeState &node_state = *node_states_[node.index_in_graph()];
    OutputState &output_state = node_state.outputs[index_in_node];

    this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
      OutputState &output_state = node_state.outputs[index_in_node];
      output_state.potential_target_sockets -= 1;
      if (output_state.potential_target_sockets == 0) {
        BLI_assert(output_state.usage != ValueUsage::Unused);

@@ -760,7 +747,7 @@ class Executor {
  void run_node_task(const FunctionNode &node, CurrentTask &current_task)
  {
    NodeState &node_state = *node_states_[node.index_in_graph()];
    LinearAllocator<> &allocator = this->get_main_or_local_allocator();
    LocalAllocator &allocator = this->get_local_allocator();
    const LazyFunction &fn = node.function();

    bool node_needs_execution = false;

@@ -799,6 +786,7 @@ class Executor {
      node_state.always_used_inputs_requested = true;
    }

    const bool allow_missing_requested_inputs = fn.allow_missing_requested_inputs();
    for (const int input_index : node_state.inputs.index_range()) {
      InputState &input_state = node_state.inputs[input_index];
      if (input_state.was_ready_for_execution) {

@@ -808,7 +796,11 @@ class Executor {
        input_state.was_ready_for_execution = true;
        continue;
      }
      if (!fn.allow_missing_requested_inputs()) {
        const InputSocket &socket = node.input(input_index);
        if (socket.origin() == nullptr) {
          continue;
        }
      if (!allow_missing_requested_inputs) {
        if (input_state.usage == ValueUsage::Used) {
          return;
        }

@@ -848,7 +840,7 @@ class Executor {
      /* Importantly, the node must not be locked when it is executed. That would result in locks
       * being hold very long in some cases and results in multiple locks being hold by the same
       * thread in the same graph which can lead to deadlocks. */
      this->execute_node(node, node_state, current_task);
      this->execute_node(node, node_state, current_task, allocator);
    }

    this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {

@@ -857,7 +849,7 @@ class Executor {
      this->assert_expected_outputs_have_been_computed(locked_node);
    }
#endif
    this->finish_node_if_possible(locked_node);
    this->finish_node_if_possible(locked_node, allocator);
    const bool reschedule_requested = node_state.schedule_state ==
                                      NodeScheduleState::RunningAndRescheduled;
    node_state.schedule_state = NodeScheduleState::NotScheduled;

@@ -895,7 +887,7 @@ class Executor {
    }
  }

  void finish_node_if_possible(LockedNode &locked_node)
  void finish_node_if_possible(LockedNode &locked_node, LocalAllocator &allocator)
  {
    const Node &node = locked_node.node;
    NodeState &node_state = locked_node.node_state;

@@ -923,44 +915,44 @@ class Executor {
      const InputSocket &input_socket = node.input(input_index);
      InputState &input_state = node_state.inputs[input_index];
      if (input_state.usage == ValueUsage::Maybe) {
        this->set_input_unused(locked_node, input_socket);
      }
      else if (input_state.usage == ValueUsage::Used) {
        this->destruct_input_value_if_exists(input_state, input_socket.type());
        this->set_input_unused(locked_node, input_socket, allocator);
      }
    }

    if (node_state.storage != nullptr) {
      if (node.is_function()) {
        const FunctionNode &fn_node = static_cast<const FunctionNode &>(node);
        fn_node.function().destruct_storage(node_state.storage);
      }
      node_state.storage = nullptr;
    }
    this->destruct_node_data(node, node_state, allocator);
  }

  void destruct_input_value_if_exists(InputState &input_state, const CPPType &type)
  void destruct_input_value_if_exists(InputState &input_state,
                                      const CPPType &type,
                                      LocalAllocator &allocator)
  {
    if (input_state.value != nullptr) {
      type.destruct(input_state.value);
      allocator.deallocate(input_state.value, type.size(), type.alignment());
      input_state.value = nullptr;
    }
  }

  void execute_node(const FunctionNode &node, NodeState &node_state, CurrentTask &current_task);
  void execute_node(const FunctionNode &node,
                    NodeState &node_state,
                    CurrentTask &current_task,
                    LocalAllocator &allocator);

  void set_input_unused_during_execution(const Node &node,
                                         NodeState &node_state,
                                         const int input_index,
                                         CurrentTask &current_task)
  {
    LocalAllocator &allocator = this->get_local_allocator();
    const InputSocket &input_socket = node.input(input_index);
    this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
      this->set_input_unused(locked_node, input_socket, allocator);
    });
  }

  void set_input_unused(LockedNode &locked_node, const InputSocket &input_socket)
  void set_input_unused(LockedNode &locked_node,
                        const InputSocket &input_socket,
                        LocalAllocator &allocator)
  {
    NodeState &node_state = locked_node.node_state;
    const int input_index = input_socket.index();

@@ -972,7 +964,7 @@ class Executor {
    }
    input_state.usage = ValueUsage::Unused;

    this->destruct_input_value_if_exists(input_state, input_socket.type());
    this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator);
    if (input_state.was_ready_for_execution) {
      return;
    }

@@ -1026,7 +1018,7 @@ class Executor {
                      CurrentTask &current_task)
  {
    BLI_assert(value_to_forward.get() != nullptr);
    LinearAllocator<> &allocator = this->get_main_or_local_allocator();
    LocalAllocator &allocator = this->get_local_allocator();
    const CPPType &type = *value_to_forward.type();

    if (self_.logger_ != nullptr) {

@@ -1038,17 +1030,7 @@ class Executor {
      const Node &target_node = target_socket->node();
      NodeState &node_state = *node_states_[target_node.index_in_graph()];
      const int input_index = target_socket->index();
      InputState &input_state = node_state.inputs[input_index];
      const bool is_last_target = target_socket == targets.last();
#ifdef DEBUG
      if (input_state.value != nullptr) {
        if (self_.logger_ != nullptr) {
          self_.logger_->dump_when_input_is_set_twice(*target_socket, from_socket, *context_);
        }
        BLI_assert_unreachable();
      }
#endif
      BLI_assert(!input_state.was_ready_for_execution);
      BLI_assert(target_socket->type() == type);
      BLI_assert(target_socket->origin() == &from_socket);

@@ -1072,6 +1054,18 @@ class Executor {
        continue;
      }
      this->with_locked_node(target_node, node_state, current_task, [&](LockedNode &locked_node) {
        InputState &input_state = node_state.inputs[input_index];

#ifdef DEBUG
        if (input_state.value != nullptr) {
          if (self_.logger_ != nullptr) {
            self_.logger_->dump_when_input_is_set_twice(*target_socket, from_socket, *context_);
          }
          BLI_assert_unreachable();
        }
#endif
        BLI_assert(!input_state.was_ready_for_execution);

        if (input_state.usage == ValueUsage::Unused) {
          return;
        }

@@ -1089,6 +1083,7 @@ class Executor {
    }
    if (value_to_forward.get() != nullptr) {
      value_to_forward.destruct();
      allocator.deallocate(value_to_forward.get(), type.size(), type.alignment());
    }
  }

@@ -1145,23 +1140,10 @@ class Executor {
    if (BLI_system_thread_count() <= 1) {
      return false;
    }
    this->ensure_thread_locals();
    task_pool_.store(BLI_task_pool_create(this, TASK_PRIORITY_HIGH));
    return true;
  }

  void ensure_thread_locals()
  {
#ifdef FN_LAZY_FUNCTION_DEBUG_THREADS
    if (current_main_thread_ != std::this_thread::get_id()) {
      BLI_assert_unreachable();
    }
#endif
    if (!thread_locals_) {
      thread_locals_ = std::make_unique<threading::EnumerableThreadSpecific<ThreadLocalData>>();
    }
  }

  /**
   * Allow other threads to steal all the nodes that are currently scheduled on this thread.
   */

@@ -1194,12 +1176,12 @@ class Executor {
        [](TaskPool * /*pool*/, void *data) { MEM_delete(static_cast<ScheduledNodes *>(data)); });
  }

  LinearAllocator<> &get_main_or_local_allocator()
  LocalAllocator &get_local_allocator()
  {
    if (this->use_multi_threading()) {
      return thread_locals_->local().allocator;
      return context_->allocator->local();
    }
    return main_allocator_;
    return *context_->allocator;
  }
};

@@ -1248,7 +1230,7 @@ class GraphExecutorLFParams final : public Params {
    OutputState &output_state = node_state_.outputs[index];
    BLI_assert(!output_state.has_been_computed);
    if (output_state.value == nullptr) {
      LinearAllocator<> &allocator = executor_.get_main_or_local_allocator();
      LocalAllocator &allocator = executor_.get_local_allocator();
      const CPPType &type = node_.output(index).type();
      output_state.value = allocator.allocate(type.size(), type.alignment());
    }

@@ -1297,13 +1279,15 @@ class GraphExecutorLFParams final : public Params {
 */
inline void Executor::execute_node(const FunctionNode &node,
                                   NodeState &node_state,
                                   CurrentTask &current_task)
                                   CurrentTask &current_task,
                                   LocalAllocator &allocator)
{
  const LazyFunction &fn = node.function();
  GraphExecutorLFParams node_params{fn, *this, node, node_state, current_task};
  BLI_assert(context_ != nullptr);
  Context fn_context = *context_;
  fn_context.storage = node_state.storage;
  fn_context.allocator = &allocator;

  if (self_.logger_ != nullptr) {
    self_.logger_->log_before_node_execute(node, node_params, fn_context);

@@ -1330,12 +1314,32 @@ inline void Executor::execute_node(const FunctionNode &node,
  }
}

void GraphExecutor::preprocess(const Graph &graph, PreprocessData &r_preprocess_data)
{
  const Span<const Node *> nodes = graph.nodes();
  r_preprocess_data.offsets.reinitialize(nodes.size());
  /* Pack each node's state and its socket-state arrays back to back; the
   * alignas(8) on the state structs keeps every offset properly aligned. */
  int offset = 0;
  for (const int i : nodes.index_range()) {
    const Node &node = *nodes[i];
    NodeBufferOffsets &node_offsets = r_preprocess_data.offsets[i];
    node_offsets.node = offset;
    offset += sizeof(NodeState);
    node_offsets.inputs = offset;
    offset += sizeof(InputState) * node.inputs().size();
    node_offsets.outputs = offset;
    offset += sizeof(OutputState) * node.outputs().size();
  }
  r_preprocess_data.node_state_buffer_size = offset;
}

GraphExecutor::GraphExecutor(const Graph &graph,
                             const Span<const OutputSocket *> graph_inputs,
                             const Span<const InputSocket *> graph_outputs,
                             const PreprocessData &preprocess_data,
                             const Logger *logger,
                             const SideEffectProvider *side_effect_provider)
    : graph_(graph),
      preprocess_data_(preprocess_data),
      graph_inputs_(graph_inputs),
      graph_outputs_(graph_outputs),
      logger_(logger),

@@ -1360,15 +1364,17 @@ void GraphExecutor::execute_impl(Params &params, const Context &context) const
  executor.execute(params, context);
}

void *GraphExecutor::init_storage(LinearAllocator<> &allocator) const
void *GraphExecutor::init_storage(LocalAllocator &allocator) const
{
  Executor &executor = *allocator.construct<Executor>(*this).release();
  Executor &executor = allocator.allocate_new<Executor>(*this);
  return &executor;
}

void GraphExecutor::destruct_storage(void *storage) const
void GraphExecutor::destruct_storage(void *storage, LocalAllocator &allocator) const
{
  std::destroy_at(static_cast<Executor *>(storage));
  Executor *executor = static_cast<Executor *>(storage);
  executor->destruct_state(allocator);
  allocator.destruct_free(executor);
}

void GraphExecutorLogger::log_socket_value(const Socket &socket,

source/blender/functions/tests/FN_lazy_function_test.cc:
@@ -105,7 +105,11 @@ TEST(lazy_function, SideEffects)

  SimpleSideEffectProvider side_effect_provider{{&store_node}};

  GraphExecutor executor_fn{graph, {&input_node.output(0)}, {}, nullptr, &side_effect_provider};
  GraphExecutor::PreprocessData preprocess_data;
  GraphExecutor::preprocess(graph, preprocess_data);

  GraphExecutor executor_fn{
      graph, {&input_node.output(0)}, {}, preprocess_data, nullptr, &side_effect_provider};
  execute_lazy_function_eagerly(executor_fn, nullptr, std::make_tuple(5), std::make_tuple());

  EXPECT_EQ(dst1, 15);

@@ -167,8 +171,11 @@ TEST(lazy_function, GraphWithCycle)

  graph.update_node_indices();

  GraphExecutor::PreprocessData preprocess_data;
  GraphExecutor::preprocess(graph, preprocess_data);

  GraphExecutor executor_fn{
      graph, {&input_node.output(0)}, {&output_node.input(0)}, nullptr, nullptr};
      graph, {&input_node.output(0)}, {&output_node.input(0)}, preprocess_data, nullptr, nullptr};
  int result = 0;
  execute_lazy_function_eagerly(
      executor_fn, nullptr, std::make_tuple(10), std::make_tuple(&result));

source/blender/modifiers/intern/MOD_nodes.cc:
@@ -1146,8 +1146,12 @@ static GeometrySet compute_geometry(
  blender::nodes::GeometryNodesLazyFunctionLogger lf_logger(lf_graph_info);
  blender::nodes::GeometryNodesLazyFunctionSideEffectProvider lf_side_effect_provider;

  lf::GraphExecutor graph_executor{
      lf_graph_info.graph, graph_inputs, graph_outputs, &lf_logger, &lf_side_effect_provider};
  lf::GraphExecutor graph_executor{lf_graph_info.graph,
                                   graph_inputs,
                                   graph_outputs,
                                   lf_graph_info.graph_preprocess_data,
                                   &lf_logger,
                                   &lf_side_effect_provider};

  blender::nodes::GeoNodesModifierData geo_nodes_modifier_data;
  geo_nodes_modifier_data.depsgraph = ctx->depsgraph;

@@ -1169,7 +1173,9 @@ static GeometrySet compute_geometry(
  blender::bke::ModifierComputeContext modifier_compute_context{nullptr, nmd->modifier.name};
  user_data.compute_context = &modifier_compute_context;

  blender::LinearAllocator<> allocator;
  blender::LocalAllocatorSet allocator_set;
  blender::LocalAllocator &allocator = allocator_set.local();

  Vector<GMutablePointer> inputs_to_destruct;

  int input_index = -1;

@@ -1212,6 +1218,7 @@ static GeometrySet compute_geometry(
  lf::Context lf_context;
  lf_context.storage = graph_executor.init_storage(allocator);
  lf_context.user_data = &user_data;
  lf_context.allocator = &allocator;
  lf::BasicParams lf_params{graph_executor,
                            param_inputs,
                            param_outputs,

@@ -1219,7 +1226,7 @@ static GeometrySet compute_geometry(
                            param_output_usages,
                            param_set_outputs};
  graph_executor.execute(lf_params, lf_context);
  graph_executor.destruct_storage(lf_context.storage);
  graph_executor.destruct_storage(lf_context.storage, allocator);

  for (GMutablePointer &ptr : inputs_to_destruct) {
    ptr.destruct();

@@ -1289,6 +1296,7 @@ static void modifyGeometry(ModifierData *md,
                           const ModifierEvalContext *ctx,
                           GeometrySet &geometry_set)
{
  SCOPED_TIMER_AVERAGED(__func__);
  NodesModifierData *nmd = reinterpret_cast<NodesModifierData *>(md);
  if (nmd->node_group == nullptr) {
    return;

source/blender/nodes/NOD_geometry_nodes_lazy_function.hh:
@@ -187,6 +187,7 @@ struct GeometryNodesLazyFunctionGraphInfo {
   * Mappings between the lazy-function graph and the #bNodeTree.
   */
  GeometryNodeLazyFunctionGraphMapping mapping;
  lf::GraphExecutor::PreprocessData graph_preprocess_data;
  /**
   * Approximate number of nodes in the graph if all sub-graphs were inlined.
   * This can be used as a simple heuristic for the complexity of the node group.

source/blender/nodes/intern/geometry_nodes_lazy_function.cc:
@@ -769,6 +769,7 @@ class LazyFunctionForGroupNode : public LazyFunction {
    graph_executor_.emplace(lf_graph_info.graph,
                            std::move(graph_inputs),
                            std::move(graph_outputs),
                            lf_graph_info.graph_preprocess_data,
                            &*lf_logger_,
                            &*lf_side_effect_provider_);
  }

@@ -805,18 +806,18 @@ class LazyFunctionForGroupNode : public LazyFunction {
    graph_executor_->execute(params, group_context);
  }

  void *init_storage(LinearAllocator<> &allocator) const override
  void *init_storage(LocalAllocator &allocator) const override
  {
    Storage *s = allocator.construct<Storage>().release();
    s->graph_executor_storage = graph_executor_->init_storage(allocator);
    return s;
    Storage &s = allocator.allocate_new<Storage>();
    s.graph_executor_storage = graph_executor_->init_storage(allocator);
    return &s;
  }

  void destruct_storage(void *storage) const override
  void destruct_storage(void *storage, LocalAllocator &allocator) const override
  {
    Storage *s = static_cast<Storage *>(storage);
    graph_executor_->destruct_storage(s->graph_executor_storage);
    std::destroy_at(s);
    graph_executor_->destruct_storage(s->graph_executor_storage, allocator);
    allocator.destruct_free(s);
  }

  std::string name() const override

@@ -1243,6 +1244,7 @@ struct GeometryNodesLazyFunctionGraphBuilder {

  lf_graph_->update_node_indices();
  lf_graph_info_->num_inline_nodes_approximate += lf_graph_->nodes().size();
  lf::GraphExecutor::preprocess(*lf_graph_, lf_graph_info_->graph_preprocess_data);
}

 private: