WIP: Functions: new local allocator for better memory reuse and performance #104630
|
@ -0,0 +1,331 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
|
#include "BLI_allocator.hh"
|
||||||
|
#include "BLI_asan.h"
|
||||||
|
#include "BLI_enumerable_thread_specific.hh"
|
||||||
|
#include "BLI_linear_allocator.hh"
|
||||||
|
#include "BLI_map.hh"
|
||||||
|
#include "BLI_math_bits.h"
|
||||||
|
#include "BLI_stack.hh"
|
||||||
|
#include "BLI_utility_mixins.hh"
|
||||||
|
#include "BLI_vector.hh"
|
||||||
|
|
||||||
|
// #define BLI_LOCAL_ALLOCATOR_USE_GUARDED
|
||||||
|
// #define BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
|
||||||
|
|
||||||
|
namespace blender {
|
||||||
|
|
||||||
|
class LocalAllocatorSet;
|
||||||
|
class LocalAllocator;
|
||||||
|
class LocalAllocatorPool;
|
||||||
|
|
||||||
|
/**
 * A pool of buffers that all share the same size class. Freed buffers are cached here so that
 * subsequent allocations of the same size class can reuse them without touching the underlying
 * allocator. Only #LocalAllocator creates and uses pools.
 */
class LocalAllocatorPool : NonCopyable, NonMovable {
 private:
  /** Buffers that have been deallocated and are available for reuse. */
  Stack<void *> buffers;
  /** Size in bytes of every buffer served by this pool. -1 until initialized. */
  int64_t element_size = -1;
  /** Alignment guaranteed for every buffer served by this pool. -1 until initialized. */
  int64_t alignment = -1;

  friend LocalAllocator;
};
|
||||||
|
|
||||||
|
/**
 * A thread-local allocator that caches freed buffers in size-class pools for fast reuse.
 * Instances are created by and obtained from a #LocalAllocatorSet; an allocator may only be
 * used by the thread it belongs to (checked with #is_local in debug builds).
 */
class LocalAllocator : NonCopyable, NonMovable {
 private:
  /** Alignment of internal buffers and the upper bound for requested alignments. */
  static constexpr int64_t s_alignment = 64;
  /** Allocations of at least this size bypass the pooling linear allocator and use the global
   * allocator directly (and are returned to it eagerly on deallocation). */
  static constexpr int64_t s_global_allocation_threshold = 5 * 1024 * 1024;

  /** The set this per-thread allocator belongs to. */
  LocalAllocatorSet &owner_set_;
  /** Inline memory that serves the first small allocations without a heap allocation. */
  AlignedBuffer<256, 64> initial_buffer_;
  /** Backing allocator for pooled buffers below #s_global_allocation_threshold. */
  LinearAllocator<> linear_allocator_;

  /**
   * Prefix stored directly in front of allocations made with #allocate_with_head, so that
   * #deallocate_with_head can recover size and alignment from the pointer alone.
   */
  struct Head {
    int64_t buffer_size;
    int64_t buffer_alignment;
  };
  static_assert(is_power_of_2_constexpr(sizeof(Head)));

  /** Pools for sizes 1-64 bytes in 8-byte steps (see #get_pool for the mapping). */
  std::array<LocalAllocatorPool, 8> small_buffer_pools_;
  /** Pools for larger sizes, one per power-of-two size class, created on demand. */
  Map<int, std::unique_ptr<LocalAllocatorPool>> large_buffer_pools_;

  friend LocalAllocatorSet;

  /* Only #LocalAllocatorSet constructs allocators (one per thread). */
  LocalAllocator(LocalAllocatorSet &owner_set);

 public:
  ~LocalAllocator();

  /** True when this allocator is the one assigned to the current thread. */
  bool is_local() const;
  /** Get the allocator of the current thread (may be a different instance than `this`). */
  LocalAllocator &local();
  /** Get the set that owns this allocator. */
  LocalAllocatorSet &owner_set();

  /** Allocate/free a buffer. The same size/alignment must be passed to #deallocate later. */
  void *allocate(int64_t size, int64_t alignment);
  void deallocate(const void *buffer, int64_t size, int64_t alignment);

  /** Allocate/free directly from a pool previously obtained with #get_pool. */
  void *allocate(LocalAllocatorPool &pool);
  void deallocate(const void *buffer, LocalAllocatorPool &pool);

  /** Allocate/free a buffer that stores its own size/alignment in a hidden #Head prefix. */
  void *allocate_with_head(int64_t size, int64_t alignment);
  void deallocate_with_head(const void *buffer);

  /** Get the pool responsible for the given size class. */
  LocalAllocatorPool &get_pool(int64_t size, int64_t alignment);

  /** Typed convenience wrappers that construct/destruct values in allocator-owned memory. */
  template<typename T, typename... Args> T &allocate_new(Args &&...args);
  template<typename T, typename... Args> void destruct_free(const T *value);
  template<typename T> MutableSpan<T> allocate_array(int64_t size);
  template<typename T, typename... Args>
  MutableSpan<T> allocate_new_array(int64_t size, Args &&...args);
  template<typename T> void destruct_free_array(Span<T> data);
  template<typename T> void destruct_free_array(MutableSpan<T> data);
};
|
||||||
|
|
||||||
|
/**
 * Owns one #LocalAllocator per thread. Memory obtained from any of the allocators stays owned
 * by the set, so it must outlive all allocations made from it.
 */
class LocalAllocatorSet : NonCopyable, NonMovable {
 private:
  /** Lazily creates an allocator the first time a thread calls #local. */
  threading::EnumerableThreadSpecific<LocalAllocator> allocator_by_thread_;

#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
  /* Records the size registered for each live allocation across all threads, used to detect
   * mismatched deallocations. Guarded by the mutex because allocators of different threads
   * share this map. */
  std::mutex debug_sizes_mutex_;
  Map<const void *, int64_t> debug_sizes_;
#endif

  friend LocalAllocator;

 public:
  LocalAllocatorSet();
  ~LocalAllocatorSet();

  /** Get the allocator assigned to the current thread. */
  LocalAllocator &local();
};
|
||||||
|
|
||||||
|
class ThreadedLocalAllocatorRef {
|
||||||
|
private:
|
||||||
|
LocalAllocatorSet &allocator_set_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
ThreadedLocalAllocatorRef(LocalAllocator &allocator) : allocator_set_(allocator.owner_set())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void *allocate(const size_t size, const size_t alignment, const char * /*name*/)
|
||||||
|
{
|
||||||
|
LocalAllocator &allocator = allocator_set_.local();
|
||||||
|
return allocator.allocate_with_head(size, alignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
void deallocate(void *ptr)
|
||||||
|
{
|
||||||
|
LocalAllocator &allocator = allocator_set_.local();
|
||||||
|
allocator.deallocate_with_head(ptr);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class LocalAllocatorRef {
|
||||||
|
private:
|
||||||
|
LocalAllocator &allocator_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
LocalAllocatorRef(LocalAllocator &allocator) : allocator_(allocator)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void *allocate(const size_t size, const size_t alignment, const char * /*name*/)
|
||||||
|
{
|
||||||
|
return allocator_.allocate_with_head(size, alignment);
|
||||||
|
}
|
||||||
|
|
||||||
|
void deallocate(void *ptr)
|
||||||
|
{
|
||||||
|
allocator_.deallocate_with_head(ptr);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
inline bool LocalAllocator::is_local() const
|
||||||
|
{
|
||||||
|
return this == &owner_set_.local();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Get the allocator assigned to the current thread, which may be a different instance.
 */
inline LocalAllocator &LocalAllocator::local()
{
  LocalAllocatorSet &set = owner_set_;
  return set.local();
}
|
||||||
|
|
||||||
|
/**
 * Get the set that owns this allocator and its per-thread siblings.
 */
inline LocalAllocatorSet &LocalAllocator::owner_set()
{
  return owner_set_;
}
|
||||||
|
|
||||||
|
inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment)
|
||||||
|
{
|
||||||
|
LocalAllocatorPool &pool = this->get_pool(size, alignment);
|
||||||
|
BLI_assert(pool.element_size >= size);
|
||||||
|
BLI_assert(pool.alignment >= alignment);
|
||||||
|
|
||||||
|
return this->allocate(pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void LocalAllocator::deallocate(const void *buffer,
|
||||||
|
const int64_t size,
|
||||||
|
const int64_t alignment)
|
||||||
|
{
|
||||||
|
LocalAllocatorPool &pool = this->get_pool(size, alignment);
|
||||||
|
BLI_assert(pool.element_size >= size);
|
||||||
|
BLI_assert(pool.alignment >= alignment);
|
||||||
|
|
||||||
|
this->deallocate(buffer, pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Get a buffer from the given pool. The pool must have been obtained from #get_pool on this
 * same allocator.
 */
inline void *LocalAllocator::allocate(LocalAllocatorPool &pool)
{
  /* Only the thread that owns this allocator may allocate from it. */
  BLI_assert(this->is_local());

#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED
  /* Debug mode: bypass the pools entirely and use the guarded allocator. */
  return MEM_mallocN_aligned(pool.element_size, pool.alignment, __func__);
#endif

  void *buffer;
  if (!pool.buffers.is_empty()) {
    /* Reuse a previously freed buffer of the same size class. */
    buffer = pool.buffers.pop();
    /* The buffer was poisoned in #deallocate; make it accessible again. */
    BLI_asan_unpoison(buffer, pool.element_size);
  }
  else if (pool.element_size < s_global_allocation_threshold) {
    /* Small buffers are carved out of the thread-local linear allocator. */
    buffer = linear_allocator_.allocate(pool.element_size, pool.alignment);
  }
  else {
    /* Large buffers go directly to the global allocator so they can be returned to the system
     * when freed. Fix: use the aligned variant — plain `MEM_mallocN` does not honor
     * `pool.alignment` (up to 64 bytes here), so large allocations with alignment above the
     * malloc default could be under-aligned. `MEM_freeN` handles aligned allocations, so
     * #deallocate keeps working unchanged. */
    buffer = MEM_mallocN_aligned(pool.element_size, pool.alignment, __func__);
  }

#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
  {
    std::lock_guard lock{owner_set_.debug_sizes_mutex_};
    owner_set_.debug_sizes_.add_new(buffer, pool.element_size);
  }
#endif

  return buffer;
}
|
||||||
|
|
||||||
|
inline void LocalAllocator::deallocate(const void *buffer, LocalAllocatorPool &pool)
|
||||||
|
{
|
||||||
|
BLI_assert(this->is_local());
|
||||||
|
|
||||||
|
#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED
|
||||||
|
MEM_freeN(const_cast<void *>(buffer));
|
||||||
|
return;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
|
||||||
|
{
|
||||||
|
std::lock_guard lock{owner_set_.debug_sizes_mutex_};
|
||||||
|
auto [last_size, last_alignment] = owner_set_.debug_sizes_.pop(buffer);
|
||||||
|
if (last_size != size) {
|
||||||
|
BLI_assert_unreachable();
|
||||||
|
}
|
||||||
|
if (last_alignment != alignment) {
|
||||||
|
BLI_assert_unreachable();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
memset(const_cast<void *>(buffer), -1, pool.element_size);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (pool.element_size < s_global_allocation_threshold) {
|
||||||
|
BLI_asan_poison(buffer, pool.element_size);
|
||||||
|
pool.buffers.push(const_cast<void *>(buffer));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
MEM_freeN(const_cast<void *>(buffer));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Get the pool that serves allocations of the given size class. The returned pool satisfies
 * `element_size >= size` and `alignment >= alignment` (asserted by the callers).
 */
inline LocalAllocatorPool &LocalAllocator::get_pool(const int64_t size, const int64_t alignment)
{
  BLI_assert(size > 0);
  /* Alignments larger than the size or larger than #s_alignment (64) are not supported. */
  BLI_assert(alignment <= size);
  BLI_assert(alignment <= s_alignment);
  BLI_assert(is_power_of_2_i(alignment));
  UNUSED_VARS_NDEBUG(alignment);

  BLI_assert(this->is_local());
  if (size <= 64) {
    /* Sizes 1..64 map to the eight small pools in 8-byte steps:
     * pool i serves sizes 8*i+1 .. 8*(i+1). */
    return small_buffer_pools_[(size - 1) >> 3];
  }
  /* Larger sizes share one pool per power-of-two size class. The key is presumably the number
   * of leading zero bits of the size — TODO confirm #bitscan_reverse_uint64 semantics. */
  const int key = bitscan_reverse_uint64(uint64_t(size));
  return *large_buffer_pools_.lookup_or_add_cb(key, [&]() {
    auto pool = std::make_unique<LocalAllocatorPool>();
    /* Smallest power of two able to hold any size in this class.
     * NOTE(review): for exact powers of two (e.g. 128) this rounds up to the next class (256),
     * and for sizes >= 2^63 the shift amount reaches 64, which is undefined behavior —
     * confirm whether such sizes can occur here. */
    pool->element_size = int64_t(1) << (8 * sizeof(int64_t) - key);
    pool->alignment = s_alignment;
    return pool;
  });
}
|
||||||
|
|
||||||
|
/**
 * Allocate a buffer that remembers its own size and alignment in a #Head prefix, so that it can
 * later be freed with #deallocate_with_head without the caller passing them back.
 */
inline void *LocalAllocator::allocate_with_head(int64_t size, int64_t alignment)
{
  /* Reserve extra space in front of the user pointer for the #Head. */
  const int64_t buffer_size = size + std::max<int64_t>(alignment, sizeof(Head));
  const int64_t buffer_alignment = std::max<int64_t>(alignment, alignof(Head));
  void *buffer = this->allocate(buffer_size, buffer_alignment);
  Head *head = new (buffer) Head;
  head->buffer_size = buffer_size;
  head->buffer_alignment = buffer_alignment;
  /* NOTE(review): the returned pointer is `buffer + sizeof(Head)`. For `alignment >
   * sizeof(Head)` this is not necessarily aligned to `alignment`, even though the extra space
   * reserved above suggests an offset of `max(alignment, sizeof(Head))` was intended. Any fix
   * must be coordinated with #deallocate_with_head, which assumes the #Head sits at the start
   * of the underlying allocation. */
  return head + 1;
}
|
||||||
|
|
||||||
|
/**
 * Free a buffer that was allocated with #allocate_with_head. The size and alignment of the
 * underlying allocation are read from the #Head stored directly before the user pointer.
 */
inline void LocalAllocator::deallocate_with_head(const void *buffer)
{
  /* The #Head also marks the start of the underlying allocation (see #allocate_with_head). */
  const Head *head = static_cast<const Head *>(buffer) - 1;
  this->deallocate(head, head->buffer_size, head->buffer_alignment);
}
|
||||||
|
|
||||||
|
template<typename T, typename... Args> inline T &LocalAllocator::allocate_new(Args &&...args)
|
||||||
|
{
|
||||||
|
void *buffer = this->allocate(sizeof(T), alignof(T));
|
||||||
|
T *value = new (buffer) T(std::forward<Args>(args)...);
|
||||||
|
return *value;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename T, typename... Args> inline void LocalAllocator::destruct_free(const T *value)
|
||||||
|
{
|
||||||
|
std::destroy_at(value);
|
||||||
|
this->deallocate(value, sizeof(T), alignof(T));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Allocate (but do not construct) an array of `size` elements. Returns an empty span when
 * `size` is zero.
 */
template<typename T> inline MutableSpan<T> LocalAllocator::allocate_array(const int64_t size)
{
  if (size == 0) {
    return {};
  }
  T *elements = static_cast<T *>(this->allocate(sizeof(T) * size, alignof(T)));
  return MutableSpan<T>(elements, size);
}
|
||||||
|
|
||||||
|
template<typename T, typename... Args>
|
||||||
|
MutableSpan<T> inline LocalAllocator::allocate_new_array(const int64_t size, Args &&...args)
|
||||||
|
{
|
||||||
|
MutableSpan<T> array = this->allocate_array<T>(size);
|
||||||
|
for (const int64_t i : IndexRange(size)) {
|
||||||
|
new (&array[i]) T(std::forward<Args>(args)...);
|
||||||
|
}
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Destruct all elements of the array and give its memory back to the allocator. Arrays created
 * with #allocate_array / #allocate_new_array must be freed through this. No-op for empty spans.
 */
template<typename T> inline void LocalAllocator::destruct_free_array(Span<T> data)
{
  if (data.is_empty()) {
    return;
  }
  T *elements = const_cast<T *>(data.data());
  destruct_n(elements, data.size());
  this->deallocate(elements, data.size_in_bytes(), alignof(T));
}
|
||||||
|
|
||||||
|
/**
 * Same as the #Span overload; provided so callers holding a #MutableSpan need not convert.
 */
template<typename T> inline void LocalAllocator::destruct_free_array(MutableSpan<T> data)
{
  const Span<T> const_data = data.as_span();
  this->destruct_free_array(const_data);
}
|
||||||
|
|
||||||
|
/**
 * Get the allocator belonging to the current thread, creating it on first access.
 */
inline LocalAllocator &LocalAllocatorSet::local()
{
  LocalAllocator &allocator = allocator_by_thread_.local();
  return allocator;
}
|
||||||
|
|
||||||
|
} // namespace blender
|
|
@ -91,6 +91,7 @@ set(SRC
|
||||||
intern/lazy_threading.cc
|
intern/lazy_threading.cc
|
||||||
intern/length_parameterize.cc
|
intern/length_parameterize.cc
|
||||||
intern/listbase.cc
|
intern/listbase.cc
|
||||||
|
intern/local_allocator.cc
|
||||||
intern/math_base.c
|
intern/math_base.c
|
||||||
intern/math_base_inline.c
|
intern/math_base_inline.c
|
||||||
intern/math_base_safe_inline.c
|
intern/math_base_safe_inline.c
|
||||||
|
@ -256,6 +257,7 @@ set(SRC
|
||||||
BLI_linklist_stack.h
|
BLI_linklist_stack.h
|
||||||
BLI_listbase.h
|
BLI_listbase.h
|
||||||
BLI_listbase_wrapper.hh
|
BLI_listbase_wrapper.hh
|
||||||
|
BLI_local_allocator.hh
|
||||||
BLI_map.hh
|
BLI_map.hh
|
||||||
BLI_map_slots.hh
|
BLI_map_slots.hh
|
||||||
BLI_math.h
|
BLI_math.h
|
||||||
|
@ -484,6 +486,7 @@ if(WITH_GTESTS)
|
||||||
tests/BLI_linear_allocator_test.cc
|
tests/BLI_linear_allocator_test.cc
|
||||||
tests/BLI_linklist_lockfree_test.cc
|
tests/BLI_linklist_lockfree_test.cc
|
||||||
tests/BLI_listbase_test.cc
|
tests/BLI_listbase_test.cc
|
||||||
|
tests/BLI_local_allocator_test.cc
|
||||||
tests/BLI_map_test.cc
|
tests/BLI_map_test.cc
|
||||||
tests/BLI_math_base_safe_test.cc
|
tests/BLI_math_base_safe_test.cc
|
||||||
tests/BLI_math_base_test.cc
|
tests/BLI_math_base_test.cc
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||||
|
|
||||||
|
#include "BLI_local_allocator.hh"
|
||||||
|
|
||||||
|
namespace blender {
|
||||||
|
|
||||||
|
/* Lazily create one #LocalAllocator for every thread that accesses the set. */
LocalAllocatorSet::LocalAllocatorSet()
    : allocator_by_thread_([this]() { return LocalAllocator{*this}; })
{
}
|
||||||
|
|
||||||
|
/* Destroys all thread-local allocators and with them all memory allocated from this set. */
LocalAllocatorSet::~LocalAllocatorSet() = default;
|
||||||
|
|
||||||
|
LocalAllocator::LocalAllocator(LocalAllocatorSet &owner_set) : owner_set_(owner_set)
{
  /* Serve the first small allocations from the inline buffer, avoiding an early heap
   * allocation. */
  linear_allocator_.provide_buffer(initial_buffer_);
  /* The small pools cover sizes up to 64 bytes in 8-byte steps: 8, 16, 24, ..., 64
   * (see #LocalAllocator::get_pool for the size -> pool mapping). */
  for (const int64_t i : IndexRange(small_buffer_pools_.size())) {
    LocalAllocatorPool &pool = small_buffer_pools_[i];
    pool.element_size = 8 * (i + 1);
    /* Presumably rounds down to a power of two — the strictest alignment any request mapped to
     * this pool can ask for, since #get_pool asserts alignment <= size. TODO confirm
     * #power_of_2_min_u semantics. */
    pool.alignment = power_of_2_min_u(pool.element_size);
  }
}
|
||||||
|
|
||||||
|
/* Pooled buffers below the threshold are owned by `linear_allocator_`, and larger buffers are
 * freed eagerly in #deallocate, so the defaulted member destructors release everything. */
LocalAllocator::~LocalAllocator() = default;
|
||||||
|
|
||||||
|
} // namespace blender
|
|
@ -0,0 +1,10 @@
|
||||||
|
/* SPDX-License-Identifier: Apache-2.0 */
|
||||||
|
|
||||||
|
#include "BLI_local_allocator.hh"
|
||||||
|
#include "BLI_strict_flags.h"
|
||||||
|
|
||||||
|
#include "testing/testing.h"
|
||||||
|
|
||||||
|
namespace blender::tests {
|
||||||
|
|
||||||
|
} // namespace blender::tests
|
|
@ -42,6 +42,7 @@
|
||||||
#include "BLI_function_ref.hh"
|
#include "BLI_function_ref.hh"
|
||||||
#include "BLI_generic_pointer.hh"
|
#include "BLI_generic_pointer.hh"
|
||||||
#include "BLI_linear_allocator.hh"
|
#include "BLI_linear_allocator.hh"
|
||||||
|
#include "BLI_local_allocator.hh"
|
||||||
#include "BLI_vector.hh"
|
#include "BLI_vector.hh"
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
|
@ -98,6 +99,8 @@ struct Context {
|
||||||
* Custom user data that can be used in the function.
|
* Custom user data that can be used in the function.
|
||||||
*/
|
*/
|
||||||
UserData *user_data;
|
UserData *user_data;
|
||||||
|
|
||||||
|
LocalAllocator *allocator;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -276,12 +279,12 @@ class LazyFunction {
|
||||||
* Allocates storage for this function. The storage will be passed to every call to #execute.
|
* Allocates storage for this function. The storage will be passed to every call to #execute.
|
||||||
* If the function does not keep track of any state, this does not have to be implemented.
|
* If the function does not keep track of any state, this does not have to be implemented.
|
||||||
*/
|
*/
|
||||||
virtual void *init_storage(LinearAllocator<> &allocator) const;
|
virtual void *init_storage(LocalAllocator &allocator) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Destruct the storage created in #init_storage.
|
* Destruct the storage created in #init_storage.
|
||||||
*/
|
*/
|
||||||
virtual void destruct_storage(void *storage) const;
|
virtual void destruct_storage(void *storage, LocalAllocator &allocator) const;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calls `fn` with the input indices that the given `output_index` may depend on. By default
|
* Calls `fn` with the input indices that the given `output_index` may depend on. By default
|
||||||
|
|
|
@ -85,14 +85,16 @@ inline void execute_lazy_function_eagerly_impl(
|
||||||
...);
|
...);
|
||||||
output_usages.fill(ValueUsage::Used);
|
output_usages.fill(ValueUsage::Used);
|
||||||
set_outputs.fill(false);
|
set_outputs.fill(false);
|
||||||
LinearAllocator<> allocator;
|
LocalAllocatorSet allocator_set;
|
||||||
|
LocalAllocator &allocator = allocator_set.local();
|
||||||
Context context;
|
Context context;
|
||||||
context.user_data = user_data;
|
context.user_data = user_data;
|
||||||
context.storage = fn.init_storage(allocator);
|
context.storage = fn.init_storage(allocator);
|
||||||
|
context.allocator = &allocator;
|
||||||
BasicParams params{
|
BasicParams params{
|
||||||
fn, input_pointers, output_pointers, input_usages, output_usages, set_outputs};
|
fn, input_pointers, output_pointers, input_usages, output_usages, set_outputs};
|
||||||
fn.execute(params, context);
|
fn.execute(params, context);
|
||||||
fn.destruct_storage(context.storage);
|
fn.destruct_storage(context.storage, allocator);
|
||||||
|
|
||||||
/* Make sure all outputs have been computed. */
|
/* Make sure all outputs have been computed. */
|
||||||
BLI_assert(!Span<bool>(set_outputs).contains(false));
|
BLI_assert(!Span<bool>(set_outputs).contains(false));
|
||||||
|
|
|
@ -59,11 +59,23 @@ class GraphExecutor : public LazyFunction {
|
||||||
using Logger = GraphExecutorLogger;
|
using Logger = GraphExecutorLogger;
|
||||||
using SideEffectProvider = GraphExecutorSideEffectProvider;
|
using SideEffectProvider = GraphExecutorSideEffectProvider;
|
||||||
|
|
||||||
|
struct NodeBufferOffsets {
|
||||||
|
int node;
|
||||||
|
int inputs;
|
||||||
|
int outputs;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PreprocessData {
|
||||||
|
Array<NodeBufferOffsets> offsets;
|
||||||
|
int node_state_buffer_size;
|
||||||
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/**
|
/**
|
||||||
* The graph that is evaluated.
|
* The graph that is evaluated.
|
||||||
*/
|
*/
|
||||||
const Graph &graph_;
|
const Graph &graph_;
|
||||||
|
const PreprocessData &preprocess_data_;
|
||||||
/**
|
/**
|
||||||
* Input and output sockets of the entire graph.
|
* Input and output sockets of the entire graph.
|
||||||
*/
|
*/
|
||||||
|
@ -85,11 +97,14 @@ class GraphExecutor : public LazyFunction {
|
||||||
GraphExecutor(const Graph &graph,
|
GraphExecutor(const Graph &graph,
|
||||||
Span<const OutputSocket *> graph_inputs,
|
Span<const OutputSocket *> graph_inputs,
|
||||||
Span<const InputSocket *> graph_outputs,
|
Span<const InputSocket *> graph_outputs,
|
||||||
|
const PreprocessData &preprocess_data,
|
||||||
const Logger *logger,
|
const Logger *logger,
|
||||||
const SideEffectProvider *side_effect_provider);
|
const SideEffectProvider *side_effect_provider);
|
||||||
|
|
||||||
void *init_storage(LinearAllocator<> &allocator) const override;
|
void *init_storage(LocalAllocator &allocator) const override;
|
||||||
void destruct_storage(void *storage) const override;
|
void destruct_storage(void *storage, LocalAllocator &allocator) const override;
|
||||||
|
|
||||||
|
static void preprocess(const Graph &graph, PreprocessData &r_preprocess_data);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void execute_impl(Params ¶ms, const Context &context) const override;
|
void execute_impl(Params ¶ms, const Context &context) const override;
|
||||||
|
|
|
@ -25,12 +25,12 @@ std::string LazyFunction::output_name(int index) const
|
||||||
return outputs_[index].debug_name;
|
return outputs_[index].debug_name;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *LazyFunction::init_storage(LinearAllocator<> & /*allocator*/) const
|
void *LazyFunction::init_storage(LocalAllocator & /*allocator*/) const
|
||||||
{
|
{
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void LazyFunction::destruct_storage(void *storage) const
|
void LazyFunction::destruct_storage(void *storage, LocalAllocator & /*allocator*/) const
|
||||||
{
|
{
|
||||||
BLI_assert(storage == nullptr);
|
BLI_assert(storage == nullptr);
|
||||||
UNUSED_VARS_NDEBUG(storage);
|
UNUSED_VARS_NDEBUG(storage);
|
||||||
|
|
|
@ -75,7 +75,7 @@ enum class NodeScheduleState {
|
||||||
RunningAndRescheduled,
|
RunningAndRescheduled,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct InputState {
|
struct alignas(8) InputState {
|
||||||
/**
|
/**
|
||||||
* Value of this input socket. By default, the value is empty. When other nodes are done
|
* Value of this input socket. By default, the value is empty. When other nodes are done
|
||||||
* computing their outputs, the computed values will be forwarded to linked input sockets. The
|
* computing their outputs, the computed values will be forwarded to linked input sockets. The
|
||||||
|
@ -97,7 +97,7 @@ struct InputState {
|
||||||
bool was_ready_for_execution = false;
|
bool was_ready_for_execution = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct OutputState {
|
struct alignas(8) OutputState {
|
||||||
/**
|
/**
|
||||||
* Keeps track of how the output value is used. If a connected input becomes used, this output
|
* Keeps track of how the output value is used. If a connected input becomes used, this output
|
||||||
* has to become used as well. The output becomes unused when it is used by no input socket
|
* has to become used as well. The output becomes unused when it is used by no input socket
|
||||||
|
@ -127,7 +127,7 @@ struct OutputState {
|
||||||
void *value = nullptr;
|
void *value = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct NodeState {
|
struct alignas(8) NodeState {
|
||||||
/**
|
/**
|
||||||
* Needs to be locked when any data in this state is accessed that is not explicitly marked as
|
* Needs to be locked when any data in this state is accessed that is not explicitly marked as
|
||||||
* not needing the lock.
|
* not needing the lock.
|
||||||
|
@ -271,7 +271,7 @@ class Executor {
|
||||||
/**
|
/**
|
||||||
* State of every node, indexed by #Node::index_in_graph.
|
* State of every node, indexed by #Node::index_in_graph.
|
||||||
*/
|
*/
|
||||||
Array<NodeState *> node_states_;
|
MutableSpan<NodeState *> node_states_;
|
||||||
/**
|
/**
|
||||||
* Parameters provided by the caller. This is always non-null, while a node is running.
|
* Parameters provided by the caller. This is always non-null, while a node is running.
|
||||||
*/
|
*/
|
||||||
|
@ -285,15 +285,7 @@ class Executor {
|
||||||
#ifdef FN_LAZY_FUNCTION_DEBUG_THREADS
|
#ifdef FN_LAZY_FUNCTION_DEBUG_THREADS
|
||||||
std::thread::id current_main_thread_;
|
std::thread::id current_main_thread_;
|
||||||
#endif
|
#endif
|
||||||
/**
|
|
||||||
* A separate linear allocator for every thread. We could potentially reuse some memory, but that
|
|
||||||
* doesn't seem worth it yet.
|
|
||||||
*/
|
|
||||||
struct ThreadLocalData {
|
|
||||||
LinearAllocator<> allocator;
|
|
||||||
};
|
|
||||||
std::unique_ptr<threading::EnumerableThreadSpecific<ThreadLocalData>> thread_locals_;
|
|
||||||
LinearAllocator<> main_allocator_;
|
|
||||||
/**
|
/**
|
||||||
* Set to false when the first execution ends.
|
* Set to false when the first execution ends.
|
||||||
*/
|
*/
|
||||||
|
@ -308,18 +300,25 @@ class Executor {
|
||||||
BLI_assert(self_.graph_.node_indices_are_valid());
|
BLI_assert(self_.graph_.node_indices_are_valid());
|
||||||
}
|
}
|
||||||
|
|
||||||
~Executor()
|
void destruct_state(LocalAllocator &allocator)
|
||||||
{
|
{
|
||||||
if (TaskPool *task_pool = task_pool_.load()) {
|
if (TaskPool *task_pool = task_pool_.load()) {
|
||||||
BLI_task_pool_free(task_pool);
|
BLI_task_pool_free(task_pool);
|
||||||
}
|
}
|
||||||
threading::parallel_for(node_states_.index_range(), 1024, [&](const IndexRange range) {
|
threading::parallel_for(node_states_.index_range(), 1024, [&](const IndexRange range) {
|
||||||
|
LocalAllocator &local_allocator = allocator.local();
|
||||||
for (const int node_index : range) {
|
for (const int node_index : range) {
|
||||||
const Node &node = *self_.graph_.nodes()[node_index];
|
const Node &node = *self_.graph_.nodes()[node_index];
|
||||||
NodeState &node_state = *node_states_[node_index];
|
NodeState &node_state = *node_states_[node_index];
|
||||||
this->destruct_node_state(node, node_state);
|
if (!node_state.node_has_finished) {
|
||||||
|
this->destruct_node_data(node, node_state, local_allocator);
|
||||||
|
}
|
||||||
|
std::destroy_at(&node_state);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
allocator.deallocate(
|
||||||
|
node_states_[0], self_.preprocess_data_.node_state_buffer_size, alignof(NodeState));
|
||||||
|
allocator.destruct_free_array(node_states_);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -364,7 +363,7 @@ class Executor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
this->initialize_static_value_usages(side_effect_nodes);
|
this->initialize_static_value_usages(side_effect_nodes, this->get_local_allocator());
|
||||||
this->schedule_side_effect_nodes(side_effect_nodes, current_task);
|
this->schedule_side_effect_nodes(side_effect_nodes, current_task);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -382,54 +381,41 @@ class Executor {
|
||||||
void initialize_node_states()
|
void initialize_node_states()
|
||||||
{
|
{
|
||||||
Span<const Node *> nodes = self_.graph_.nodes();
|
Span<const Node *> nodes = self_.graph_.nodes();
|
||||||
node_states_.reinitialize(nodes.size());
|
node_states_ = context_->allocator->allocate_array<NodeState *>(nodes.size());
|
||||||
|
|
||||||
auto construct_node_range = [&](const IndexRange range, LinearAllocator<> &allocator) {
|
void *node_states_buffer = context_->allocator->allocate(
|
||||||
for (const int i : range) {
|
self_.preprocess_data_.node_state_buffer_size, alignof(NodeState));
|
||||||
const Node &node = *nodes[i];
|
|
||||||
NodeState &node_state = *allocator.construct<NodeState>().release();
|
for (const int i : nodes.index_range()) {
|
||||||
node_states_[i] = &node_state;
|
const Node &node = *nodes[i];
|
||||||
this->construct_initial_node_state(allocator, node, node_state);
|
const GraphExecutor::NodeBufferOffsets &node_offsets = self_.preprocess_data_.offsets[i];
|
||||||
}
|
void *state_buffer = POINTER_OFFSET(node_states_buffer, node_offsets.node);
|
||||||
};
|
NodeState *node_state = new (state_buffer) NodeState();
|
||||||
if (nodes.size() <= 256) {
|
node_state->inputs = {
|
||||||
construct_node_range(nodes.index_range(), main_allocator_);
|
static_cast<InputState *>(POINTER_OFFSET(node_states_buffer, node_offsets.inputs)),
|
||||||
}
|
node.inputs().size()};
|
||||||
else {
|
node_state->outputs = {
|
||||||
this->ensure_thread_locals();
|
static_cast<OutputState *>(POINTER_OFFSET(node_states_buffer, node_offsets.outputs)),
|
||||||
/* Construct all node states in parallel. */
|
node.outputs().size()};
|
||||||
threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) {
|
default_construct_n(node_state->inputs.data(), node_state->inputs.size());
|
||||||
LinearAllocator<> &allocator = thread_locals_->local().allocator;
|
default_construct_n(node_state->outputs.data(), node_state->outputs.size());
|
||||||
construct_node_range(range, allocator);
|
node_states_[i] = node_state;
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void construct_initial_node_state(LinearAllocator<> &allocator,
|
void destruct_node_data(const Node &node, NodeState &node_state, LocalAllocator &allocator)
|
||||||
const Node &node,
|
|
||||||
NodeState &node_state)
|
|
||||||
{
|
|
||||||
const Span<const InputSocket *> node_inputs = node.inputs();
|
|
||||||
const Span<const OutputSocket *> node_outputs = node.outputs();
|
|
||||||
|
|
||||||
node_state.inputs = allocator.construct_array<InputState>(node_inputs.size());
|
|
||||||
node_state.outputs = allocator.construct_array<OutputState>(node_outputs.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
void destruct_node_state(const Node &node, NodeState &node_state)
|
|
||||||
{
|
{
|
||||||
if (node.is_function()) {
|
if (node.is_function()) {
|
||||||
const LazyFunction &fn = static_cast<const FunctionNode &>(node).function();
|
const LazyFunction &fn = static_cast<const FunctionNode &>(node).function();
|
||||||
if (node_state.storage != nullptr) {
|
if (node_state.storage != nullptr) {
|
||||||
fn.destruct_storage(node_state.storage);
|
fn.destruct_storage(node_state.storage, allocator);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const int i : node.inputs().index_range()) {
|
for (const int i : node.inputs().index_range()) {
|
||||||
InputState &input_state = node_state.inputs[i];
|
InputState &input_state = node_state.inputs[i];
|
||||||
const InputSocket &input_socket = node.input(i);
|
const InputSocket &input_socket = node.input(i);
|
||||||
this->destruct_input_value_if_exists(input_state, input_socket.type());
|
this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator);
|
||||||
}
|
}
|
||||||
std::destroy_at(&node_state);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -453,7 +439,7 @@ class Executor {
|
||||||
this->set_input_required(locked_node, socket);
|
this->set_input_required(locked_node, socket);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
this->set_input_unused(locked_node, socket);
|
this->set_input_unused(locked_node, socket, this->get_local_allocator());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
@ -500,13 +486,14 @@ class Executor {
|
||||||
* Most importantly, this function initializes `InputState.usage` and
|
* Most importantly, this function initializes `InputState.usage` and
|
||||||
* `OutputState.potential_target_sockets`.
|
* `OutputState.potential_target_sockets`.
|
||||||
*/
|
*/
|
||||||
void initialize_static_value_usages(const Span<const FunctionNode *> side_effect_nodes)
|
void initialize_static_value_usages(const Span<const FunctionNode *> side_effect_nodes,
|
||||||
|
LocalAllocator &allocator)
|
||||||
{
|
{
|
||||||
const Span<const Node *> all_nodes = self_.graph_.nodes();
|
const Span<const Node *> all_nodes = self_.graph_.nodes();
|
||||||
|
|
||||||
/* Used for a search through all nodes that outputs depend on. */
|
/* Used for a search through all nodes that outputs depend on. */
|
||||||
Stack<const Node *> reachable_nodes_to_check;
|
Stack<const Node *, 16, LocalAllocatorRef> reachable_nodes_to_check{allocator};
|
||||||
Array<bool> reachable_node_flags(all_nodes.size(), false);
|
Array<bool, 16, LocalAllocatorRef> reachable_node_flags{all_nodes.size(), false, allocator};
|
||||||
|
|
||||||
/* Graph outputs are always reachable. */
|
/* Graph outputs are always reachable. */
|
||||||
for (const InputSocket *socket : self_.graph_outputs_) {
|
for (const InputSocket *socket : self_.graph_outputs_) {
|
||||||
|
@ -586,7 +573,7 @@ class Executor {
|
||||||
|
|
||||||
void forward_newly_provided_inputs(CurrentTask ¤t_task)
|
void forward_newly_provided_inputs(CurrentTask ¤t_task)
|
||||||
{
|
{
|
||||||
LinearAllocator<> &allocator = this->get_main_or_local_allocator();
|
LocalAllocator &allocator = this->get_local_allocator();
|
||||||
for (const int graph_input_index : self_.graph_inputs_.index_range()) {
|
for (const int graph_input_index : self_.graph_inputs_.index_range()) {
|
||||||
std::atomic<uint8_t> &was_loaded = loaded_inputs_[graph_input_index];
|
std::atomic<uint8_t> &was_loaded = loaded_inputs_[graph_input_index];
|
||||||
if (was_loaded.load()) {
|
if (was_loaded.load()) {
|
||||||
|
@ -605,7 +592,7 @@ class Executor {
|
||||||
}
|
}
|
||||||
|
|
||||||
void forward_newly_provided_input(CurrentTask ¤t_task,
|
void forward_newly_provided_input(CurrentTask ¤t_task,
|
||||||
LinearAllocator<> &allocator,
|
LocalAllocator &allocator,
|
||||||
const int graph_input_index,
|
const int graph_input_index,
|
||||||
void *input_data)
|
void *input_data)
|
||||||
{
|
{
|
||||||
|
@ -621,7 +608,6 @@ class Executor {
|
||||||
const Node &node = socket.node();
|
const Node &node = socket.node();
|
||||||
const int index_in_node = socket.index();
|
const int index_in_node = socket.index();
|
||||||
NodeState &node_state = *node_states_[node.index_in_graph()];
|
NodeState &node_state = *node_states_[node.index_in_graph()];
|
||||||
OutputState &output_state = node_state.outputs[index_in_node];
|
|
||||||
|
|
||||||
/* The notified output socket might be an input of the entire graph. In this case, notify the
|
/* The notified output socket might be an input of the entire graph. In this case, notify the
|
||||||
* caller that the input is required. */
|
* caller that the input is required. */
|
||||||
|
@ -640,12 +626,13 @@ class Executor {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
this->forward_newly_provided_input(
|
this->forward_newly_provided_input(
|
||||||
current_task, this->get_main_or_local_allocator(), graph_input_index, input_data);
|
current_task, this->get_local_allocator(), graph_input_index, input_data);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
BLI_assert(node.is_function());
|
BLI_assert(node.is_function());
|
||||||
this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
|
this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
|
||||||
|
OutputState &output_state = node_state.outputs[index_in_node];
|
||||||
if (output_state.usage == ValueUsage::Used) {
|
if (output_state.usage == ValueUsage::Used) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -659,9 +646,9 @@ class Executor {
|
||||||
const Node &node = socket.node();
|
const Node &node = socket.node();
|
||||||
const int index_in_node = socket.index();
|
const int index_in_node = socket.index();
|
||||||
NodeState &node_state = *node_states_[node.index_in_graph()];
|
NodeState &node_state = *node_states_[node.index_in_graph()];
|
||||||
OutputState &output_state = node_state.outputs[index_in_node];
|
|
||||||
|
|
||||||
this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
|
this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
|
||||||
|
OutputState &output_state = node_state.outputs[index_in_node];
|
||||||
output_state.potential_target_sockets -= 1;
|
output_state.potential_target_sockets -= 1;
|
||||||
if (output_state.potential_target_sockets == 0) {
|
if (output_state.potential_target_sockets == 0) {
|
||||||
BLI_assert(output_state.usage != ValueUsage::Unused);
|
BLI_assert(output_state.usage != ValueUsage::Unused);
|
||||||
|
@ -760,7 +747,7 @@ class Executor {
|
||||||
void run_node_task(const FunctionNode &node, CurrentTask ¤t_task)
|
void run_node_task(const FunctionNode &node, CurrentTask ¤t_task)
|
||||||
{
|
{
|
||||||
NodeState &node_state = *node_states_[node.index_in_graph()];
|
NodeState &node_state = *node_states_[node.index_in_graph()];
|
||||||
LinearAllocator<> &allocator = this->get_main_or_local_allocator();
|
LocalAllocator &allocator = this->get_local_allocator();
|
||||||
const LazyFunction &fn = node.function();
|
const LazyFunction &fn = node.function();
|
||||||
|
|
||||||
bool node_needs_execution = false;
|
bool node_needs_execution = false;
|
||||||
|
@ -799,6 +786,7 @@ class Executor {
|
||||||
node_state.always_used_inputs_requested = true;
|
node_state.always_used_inputs_requested = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const bool allow_missing_requested_inputs = fn.allow_missing_requested_inputs();
|
||||||
for (const int input_index : node_state.inputs.index_range()) {
|
for (const int input_index : node_state.inputs.index_range()) {
|
||||||
InputState &input_state = node_state.inputs[input_index];
|
InputState &input_state = node_state.inputs[input_index];
|
||||||
if (input_state.was_ready_for_execution) {
|
if (input_state.was_ready_for_execution) {
|
||||||
|
@ -808,7 +796,11 @@ class Executor {
|
||||||
input_state.was_ready_for_execution = true;
|
input_state.was_ready_for_execution = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!fn.allow_missing_requested_inputs()) {
|
const InputSocket &socket = node.input(input_index);
|
||||||
|
if (socket.origin() == nullptr) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!allow_missing_requested_inputs) {
|
||||||
if (input_state.usage == ValueUsage::Used) {
|
if (input_state.usage == ValueUsage::Used) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -848,7 +840,7 @@ class Executor {
|
||||||
/* Importantly, the node must not be locked when it is executed. That would result in locks
|
/* Importantly, the node must not be locked when it is executed. That would result in locks
|
||||||
* being hold very long in some cases and results in multiple locks being hold by the same
|
* being hold very long in some cases and results in multiple locks being hold by the same
|
||||||
* thread in the same graph which can lead to deadlocks. */
|
* thread in the same graph which can lead to deadlocks. */
|
||||||
this->execute_node(node, node_state, current_task);
|
this->execute_node(node, node_state, current_task, allocator);
|
||||||
}
|
}
|
||||||
|
|
||||||
this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
|
this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
|
||||||
|
@ -857,7 +849,7 @@ class Executor {
|
||||||
this->assert_expected_outputs_have_been_computed(locked_node);
|
this->assert_expected_outputs_have_been_computed(locked_node);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
this->finish_node_if_possible(locked_node);
|
this->finish_node_if_possible(locked_node, allocator);
|
||||||
const bool reschedule_requested = node_state.schedule_state ==
|
const bool reschedule_requested = node_state.schedule_state ==
|
||||||
NodeScheduleState::RunningAndRescheduled;
|
NodeScheduleState::RunningAndRescheduled;
|
||||||
node_state.schedule_state = NodeScheduleState::NotScheduled;
|
node_state.schedule_state = NodeScheduleState::NotScheduled;
|
||||||
|
@ -895,7 +887,7 @@ class Executor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void finish_node_if_possible(LockedNode &locked_node)
|
void finish_node_if_possible(LockedNode &locked_node, LocalAllocator &allocator)
|
||||||
{
|
{
|
||||||
const Node &node = locked_node.node;
|
const Node &node = locked_node.node;
|
||||||
NodeState &node_state = locked_node.node_state;
|
NodeState &node_state = locked_node.node_state;
|
||||||
|
@ -923,44 +915,44 @@ class Executor {
|
||||||
const InputSocket &input_socket = node.input(input_index);
|
const InputSocket &input_socket = node.input(input_index);
|
||||||
InputState &input_state = node_state.inputs[input_index];
|
InputState &input_state = node_state.inputs[input_index];
|
||||||
if (input_state.usage == ValueUsage::Maybe) {
|
if (input_state.usage == ValueUsage::Maybe) {
|
||||||
this->set_input_unused(locked_node, input_socket);
|
this->set_input_unused(locked_node, input_socket, allocator);
|
||||||
}
|
|
||||||
else if (input_state.usage == ValueUsage::Used) {
|
|
||||||
this->destruct_input_value_if_exists(input_state, input_socket.type());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (node_state.storage != nullptr) {
|
this->destruct_node_data(node, node_state, allocator);
|
||||||
if (node.is_function()) {
|
|
||||||
const FunctionNode &fn_node = static_cast<const FunctionNode &>(node);
|
|
||||||
fn_node.function().destruct_storage(node_state.storage);
|
|
||||||
}
|
|
||||||
node_state.storage = nullptr;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void destruct_input_value_if_exists(InputState &input_state, const CPPType &type)
|
void destruct_input_value_if_exists(InputState &input_state,
|
||||||
|
const CPPType &type,
|
||||||
|
LocalAllocator &allocator)
|
||||||
{
|
{
|
||||||
if (input_state.value != nullptr) {
|
if (input_state.value != nullptr) {
|
||||||
type.destruct(input_state.value);
|
type.destruct(input_state.value);
|
||||||
|
allocator.deallocate(input_state.value, type.size(), type.alignment());
|
||||||
input_state.value = nullptr;
|
input_state.value = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void execute_node(const FunctionNode &node, NodeState &node_state, CurrentTask ¤t_task);
|
void execute_node(const FunctionNode &node,
|
||||||
|
NodeState &node_state,
|
||||||
|
CurrentTask ¤t_task,
|
||||||
|
LocalAllocator &allocator);
|
||||||
|
|
||||||
void set_input_unused_during_execution(const Node &node,
|
void set_input_unused_during_execution(const Node &node,
|
||||||
NodeState &node_state,
|
NodeState &node_state,
|
||||||
const int input_index,
|
const int input_index,
|
||||||
CurrentTask ¤t_task)
|
CurrentTask ¤t_task)
|
||||||
{
|
{
|
||||||
|
LocalAllocator &allocator = this->get_local_allocator();
|
||||||
const InputSocket &input_socket = node.input(input_index);
|
const InputSocket &input_socket = node.input(input_index);
|
||||||
this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
|
this->with_locked_node(node, node_state, current_task, [&](LockedNode &locked_node) {
|
||||||
this->set_input_unused(locked_node, input_socket);
|
this->set_input_unused(locked_node, input_socket, allocator);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_input_unused(LockedNode &locked_node, const InputSocket &input_socket)
|
void set_input_unused(LockedNode &locked_node,
|
||||||
|
const InputSocket &input_socket,
|
||||||
|
LocalAllocator &allocator)
|
||||||
{
|
{
|
||||||
NodeState &node_state = locked_node.node_state;
|
NodeState &node_state = locked_node.node_state;
|
||||||
const int input_index = input_socket.index();
|
const int input_index = input_socket.index();
|
||||||
|
@ -972,7 +964,7 @@ class Executor {
|
||||||
}
|
}
|
||||||
input_state.usage = ValueUsage::Unused;
|
input_state.usage = ValueUsage::Unused;
|
||||||
|
|
||||||
this->destruct_input_value_if_exists(input_state, input_socket.type());
|
this->destruct_input_value_if_exists(input_state, input_socket.type(), allocator);
|
||||||
if (input_state.was_ready_for_execution) {
|
if (input_state.was_ready_for_execution) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1026,7 +1018,7 @@ class Executor {
|
||||||
CurrentTask ¤t_task)
|
CurrentTask ¤t_task)
|
||||||
{
|
{
|
||||||
BLI_assert(value_to_forward.get() != nullptr);
|
BLI_assert(value_to_forward.get() != nullptr);
|
||||||
LinearAllocator<> &allocator = this->get_main_or_local_allocator();
|
LocalAllocator &allocator = this->get_local_allocator();
|
||||||
const CPPType &type = *value_to_forward.type();
|
const CPPType &type = *value_to_forward.type();
|
||||||
|
|
||||||
if (self_.logger_ != nullptr) {
|
if (self_.logger_ != nullptr) {
|
||||||
|
@ -1038,17 +1030,7 @@ class Executor {
|
||||||
const Node &target_node = target_socket->node();
|
const Node &target_node = target_socket->node();
|
||||||
NodeState &node_state = *node_states_[target_node.index_in_graph()];
|
NodeState &node_state = *node_states_[target_node.index_in_graph()];
|
||||||
const int input_index = target_socket->index();
|
const int input_index = target_socket->index();
|
||||||
InputState &input_state = node_state.inputs[input_index];
|
|
||||||
const bool is_last_target = target_socket == targets.last();
|
const bool is_last_target = target_socket == targets.last();
|
||||||
#ifdef DEBUG
|
|
||||||
if (input_state.value != nullptr) {
|
|
||||||
if (self_.logger_ != nullptr) {
|
|
||||||
self_.logger_->dump_when_input_is_set_twice(*target_socket, from_socket, *context_);
|
|
||||||
}
|
|
||||||
BLI_assert_unreachable();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
BLI_assert(!input_state.was_ready_for_execution);
|
|
||||||
BLI_assert(target_socket->type() == type);
|
BLI_assert(target_socket->type() == type);
|
||||||
BLI_assert(target_socket->origin() == &from_socket);
|
BLI_assert(target_socket->origin() == &from_socket);
|
||||||
|
|
||||||
|
@ -1072,6 +1054,18 @@ class Executor {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
this->with_locked_node(target_node, node_state, current_task, [&](LockedNode &locked_node) {
|
this->with_locked_node(target_node, node_state, current_task, [&](LockedNode &locked_node) {
|
||||||
|
InputState &input_state = node_state.inputs[input_index];
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
if (input_state.value != nullptr) {
|
||||||
|
if (self_.logger_ != nullptr) {
|
||||||
|
self_.logger_->dump_when_input_is_set_twice(*target_socket, from_socket, *context_);
|
||||||
|
}
|
||||||
|
BLI_assert_unreachable();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
BLI_assert(!input_state.was_ready_for_execution);
|
||||||
|
|
||||||
if (input_state.usage == ValueUsage::Unused) {
|
if (input_state.usage == ValueUsage::Unused) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1089,6 +1083,7 @@ class Executor {
|
||||||
}
|
}
|
||||||
if (value_to_forward.get() != nullptr) {
|
if (value_to_forward.get() != nullptr) {
|
||||||
value_to_forward.destruct();
|
value_to_forward.destruct();
|
||||||
|
allocator.deallocate(value_to_forward.get(), type.size(), type.alignment());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1145,23 +1140,10 @@ class Executor {
|
||||||
if (BLI_system_thread_count() <= 1) {
|
if (BLI_system_thread_count() <= 1) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
this->ensure_thread_locals();
|
|
||||||
task_pool_.store(BLI_task_pool_create(this, TASK_PRIORITY_HIGH));
|
task_pool_.store(BLI_task_pool_create(this, TASK_PRIORITY_HIGH));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ensure_thread_locals()
|
|
||||||
{
|
|
||||||
#ifdef FN_LAZY_FUNCTION_DEBUG_THREADS
|
|
||||||
if (current_main_thread_ != std::this_thread::get_id()) {
|
|
||||||
BLI_assert_unreachable();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
if (!thread_locals_) {
|
|
||||||
thread_locals_ = std::make_unique<threading::EnumerableThreadSpecific<ThreadLocalData>>();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allow other threads to steal all the nodes that are currently scheduled on this thread.
|
* Allow other threads to steal all the nodes that are currently scheduled on this thread.
|
||||||
*/
|
*/
|
||||||
|
@ -1194,12 +1176,12 @@ class Executor {
|
||||||
[](TaskPool * /*pool*/, void *data) { MEM_delete(static_cast<ScheduledNodes *>(data)); });
|
[](TaskPool * /*pool*/, void *data) { MEM_delete(static_cast<ScheduledNodes *>(data)); });
|
||||||
}
|
}
|
||||||
|
|
||||||
LinearAllocator<> &get_main_or_local_allocator()
|
LocalAllocator &get_local_allocator()
|
||||||
{
|
{
|
||||||
if (this->use_multi_threading()) {
|
if (this->use_multi_threading()) {
|
||||||
return thread_locals_->local().allocator;
|
return context_->allocator->local();
|
||||||
}
|
}
|
||||||
return main_allocator_;
|
return *context_->allocator;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1248,7 +1230,7 @@ class GraphExecutorLFParams final : public Params {
|
||||||
OutputState &output_state = node_state_.outputs[index];
|
OutputState &output_state = node_state_.outputs[index];
|
||||||
BLI_assert(!output_state.has_been_computed);
|
BLI_assert(!output_state.has_been_computed);
|
||||||
if (output_state.value == nullptr) {
|
if (output_state.value == nullptr) {
|
||||||
LinearAllocator<> &allocator = executor_.get_main_or_local_allocator();
|
LocalAllocator &allocator = executor_.get_local_allocator();
|
||||||
const CPPType &type = node_.output(index).type();
|
const CPPType &type = node_.output(index).type();
|
||||||
output_state.value = allocator.allocate(type.size(), type.alignment());
|
output_state.value = allocator.allocate(type.size(), type.alignment());
|
||||||
}
|
}
|
||||||
|
@ -1297,13 +1279,15 @@ class GraphExecutorLFParams final : public Params {
|
||||||
*/
|
*/
|
||||||
inline void Executor::execute_node(const FunctionNode &node,
|
inline void Executor::execute_node(const FunctionNode &node,
|
||||||
NodeState &node_state,
|
NodeState &node_state,
|
||||||
CurrentTask ¤t_task)
|
CurrentTask ¤t_task,
|
||||||
|
LocalAllocator &allocator)
|
||||||
{
|
{
|
||||||
const LazyFunction &fn = node.function();
|
const LazyFunction &fn = node.function();
|
||||||
GraphExecutorLFParams node_params{fn, *this, node, node_state, current_task};
|
GraphExecutorLFParams node_params{fn, *this, node, node_state, current_task};
|
||||||
BLI_assert(context_ != nullptr);
|
BLI_assert(context_ != nullptr);
|
||||||
Context fn_context = *context_;
|
Context fn_context = *context_;
|
||||||
fn_context.storage = node_state.storage;
|
fn_context.storage = node_state.storage;
|
||||||
|
fn_context.allocator = &allocator;
|
||||||
|
|
||||||
if (self_.logger_ != nullptr) {
|
if (self_.logger_ != nullptr) {
|
||||||
self_.logger_->log_before_node_execute(node, node_params, fn_context);
|
self_.logger_->log_before_node_execute(node, node_params, fn_context);
|
||||||
|
@ -1330,12 +1314,32 @@ inline void Executor::execute_node(const FunctionNode &node,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GraphExecutor::preprocess(const Graph &graph, PreprocessData &r_preprocess_data)
|
||||||
|
{
|
||||||
|
const Span<const Node *> nodes = graph.nodes();
|
||||||
|
r_preprocess_data.offsets.reinitialize(nodes.size());
|
||||||
|
int offset = 0;
|
||||||
|
for (const int i : nodes.index_range()) {
|
||||||
|
const Node &node = *nodes[i];
|
||||||
|
NodeBufferOffsets &node_offsets = r_preprocess_data.offsets[i];
|
||||||
|
node_offsets.node = offset;
|
||||||
|
offset += sizeof(NodeState);
|
||||||
|
node_offsets.inputs = offset;
|
||||||
|
offset += sizeof(InputState) * node.inputs().size();
|
||||||
|
node_offsets.outputs = offset;
|
||||||
|
offset += sizeof(OutputState) * node.outputs().size();
|
||||||
|
}
|
||||||
|
r_preprocess_data.node_state_buffer_size = offset;
|
||||||
|
}
|
||||||
|
|
||||||
GraphExecutor::GraphExecutor(const Graph &graph,
|
GraphExecutor::GraphExecutor(const Graph &graph,
|
||||||
const Span<const OutputSocket *> graph_inputs,
|
const Span<const OutputSocket *> graph_inputs,
|
||||||
const Span<const InputSocket *> graph_outputs,
|
const Span<const InputSocket *> graph_outputs,
|
||||||
|
const PreprocessData &preprocess_data,
|
||||||
const Logger *logger,
|
const Logger *logger,
|
||||||
const SideEffectProvider *side_effect_provider)
|
const SideEffectProvider *side_effect_provider)
|
||||||
: graph_(graph),
|
: graph_(graph),
|
||||||
|
preprocess_data_(preprocess_data),
|
||||||
graph_inputs_(graph_inputs),
|
graph_inputs_(graph_inputs),
|
||||||
graph_outputs_(graph_outputs),
|
graph_outputs_(graph_outputs),
|
||||||
logger_(logger),
|
logger_(logger),
|
||||||
|
@ -1360,15 +1364,17 @@ void GraphExecutor::execute_impl(Params ¶ms, const Context &context) const
|
||||||
executor.execute(params, context);
|
executor.execute(params, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *GraphExecutor::init_storage(LinearAllocator<> &allocator) const
|
void *GraphExecutor::init_storage(LocalAllocator &allocator) const
|
||||||
{
|
{
|
||||||
Executor &executor = *allocator.construct<Executor>(*this).release();
|
Executor &executor = allocator.allocate_new<Executor>(*this);
|
||||||
return &executor;
|
return &executor;
|
||||||
}
|
}
|
||||||
|
|
||||||
void GraphExecutor::destruct_storage(void *storage) const
|
void GraphExecutor::destruct_storage(void *storage, LocalAllocator &allocator) const
|
||||||
{
|
{
|
||||||
std::destroy_at(static_cast<Executor *>(storage));
|
Executor *executor = static_cast<Executor *>(storage);
|
||||||
|
executor->destruct_state(allocator);
|
||||||
|
allocator.destruct_free(executor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GraphExecutorLogger::log_socket_value(const Socket &socket,
|
void GraphExecutorLogger::log_socket_value(const Socket &socket,
|
||||||
|
|
|
@ -105,7 +105,11 @@ TEST(lazy_function, SideEffects)
|
||||||
|
|
||||||
SimpleSideEffectProvider side_effect_provider{{&store_node}};
|
SimpleSideEffectProvider side_effect_provider{{&store_node}};
|
||||||
|
|
||||||
GraphExecutor executor_fn{graph, {&input_node.output(0)}, {}, nullptr, &side_effect_provider};
|
GraphExecutor::PreprocessData preprocess_data;
|
||||||
|
GraphExecutor::preprocess(graph, preprocess_data);
|
||||||
|
|
||||||
|
GraphExecutor executor_fn{
|
||||||
|
graph, {&input_node.output(0)}, {}, preprocess_data, nullptr, &side_effect_provider};
|
||||||
execute_lazy_function_eagerly(executor_fn, nullptr, std::make_tuple(5), std::make_tuple());
|
execute_lazy_function_eagerly(executor_fn, nullptr, std::make_tuple(5), std::make_tuple());
|
||||||
|
|
||||||
EXPECT_EQ(dst1, 15);
|
EXPECT_EQ(dst1, 15);
|
||||||
|
@ -167,8 +171,11 @@ TEST(lazy_function, GraphWithCycle)
|
||||||
|
|
||||||
graph.update_node_indices();
|
graph.update_node_indices();
|
||||||
|
|
||||||
|
GraphExecutor::PreprocessData preprocess_data;
|
||||||
|
GraphExecutor::preprocess(graph, preprocess_data);
|
||||||
|
|
||||||
GraphExecutor executor_fn{
|
GraphExecutor executor_fn{
|
||||||
graph, {&input_node.output(0)}, {&output_node.input(0)}, nullptr, nullptr};
|
graph, {&input_node.output(0)}, {&output_node.input(0)}, preprocess_data, nullptr, nullptr};
|
||||||
int result = 0;
|
int result = 0;
|
||||||
execute_lazy_function_eagerly(
|
execute_lazy_function_eagerly(
|
||||||
executor_fn, nullptr, std::make_tuple(10), std::make_tuple(&result));
|
executor_fn, nullptr, std::make_tuple(10), std::make_tuple(&result));
|
||||||
|
|
|
@ -1146,8 +1146,12 @@ static GeometrySet compute_geometry(
|
||||||
blender::nodes::GeometryNodesLazyFunctionLogger lf_logger(lf_graph_info);
|
blender::nodes::GeometryNodesLazyFunctionLogger lf_logger(lf_graph_info);
|
||||||
blender::nodes::GeometryNodesLazyFunctionSideEffectProvider lf_side_effect_provider;
|
blender::nodes::GeometryNodesLazyFunctionSideEffectProvider lf_side_effect_provider;
|
||||||
|
|
||||||
lf::GraphExecutor graph_executor{
|
lf::GraphExecutor graph_executor{lf_graph_info.graph,
|
||||||
lf_graph_info.graph, graph_inputs, graph_outputs, &lf_logger, &lf_side_effect_provider};
|
graph_inputs,
|
||||||
|
graph_outputs,
|
||||||
|
lf_graph_info.graph_preprocess_data,
|
||||||
|
&lf_logger,
|
||||||
|
&lf_side_effect_provider};
|
||||||
|
|
||||||
blender::nodes::GeoNodesModifierData geo_nodes_modifier_data;
|
blender::nodes::GeoNodesModifierData geo_nodes_modifier_data;
|
||||||
geo_nodes_modifier_data.depsgraph = ctx->depsgraph;
|
geo_nodes_modifier_data.depsgraph = ctx->depsgraph;
|
||||||
|
@ -1169,7 +1173,9 @@ static GeometrySet compute_geometry(
|
||||||
blender::bke::ModifierComputeContext modifier_compute_context{nullptr, nmd->modifier.name};
|
blender::bke::ModifierComputeContext modifier_compute_context{nullptr, nmd->modifier.name};
|
||||||
user_data.compute_context = &modifier_compute_context;
|
user_data.compute_context = &modifier_compute_context;
|
||||||
|
|
||||||
blender::LinearAllocator<> allocator;
|
blender::LocalAllocatorSet allocator_set;
|
||||||
|
blender::LocalAllocator &allocator = allocator_set.local();
|
||||||
|
|
||||||
Vector<GMutablePointer> inputs_to_destruct;
|
Vector<GMutablePointer> inputs_to_destruct;
|
||||||
|
|
||||||
int input_index = -1;
|
int input_index = -1;
|
||||||
|
@ -1212,6 +1218,7 @@ static GeometrySet compute_geometry(
|
||||||
lf::Context lf_context;
|
lf::Context lf_context;
|
||||||
lf_context.storage = graph_executor.init_storage(allocator);
|
lf_context.storage = graph_executor.init_storage(allocator);
|
||||||
lf_context.user_data = &user_data;
|
lf_context.user_data = &user_data;
|
||||||
|
lf_context.allocator = &allocator;
|
||||||
lf::BasicParams lf_params{graph_executor,
|
lf::BasicParams lf_params{graph_executor,
|
||||||
param_inputs,
|
param_inputs,
|
||||||
param_outputs,
|
param_outputs,
|
||||||
|
@ -1219,7 +1226,7 @@ static GeometrySet compute_geometry(
|
||||||
param_output_usages,
|
param_output_usages,
|
||||||
param_set_outputs};
|
param_set_outputs};
|
||||||
graph_executor.execute(lf_params, lf_context);
|
graph_executor.execute(lf_params, lf_context);
|
||||||
graph_executor.destruct_storage(lf_context.storage);
|
graph_executor.destruct_storage(lf_context.storage, allocator);
|
||||||
|
|
||||||
for (GMutablePointer &ptr : inputs_to_destruct) {
|
for (GMutablePointer &ptr : inputs_to_destruct) {
|
||||||
ptr.destruct();
|
ptr.destruct();
|
||||||
|
@ -1289,6 +1296,7 @@ static void modifyGeometry(ModifierData *md,
|
||||||
const ModifierEvalContext *ctx,
|
const ModifierEvalContext *ctx,
|
||||||
GeometrySet &geometry_set)
|
GeometrySet &geometry_set)
|
||||||
{
|
{
|
||||||
|
SCOPED_TIMER_AVERAGED(__func__);
|
||||||
NodesModifierData *nmd = reinterpret_cast<NodesModifierData *>(md);
|
NodesModifierData *nmd = reinterpret_cast<NodesModifierData *>(md);
|
||||||
if (nmd->node_group == nullptr) {
|
if (nmd->node_group == nullptr) {
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -187,6 +187,7 @@ struct GeometryNodesLazyFunctionGraphInfo {
|
||||||
* Mappings between the lazy-function graph and the #bNodeTree.
|
* Mappings between the lazy-function graph and the #bNodeTree.
|
||||||
*/
|
*/
|
||||||
GeometryNodeLazyFunctionGraphMapping mapping;
|
GeometryNodeLazyFunctionGraphMapping mapping;
|
||||||
|
lf::GraphExecutor::PreprocessData graph_preprocess_data;
|
||||||
/**
|
/**
|
||||||
* Approximate number of nodes in the graph if all sub-graphs were inlined.
|
* Approximate number of nodes in the graph if all sub-graphs were inlined.
|
||||||
* This can be used as a simple heuristic for the complexity of the node group.
|
* This can be used as a simple heuristic for the complexity of the node group.
|
||||||
|
|
|
@ -769,6 +769,7 @@ class LazyFunctionForGroupNode : public LazyFunction {
|
||||||
graph_executor_.emplace(lf_graph_info.graph,
|
graph_executor_.emplace(lf_graph_info.graph,
|
||||||
std::move(graph_inputs),
|
std::move(graph_inputs),
|
||||||
std::move(graph_outputs),
|
std::move(graph_outputs),
|
||||||
|
lf_graph_info.graph_preprocess_data,
|
||||||
&*lf_logger_,
|
&*lf_logger_,
|
||||||
&*lf_side_effect_provider_);
|
&*lf_side_effect_provider_);
|
||||||
}
|
}
|
||||||
|
@ -805,18 +806,18 @@ class LazyFunctionForGroupNode : public LazyFunction {
|
||||||
graph_executor_->execute(params, group_context);
|
graph_executor_->execute(params, group_context);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *init_storage(LinearAllocator<> &allocator) const override
|
void *init_storage(LocalAllocator &allocator) const override
|
||||||
{
|
{
|
||||||
Storage *s = allocator.construct<Storage>().release();
|
Storage &s = allocator.allocate_new<Storage>();
|
||||||
s->graph_executor_storage = graph_executor_->init_storage(allocator);
|
s.graph_executor_storage = graph_executor_->init_storage(allocator);
|
||||||
return s;
|
return &s;
|
||||||
}
|
}
|
||||||
|
|
||||||
void destruct_storage(void *storage) const override
|
void destruct_storage(void *storage, LocalAllocator &allocator) const override
|
||||||
{
|
{
|
||||||
Storage *s = static_cast<Storage *>(storage);
|
Storage *s = static_cast<Storage *>(storage);
|
||||||
graph_executor_->destruct_storage(s->graph_executor_storage);
|
graph_executor_->destruct_storage(s->graph_executor_storage, allocator);
|
||||||
std::destroy_at(s);
|
allocator.destruct_free(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string name() const override
|
std::string name() const override
|
||||||
|
@ -1243,6 +1244,7 @@ struct GeometryNodesLazyFunctionGraphBuilder {
|
||||||
|
|
||||||
lf_graph_->update_node_indices();
|
lf_graph_->update_node_indices();
|
||||||
lf_graph_info_->num_inline_nodes_approximate += lf_graph_->nodes().size();
|
lf_graph_info_->num_inline_nodes_approximate += lf_graph_->nodes().size();
|
||||||
|
lf::GraphExecutor::preprocess(*lf_graph_, lf_graph_info_->graph_preprocess_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
Loading…
Reference in New Issue