WIP: Functions: new local allocator for better memory reuse and performance #104630

Draft
Jacques Lucke wants to merge 44 commits from JacquesLucke/blender:local-allocator into main

3 changed files with 58 additions and 21 deletions
Showing only changes of commit a48c1eb20d

File 1 of 3

@@ -22,6 +22,7 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, NonMovable {
   struct BufferStack {
     int64_t element_size = -1;
+    int64_t min_alignment = -1;
     Stack<void *, 0> stack;
   };

@@ -32,7 +33,9 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, NonMovable {
   LocalPool()
   {
     for (const int64_t i : IndexRange(small_stacks_.size())) {
-      small_stacks_[i].element_size = 8 * (i + 1);
+      BufferStack &buffer_stack = small_stacks_[i];
+      buffer_stack.element_size = 8 * (i + 1);
+      buffer_stack.min_alignment = power_of_2_min_u(buffer_stack.element_size);
     }
   }
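
Note: the eight small buckets cover allocation sizes 1 to 64 in 8-byte steps, and each bucket now records the strongest alignment it can always hand out. A standalone sketch of the resulting table, assuming power_of_2_min_u returns the largest power of two that is not larger than its argument (largest_power_of_2_leq below is a hypothetical stand-in for it):

#include <cstdint>
#include <iostream>

/* Hypothetical stand-in for BLI's power_of_2_min_u: the largest power of two
 * that is <= x. Assumed semantics, not copied from the Blender sources. */
static int64_t largest_power_of_2_leq(const int64_t x)
{
  int64_t result = 1;
  while (result * 2 <= x) {
    result *= 2;
  }
  return result;
}

int main()
{
  /* Mirrors the constructor loop above: bucket i holds 8 * (i + 1) bytes. */
  for (int64_t i = 0; i < 8; i++) {
    const int64_t element_size = 8 * (i + 1);
    std::cout << "bucket " << i << ": element_size=" << element_size
              << ", min_alignment=" << largest_power_of_2_leq(element_size) << "\n";
  }
}

Since allocate() asserts alignment <= size and alignments are powers of two, any request that maps to a bucket needs at most that bucket's min_alignment, so a buffer popped from the stack is always sufficiently aligned.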
@@ -42,24 +45,33 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, NonMovable {
   void *allocate(const int64_t size, const int64_t alignment)
   {
-    BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment);
+    BLI_assert(size > 0);
+    BLI_assert(alignment <= size && alignment <= s_alignment);
+
     BufferStack &buffer_stack = this->get_buffer_stack(size, alignment);
     BLI_assert(buffer_stack.element_size >= size);
+    BLI_assert(buffer_stack.min_alignment >= alignment);

+    void *buffer;
     if (!buffer_stack.stack.is_empty()) {
-      void *buffer = buffer_stack.stack.pop();
+      buffer = buffer_stack.stack.pop();
       BLI_asan_unpoison(buffer, size);
-      return buffer;
     }
-    if (size <= 4096) {
-      return linear_allocator_.allocate(size, alignment);
+    else if (size <= 4096) {
+      buffer = linear_allocator_.allocate(buffer_stack.element_size, buffer_stack.min_alignment);
     }
-    return linear_allocator_.allocate(size_t(size),
-                                      std::max<size_t>(s_alignment, size_t(alignment)));
+    else {
+      buffer = linear_allocator_.allocate(size_t(size),
+                                          std::max<size_t>(s_alignment, size_t(alignment)));
+    }
+    return buffer;
   }

   void deallocate(const void *buffer, const int64_t size, const int64_t alignment)
   {
-    BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment);
+    BLI_assert(size > 0);
+    BLI_assert(alignment <= size && alignment <= s_alignment);
+
 #ifdef DEBUG
     memset(const_cast<void *>(buffer), -1, size);
 #endif
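
Note: allocate() now has a single pop-or-fallback shape: reuse a buffer from the bucket's stack when one is available, otherwise take a fresh one from the linear allocator. Allocating the bucket's full element_size and min_alignment in the fallback (instead of the requested size and alignment) is what makes the buffer reusable by any later request that maps to the same bucket. A minimal standalone sketch of the pattern, with std::malloc standing in for the linear allocator:

#include <cstdlib>
#include <iostream>
#include <vector>

/* Illustrative only: one size bucket with a stack of freed buffers. */
struct Bucket {
  std::size_t element_size = 0;
  std::vector<void *> free_stack;

  void *allocate()
  {
    if (!free_stack.empty()) {
      /* Fast path: hand back a previously freed buffer, no allocator call. */
      void *buffer = free_stack.back();
      free_stack.pop_back();
      return buffer;
    }
    /* Fallback: allocate the bucket's full element size, not the requested
     * size, so the buffer can serve any request mapping to this bucket. */
    return std::malloc(element_size);
  }

  void deallocate(void *buffer)
  {
    free_stack.push_back(buffer);
  }
};

int main()
{
  Bucket bucket;
  bucket.element_size = 24;
  void *a = bucket.allocate();
  bucket.deallocate(a);
  void *b = bucket.allocate();
  std::cout << (a == b) << "\n"; /* Prints 1: the buffer was reused. */
  std::free(b);
}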
@@ -78,6 +90,9 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, NonMovable {
   template<typename T> MutableSpan<T> allocate_array(int64_t size)
   {
+    if (size == 0) {
+      return {};
+    }
     T *array = static_cast<T *>(this->allocate(sizeof(T) * size, alignof(T)));
     return MutableSpan<T>(array, size);
   }

@@ -92,11 +107,31 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, NonMovable {
     return array;
   }

+  template<typename T> void destruct_array(Span<T> data)
+  {
+    if (data.is_empty()) {
+      return;
+    }
+    destruct_n(const_cast<T *>(data.data()), data.size());
+    this->deallocate(data.data(), data.size() * sizeof(T), alignof(T));
+  }
+
+  template<typename T> void destruct_array(MutableSpan<T> data)
+  {
+    this->destruct_array(data.as_span());
+  }
+
+  template<typename T> void destruct(const T *value)
+  {
+    std::destroy_at(value);
+    this->deallocate(value, sizeof(T), alignof(T));
+  }
+
  private:
   BufferStack &get_buffer_stack(const int64_t size, const int64_t /*alignment*/)
   {
     if (size <= 64) {
-      return small_stacks_[(size - (size != 0)) >> 3];
+      return small_stacks_[(size - 1) >> 3];
     }
     if (!large_stacks_) {
       large_stacks_ = std::make_unique<Map<int, BufferStack>>();
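
Note: the new destruct_array()/destruct() helpers bundle the destructor calls with returning the memory, so call sites no longer need matching std::destroy_at + deallocate pairs (see the executor changes below). A standalone sketch of what destruct_array() combines, with plain operator new/delete standing in for the pool and std::destroy_n standing in for BLI's destruct_n:

#include <cstddef>
#include <memory>
#include <new>
#include <string>

int main()
{
  const std::size_t size = 4;

  /* allocate_array<std::string>(4) in pool terms: raw, uninitialized memory. */
  std::string *array = static_cast<std::string *>(::operator new(sizeof(std::string) * size));
  std::uninitialized_fill_n(array, size, std::string("value"));

  /* destruct_array() in pool terms: destroy the elements, then return the
   * memory, in one call. */
  std::destroy_n(array, size);
  ::operator delete(array);
}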
@@ -105,6 +140,7 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, NonMovable {
     return large_stacks_->lookup_or_add_cb(key, [&]() {
       BufferStack buffer_stack;
       buffer_stack.element_size = int64_t(1) << (8 * sizeof(int64_t) - key);
+      buffer_stack.min_alignment = s_alignment;
       return buffer_stack;
     });
   }

@@ -115,6 +151,10 @@ class LocalMemoryPools {
   threading::EnumerableThreadSpecific<LocalPool<>> pool_by_thread_;

  public:
+  ~LocalMemoryPools()
+  {
+  }
+
   LocalPool<> &local()
   {
     return pool_by_thread_.local();
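
Note: LocalMemoryPools hands each thread its own LocalPool through EnumerableThreadSpecific, so the allocate/deallocate fast paths above run without locking. A rough standalone stand-in for that wrapper, where Pool is a placeholder for LocalPool<>:

#include <mutex>
#include <thread>
#include <unordered_map>

struct Pool {
  int allocation_count = 0;
};

/* Simplified stand-in for threading::EnumerableThreadSpecific<Pool>: local()
 * returns a Pool owned by the calling thread. Only the map lookup is locked;
 * everything done with the returned Pool is thread-private. */
class PerThread {
 private:
  std::mutex mutex_;
  std::unordered_map<std::thread::id, Pool> pools_;

 public:
  Pool &local()
  {
    std::lock_guard lock{mutex_};
    return pools_[std::this_thread::get_id()];
  }
};

int main()
{
  PerThread pools;
  std::thread worker([&]() { pools.local().allocation_count++; });
  pools.local().allocation_count++; /* A different Pool than the worker's. */
  worker.join();
}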

File 2 of 3

@@ -262,7 +262,7 @@ class Executor {
     BLI_assert(self_.graph_.node_indices_are_valid());
   }

-  void destruct_self(Pools &pools)
+  void destruct_state(Pools &pools)
   {
     if (TaskPool *task_pool = task_pool_.load()) {
       BLI_task_pool_free(task_pool);

@@ -276,7 +276,6 @@ class Executor {
         this->destruct_node_state(node, node_state, sub_pools);
       }
     });
-    this->~Executor();
   }

   /**

@@ -377,7 +376,9 @@ class Executor {
       const InputSocket &input_socket = node.input(i);
       this->destruct_input_value_if_exists(input_state, input_socket.type(), *pools.local);
     }
-    std::destroy_at(&node_state);
+    pools.local->destruct_array(node_state.inputs);
+    pools.local->destruct_array(node_state.outputs);
+    pools.local->destruct(&node_state);
   }

   void schedule_newly_requested_outputs(CurrentTask &current_task)

@@ -447,10 +448,7 @@ class Executor {
     /* Used for a search through all nodes that outputs depend on. */
     Stack<const Node *, 100> reachable_nodes_to_check;
     MutableSpan<bool> reachable_node_flags = allocator.allocate_array<bool>(all_nodes.size());
-    BLI_SCOPED_DEFER([&]() {
-      allocator.deallocate(
-          reachable_node_flags.data(), reachable_node_flags.size() * sizeof(bool), alignof(bool));
-    });
+    BLI_SCOPED_DEFER([&]() { allocator.destruct_array(reachable_node_flags); });
     reachable_node_flags.fill(false);

     /* Graph outputs are always reachable. */

@@ -1316,8 +1314,8 @@ void *GraphExecutor::init_storage(Pools &pools) const
 void GraphExecutor::destruct_storage(void *storage, Pools &pools) const
 {
   Executor *executor = static_cast<Executor *>(storage);
-  executor->destruct_self(pools);
-  pools.local->deallocate(executor, sizeof(Executor), alignof(Executor));
+  executor->destruct_state(pools);
+  pools.local->destruct(executor);
 }

 void GraphExecutorLogger::log_socket_value(const Socket &socket,
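
Note: destruct_self() becomes destruct_state() because it no longer ends with this->~Executor(); the caller now destroys and frees the Executor in one step via pools.local->destruct(executor). A standalone sketch of the construct/destruct symmetry the call sites converge on, with std::malloc standing in for the pool's backing allocation (construct() is a hypothetical helper added for the example; the diff above only shows the destruct() side):

#include <cstddef>
#include <cstdlib>
#include <memory>
#include <new>
#include <utility>

struct Pool {
  void *allocate(const std::size_t size, const std::size_t /*alignment*/)
  {
    return std::malloc(size);
  }

  void deallocate(const void *buffer, const std::size_t /*size*/, const std::size_t /*alignment*/)
  {
    std::free(const_cast<void *>(buffer));
  }

  /* Hypothetical counterpart of destruct(): allocate and placement-new. */
  template<typename T, typename... Args> T *construct(Args &&...args)
  {
    void *buffer = this->allocate(sizeof(T), alignof(T));
    return new (buffer) T(std::forward<Args>(args)...);
  }

  /* Mirrors LocalPool::destruct(): destructor call plus deallocation. */
  template<typename T> void destruct(const T *value)
  {
    std::destroy_at(value);
    this->deallocate(value, sizeof(T), alignof(T));
  }
};

int main()
{
  Pool pool;
  int *value = pool.construct<int>(42);
  pool.destruct(value);
}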

File 3 of 3

@@ -700,8 +700,7 @@ class LazyFunctionForGroupNode : public LazyFunction {
   {
     Storage *s = static_cast<Storage *>(storage);
     graph_executor_->destruct_storage(s->graph_executor_storage, pools);
-    std::destroy_at(s);
-    pools.local->deallocate(storage, sizeof(Storage), alignof(Storage));
+    pools.local->destruct(s);
   }
 };