WIP: Functions: new local allocator for better memory reuse and performance #104630

Draft
Jacques Lucke wants to merge 44 commits from JacquesLucke/blender:local-allocator into main

3 changed files with 58 additions and 21 deletions
Showing only changes of commit a48c1eb20d
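
For orientation while reading the hunks below: this commit extends the thread-local pool allocator (LocalPool) with per-bucket alignment tracking and typed destruct helpers, and switches the lazy-function graph executor's teardown over to them. Here is a rough usage sketch of the pool API as it appears in this diff; the include path, namespace and the example function are assumptions, only the LocalPool/MutableSpan calls are taken from the changes:

#include "BLI_local_pool.hh" /* Assumed header name; not confirmed by this diff. */

namespace blender {

/* Hypothetical usage sketch, not code from the branch. */
static void pool_usage_sketch(LocalPool<> &pool)
{
  /* Raw allocation: the caller must repeat size and alignment when deallocating. */
  void *buffer = pool.allocate(24, 8);
  pool.deallocate(buffer, 24, 8);

  /* The array helpers pair allocation with typed destruction,
   * as the graph executor does further down in this diff. */
  MutableSpan<bool> flags = pool.allocate_array<bool>(128);
  flags.fill(false);
  pool.destruct_array(flags);
}

}  // namespace blender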

File 1 of 3

@@ -22,6 +22,7 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
   struct BufferStack {
     int64_t element_size = -1;
+    int64_t min_alignment = -1;
     Stack<void *, 0> stack;
   };
@@ -32,7 +33,9 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
   LocalPool()
   {
     for (const int64_t i : IndexRange(small_stacks_.size())) {
-      small_stacks_[i].element_size = 8 * (i + 1);
+      BufferStack &buffer_stack = small_stacks_[i];
+      buffer_stack.element_size = 8 * (i + 1);
+      buffer_stack.min_alignment = power_of_2_min_u(buffer_stack.element_size);
     }
   }
@@ -42,24 +45,33 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
   void *allocate(const int64_t size, const int64_t alignment)
   {
-    BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment);
+    BLI_assert(size > 0);
+    BLI_assert(alignment <= size && alignment <= s_alignment);
     BufferStack &buffer_stack = this->get_buffer_stack(size, alignment);
     BLI_assert(buffer_stack.element_size >= size);
+    BLI_assert(buffer_stack.min_alignment >= alignment);
+    void *buffer;
     if (!buffer_stack.stack.is_empty()) {
-      void *buffer = buffer_stack.stack.pop();
+      buffer = buffer_stack.stack.pop();
       BLI_asan_unpoison(buffer, size);
-      return buffer;
     }
-    if (size <= 4096) {
-      return linear_allocator_.allocate(size, alignment);
+    else if (size <= 4096) {
+      buffer = linear_allocator_.allocate(buffer_stack.element_size, buffer_stack.min_alignment);
     }
-    return linear_allocator_.allocate(size_t(size),
-                                      std::max<size_t>(s_alignment, size_t(alignment)));
+    else {
+      buffer = linear_allocator_.allocate(size_t(size),
+                                          std::max<size_t>(s_alignment, size_t(alignment)));
+    }
+    return buffer;
   }

   void deallocate(const void *buffer, const int64_t size, const int64_t alignment)
   {
-    BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment);
+    BLI_assert(size > 0);
+    BLI_assert(alignment <= size && alignment <= s_alignment);
 #ifdef DEBUG
     memset(const_cast<void *>(buffer), -1, size);
 #endif
@@ -78,6 +90,9 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
   template<typename T> MutableSpan<T> allocate_array(int64_t size)
   {
+    if (size == 0) {
+      return {};
+    }
     T *array = static_cast<T *>(this->allocate(sizeof(T) * size, alignof(T)));
     return MutableSpan<T>(array, size);
   }
@@ -92,11 +107,31 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
     return array;
   }

+  template<typename T> void destruct_array(Span<T> data)
+  {
+    if (data.is_empty()) {
+      return;
+    }
+    destruct_n(const_cast<T *>(data.data()), data.size());
+    this->deallocate(data.data(), data.size() * sizeof(T), alignof(T));
+  }
+
+  template<typename T> void destruct_array(MutableSpan<T> data)
+  {
+    this->destruct_array(data.as_span());
+  }
+
+  template<typename T> void destruct(const T *value)
+  {
+    std::destroy_at(value);
+    this->deallocate(value, sizeof(T), alignof(T));
+  }
+
  private:
   BufferStack &get_buffer_stack(const int64_t size, const int64_t /*alignment*/)
   {
     if (size <= 64) {
-      return small_stacks_[(size - (size != 0)) >> 3];
+      return small_stacks_[(size - 1) >> 3];
     }
     if (!large_stacks_) {
       large_stacks_ = std::make_unique<Map<int, BufferStack>>();
@@ -105,6 +140,7 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
     return large_stacks_->lookup_or_add_cb(key, [&]() {
       BufferStack buffer_stack;
       buffer_stack.element_size = int64_t(1) << (8 * sizeof(int64_t) - key);
+      buffer_stack.min_alignment = s_alignment;
       return buffer_stack;
     });
   }
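
As a reading aid for the bucket logic in the hunks above: requests of at most 64 bytes are served from one of eight small stacks with element sizes 8, 16, ..., 64 (index (size - 1) >> 3), while larger requests fall back to power-of-two buckets in large_stacks_. Below is a standalone sketch of that size-class mapping; the key computation for large buckets is not shown in this commit, so the leading-zero-count form used here is an assumption that matches element_size = int64_t(1) << (8 * sizeof(int64_t) - key):

#include <cassert>
#include <cstdint>

/* Sketch only: reproduces the bucket sizing implied by this diff, not code from the branch. */
static int64_t bucket_element_size(const int64_t size)
{
  assert(size > 0);
  if (size <= 64) {
    /* Eight small stacks with element sizes 8, 16, ..., 64. */
    const int64_t index = (size - 1) >> 3;
    return 8 * (index + 1);
  }
  /* Assumed: key is the number of leading zero bits of (size - 1),
   * which rounds larger sizes up to the next power of two. */
  const int key = __builtin_clzll(uint64_t(size - 1));
  return int64_t(1) << (64 - key);
}

/* Examples: 1..8 -> 8, 24 -> 24, 65 -> 128, 4096 -> 4096, 5000 -> 8192. */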
@@ -115,6 +151,10 @@ class LocalMemoryPools {
   threading::EnumerableThreadSpecific<LocalPool<>> pool_by_thread_;

  public:
+  ~LocalMemoryPools()
+  {
+  }
+
   LocalPool<> &local()
   {
     return pool_by_thread_.local();

File 2 of 3

@@ -262,7 +262,7 @@ class Executor {
     BLI_assert(self_.graph_.node_indices_are_valid());
   }

-  void destruct_self(Pools &pools)
+  void destruct_state(Pools &pools)
   {
     if (TaskPool *task_pool = task_pool_.load()) {
       BLI_task_pool_free(task_pool);
@@ -276,7 +276,6 @@ class Executor {
         this->destruct_node_state(node, node_state, sub_pools);
       }
     });
-    this->~Executor();
   }

   /**
@@ -377,7 +376,9 @@ class Executor {
       const InputSocket &input_socket = node.input(i);
       this->destruct_input_value_if_exists(input_state, input_socket.type(), *pools.local);
     }
-    std::destroy_at(&node_state);
+    pools.local->destruct_array(node_state.inputs);
+    pools.local->destruct_array(node_state.outputs);
+    pools.local->destruct(&node_state);
   }

   void schedule_newly_requested_outputs(CurrentTask &current_task)
@@ -447,10 +448,7 @@ class Executor {
     /* Used for a search through all nodes that outputs depend on. */
     Stack<const Node *, 100> reachable_nodes_to_check;
     MutableSpan<bool> reachable_node_flags = allocator.allocate_array<bool>(all_nodes.size());
-    BLI_SCOPED_DEFER([&]() {
-      allocator.deallocate(
-          reachable_node_flags.data(), reachable_node_flags.size() * sizeof(bool), alignof(bool));
-    });
+    BLI_SCOPED_DEFER([&]() { allocator.destruct_array(reachable_node_flags); });
     reachable_node_flags.fill(false);

     /* Graph outputs are always reachable. */
@@ -1316,8 +1314,8 @@ void *GraphExecutor::init_storage(Pools &pools) const
 void GraphExecutor::destruct_storage(void *storage, Pools &pools) const
 {
   Executor *executor = static_cast<Executor *>(storage);
-  executor->destruct_self(pools);
-  pools.local->deallocate(executor, sizeof(Executor), alignof(Executor));
+  executor->destruct_state(pools);
+  pools.local->destruct(executor);
 }

 void GraphExecutorLogger::log_socket_value(const Socket &socket,
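
A note on the pattern in this hunk and the matching one in the next file: replacing the std::destroy_at + deallocate pair with pool->destruct(...) lets the pool derive size and alignment from the pointee type, so call sites can no longer pass mismatched values. A minimal standalone sketch of that kind of helper, mirroring the destruct() added in the first file; Pool here is a placeholder for any allocator exposing deallocate(ptr, size, alignment):

#include <memory>

/* Illustration only; not code from the branch. */
template<typename Pool, typename T> void typed_destruct(Pool &pool, const T *value)
{
  std::destroy_at(value);
  /* Size and alignment come from T itself instead of being spelled out at the call site. */
  pool.deallocate(value, sizeof(T), alignof(T));
}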

File 3 of 3

@@ -700,8 +700,7 @@ class LazyFunctionForGroupNode : public LazyFunction {
   {
     Storage *s = static_cast<Storage *>(storage);
     graph_executor_->destruct_storage(s->graph_executor_storage, pools);
-    std::destroy_at(s);
-    pools.local->deallocate(storage, sizeof(Storage), alignof(Storage));
+    pools.local->destruct(s);
   }
 };