WIP: Functions: new local allocator for better memory reuse and performance #104630
|
@ -22,6 +22,7 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
|
|||
|
||||
/**
 * Bookkeeping for one group of reusable buffers: a stack of raw buffer pointers plus the
 * size and alignment values that describe the buffers kept in it.
 */
struct BufferStack {
|
||||
/* Size associated with this stack; stays -1 until the owning pool assigns it.
 * allocate() asserts that this value is >= the requested size. */
int64_t element_size = -1;
|
||||
/* Alignment associated with this stack; stays -1 until the owning pool assigns it.
 * allocate() asserts that this value is >= the requested alignment. */
int64_t min_alignment = -1;
|
||||
/* Buffer pointers currently held by this stack. The second template argument is 0
 * (presumably the inline capacity of Stack -- confirm against the Stack declaration). */
Stack<void *, 0> stack;
|
||||
};
|
||||
|
||||
|
@ -32,7 +33,9 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
|
|||
/**
 * Sets up every entry of small_stacks_: entry i gets element_size 8 * (i + 1), and its
 * min_alignment is derived from that size with power_of_2_min_u.
 *
 * NOTE(review): this span is a diff rendering, so the direct assignment through
 * small_stacks_[i] and the assignment through the buffer_stack reference (both writing the
 * same value) look like the removed and the added side of one hunk -- confirm that only
 * one of them exists in the real file.
 */
LocalPool()
|
||||
{
|
||||
for (const int64_t i : IndexRange(small_stacks_.size())) {
|
||||
small_stacks_[i].element_size = 8 * (i + 1);
|
||||
BufferStack &buffer_stack = small_stacks_[i];
|
||||
buffer_stack.element_size = 8 * (i + 1);
|
||||
/* Presumably the largest power of two that does not exceed element_size -- TODO confirm
 * the exact semantics of power_of_2_min_u. */
buffer_stack.min_alignment = power_of_2_min_u(buffer_stack.element_size);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -42,24 +45,33 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
|
|||
|
||||
/**
 * Returns a buffer for a request of `size` with the given `alignment`.
 *
 * Visible steps: the arguments are checked with assertions, the matching BufferStack is
 * looked up with get_buffer_stack(), a buffer is popped from that stack when it is not
 * empty, and otherwise a fresh buffer is requested from linear_allocator_. Requests with
 * size <= 4096 and larger requests take separate paths for the fresh allocation.
 *
 * NOTE(review): this span is a diff rendering in which removed and added statements are
 * interleaved without markers (for example two `pop()` statements, two `size <= 4096`
 * conditions, and both early `return` statements and assignments to `buffer`). As printed
 * the text is not a single compilable function; confirm against the real file which
 * statements are live.
 */
void *allocate(const int64_t size, const int64_t alignment)
|
||||
{
|
||||
BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment);
|
||||
/* One of the assertion variants rejects empty requests outright. */
BLI_assert(size > 0);
|
||||
BLI_assert(alignment <= size && alignment <= s_alignment);
|
||||
|
||||
BufferStack &buffer_stack = this->get_buffer_stack(size, alignment);
|
||||
/* The selected stack has to be able to satisfy both the size and the alignment. */
BLI_assert(buffer_stack.element_size >= size);
|
||||
BLI_assert(buffer_stack.min_alignment >= alignment);
|
||||
|
||||
void *buffer;
|
||||
/* Reuse a previously returned buffer when one is available. */
if (!buffer_stack.stack.is_empty()) {
|
||||
void *buffer = buffer_stack.stack.pop();
|
||||
buffer = buffer_stack.stack.pop();
|
||||
/* Presumably makes the reused bytes accessible again for the address sanitizer --
 * confirm against the definition of BLI_asan_unpoison. */
BLI_asan_unpoison(buffer, size);
|
||||
return buffer;
|
||||
}
|
||||
if (size <= 4096) {
|
||||
return linear_allocator_.allocate(size, alignment);
|
||||
else if (size <= 4096) {
|
||||
/* This variant requests the stack's element_size and min_alignment rather than the
 * caller's size and alignment. */
buffer = linear_allocator_.allocate(buffer_stack.element_size, buffer_stack.min_alignment);
|
||||
}
|
||||
/* Larger requests use at least s_alignment as their alignment. */
return linear_allocator_.allocate(size_t(size),
|
||||
std::max<size_t>(s_alignment, size_t(alignment)));
|
||||
else {
|
||||
buffer = linear_allocator_.allocate(size_t(size),
|
||||
std::max<size_t>(s_alignment, size_t(alignment)));
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void deallocate(const void *buffer, const int64_t size, const int64_t alignment)
|
||||
{
|
||||
BLI_assert((size == 0 || alignment <= size) && alignment <= s_alignment);
|
||||
BLI_assert(size > 0);
|
||||
BLI_assert(alignment <= size && alignment <= s_alignment);
|
||||
|
||||
#ifdef DEBUG
|
||||
memset(const_cast<void *>(buffer), -1, size);
|
||||
#endif
|
||||
|
@ -78,6 +90,9 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
|
|||
|
||||
/**
 * Allocates storage for `size` elements of type T and returns it as a MutableSpan<T>.
 * A request for zero elements returns an empty span without calling allocate().
 * No element constructor is invoked here; the raw allocation is only cast to T *.
 */
template<typename T> MutableSpan<T> allocate_array(int64_t size)
|
||||
{
|
||||
/* Handle the empty case here; allocate() is never reached with a zero size. */
if (size == 0) {
|
||||
return {};
|
||||
}
|
||||
T *array = static_cast<T *>(this->allocate(sizeof(T) * size, alignof(T)));
|
||||
return MutableSpan<T>(array, size);
|
||||
}
|
||||
|
@ -92,11 +107,31 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
|
|||
return array;
|
||||
}
|
||||
|
||||
/**
 * Destructs the elements referenced by `data` and hands the underlying storage to
 * deallocate(), using data.size() * sizeof(T) as size and alignof(T) as alignment.
 * An empty span is ignored.
 */
template<typename T> void destruct_array(Span<T> data)
|
||||
{
|
||||
/* Nothing to destruct or release for an empty span. */
if (data.is_empty()) {
|
||||
return;
|
||||
}
|
||||
/* Span<T> exposes const elements, so constness is cast away before destruct_n is called
 * (presumably destruct_n runs the destructor of each element -- confirm). */
destruct_n(const_cast<T *>(data.data()), data.size());
|
||||
this->deallocate(data.data(), data.size() * sizeof(T), alignof(T));
|
||||
}
|
||||
|
||||
/**
 * Overload for mutable spans: converts `data` with as_span() and forwards to the
 * Span<T> overload of destruct_array().
 */
template<typename T> void destruct_array(MutableSpan<T> data)
|
||||
{
|
||||
this->destruct_array(data.as_span());
|
||||
}
|
||||
|
||||
/**
 * Destroys the object pointed to by `value` with std::destroy_at and then passes its
 * storage to deallocate(), using sizeof(T) and alignof(T).
 */
template<typename T> void destruct(const T *value)
|
||||
{
|
||||
std::destroy_at(value);
|
||||
this->deallocate(value, sizeof(T), alignof(T));
|
||||
}
|
||||
|
||||
private:
|
||||
BufferStack &get_buffer_stack(const int64_t size, const int64_t /*alignment*/)
|
||||
{
|
||||
if (size <= 64) {
|
||||
return small_stacks_[(size - (size != 0)) >> 3];
|
||||
return small_stacks_[(size - 1) >> 3];
|
||||
}
|
||||
if (!large_stacks_) {
|
||||
large_stacks_ = std::make_unique<Map<int, BufferStack>>();
|
||||
|
@ -105,6 +140,7 @@ template<typename Allocator = GuardedAllocator> class LocalPool : NonCopyable, N
|
|||
return large_stacks_->lookup_or_add_cb(key, [&]() {
|
||||
BufferStack buffer_stack;
|
||||
buffer_stack.element_size = int64_t(1) << (8 * sizeof(int64_t) - key);
|
||||
buffer_stack.min_alignment = s_alignment;
|
||||
return buffer_stack;
|
||||
});
|
||||
}
|
||||
|
@ -115,6 +151,10 @@ class LocalMemoryPools {
|
|||
threading::EnumerableThreadSpecific<LocalPool<>> pool_by_thread_;
|
||||
|
||||
public:
|
||||
/**
 * User-declared destructor with an empty body.
 * NOTE(review): no explicit cleanup is performed here; presumably the members release
 * their own resources -- confirm why the destructor is declared at all.
 */
~LocalMemoryPools()
|
||||
{
|
||||
}
|
||||
|
||||
LocalPool<> &local()
|
||||
{
|
||||
return pool_by_thread_.local();
|
||||
|
|
|
@ -262,7 +262,7 @@ class Executor {
|
|||
BLI_assert(self_.graph_.node_indices_are_valid());
|
||||
}
|
||||
|
||||
void destruct_self(Pools &pools)
|
||||
void destruct_state(Pools &pools)
|
||||
{
|
||||
if (TaskPool *task_pool = task_pool_.load()) {
|
||||
BLI_task_pool_free(task_pool);
|
||||
|
@ -276,7 +276,6 @@ class Executor {
|
|||
this->destruct_node_state(node, node_state, sub_pools);
|
||||
}
|
||||
});
|
||||
this->~Executor();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -377,7 +376,9 @@ class Executor {
|
|||
const InputSocket &input_socket = node.input(i);
|
||||
this->destruct_input_value_if_exists(input_state, input_socket.type(), *pools.local);
|
||||
}
|
||||
std::destroy_at(&node_state);
|
||||
pools.local->destruct_array(node_state.inputs);
|
||||
pools.local->destruct_array(node_state.outputs);
|
||||
pools.local->destruct(&node_state);
|
||||
}
|
||||
|
||||
void schedule_newly_requested_outputs(CurrentTask ¤t_task)
|
||||
|
@ -447,10 +448,7 @@ class Executor {
|
|||
/* Used for a search through all nodes that outputs depend on. */
|
||||
Stack<const Node *, 100> reachable_nodes_to_check;
|
||||
MutableSpan<bool> reachable_node_flags = allocator.allocate_array<bool>(all_nodes.size());
|
||||
BLI_SCOPED_DEFER([&]() {
|
||||
allocator.deallocate(
|
||||
reachable_node_flags.data(), reachable_node_flags.size() * sizeof(bool), alignof(bool));
|
||||
});
|
||||
BLI_SCOPED_DEFER([&]() { allocator.destruct_array(reachable_node_flags); });
|
||||
reachable_node_flags.fill(false);
|
||||
|
||||
/* Graph outputs are always reachable. */
|
||||
|
@ -1316,8 +1314,8 @@ void *GraphExecutor::init_storage(Pools &pools) const
|
|||
/**
 * Tears down the Executor stored in `storage`: the pointer is cast to Executor *, the
 * executor cleans up with the given pools, and the object is then released through
 * pools.local.
 *
 * NOTE(review): this span is a diff rendering that appears to contain both sides of a
 * hunk. The destruct_self() + deallocate() pair and the destruct_state() + destruct()
 * pair look like the removed and the added variant of the same teardown -- confirm that
 * only one pair is live, otherwise the executor would be torn down twice.
 */
void GraphExecutor::destruct_storage(void *storage, Pools &pools) const
|
||||
{
|
||||
Executor *executor = static_cast<Executor *>(storage);
|
||||
executor->destruct_self(pools);
|
||||
pools.local->deallocate(executor, sizeof(Executor), alignof(Executor));
|
||||
executor->destruct_state(pools);
|
||||
/* Presumably runs the Executor destructor and returns its memory to the local pool
 * (assuming pools.local is a LocalPool -- confirm). */
pools.local->destruct(executor);
|
||||
}
|
||||
|
||||
void GraphExecutorLogger::log_socket_value(const Socket &socket,
|
||||
|
|
|
@ -700,8 +700,7 @@ class LazyFunctionForGroupNode : public LazyFunction {
|
|||
{
|
||||
Storage *s = static_cast<Storage *>(storage);
|
||||
graph_executor_->destruct_storage(s->graph_executor_storage, pools);
|
||||
std::destroy_at(s);
|
||||
pools.local->deallocate(storage, sizeof(Storage), alignof(Storage));
|
||||
pools.local->destruct(s);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue