WIP: Functions: new local allocator for better memory reuse and performance #104630

Draft
Jacques Lucke wants to merge 44 commits from JacquesLucke/blender:local-allocator into main

2 changed files with 80 additions and 8 deletions
Showing only changes of commit ff3273357f.


@@ -29,6 +29,12 @@ class LocalAllocator : NonCopyable, NonMovable {
     int64_t alignment = -1;
   };

+  struct Head {
+    int64_t buffer_size;
+    int64_t buffer_alignment;
+  };
+  static_assert(is_power_of_2_constexpr(sizeof(Head)));
+
   std::array<BufferStack, 8> small_stacks_;
   Map<int, BufferStack> large_stacks_;
@@ -41,20 +47,24 @@ class LocalAllocator : NonCopyable, NonMovable {
   bool is_local() const;
   LocalAllocator &local();
+  LocalAllocatorSet &owner_set();

-  void *allocate(const int64_t size, const int64_t alignment);
-  void deallocate(const void *buffer, const int64_t size, const int64_t alignment);
+  void *allocate(int64_t size, int64_t alignment);
+  void deallocate(const void *buffer, int64_t size, int64_t alignment);
+
+  void *allocate_with_head(int64_t size, int64_t alignment);
+  void deallocate_with_head(const void *buffer);

   template<typename T, typename... Args> T &allocate_new(Args &&...args);
   template<typename T, typename... Args> void destruct_free(const T *value);
-  template<typename T> MutableSpan<T> allocate_array(const int64_t size);
+  template<typename T> MutableSpan<T> allocate_array(int64_t size);
   template<typename T, typename... Args>
-  MutableSpan<T> allocate_new_array(const int64_t size, Args &&...args);
+  MutableSpan<T> allocate_new_array(int64_t size, Args &&...args);
   template<typename T> void destruct_free_array(Span<T> data);
   template<typename T> void destruct_free_array(MutableSpan<T> data);

  private:
-  BufferStack &get_buffer_stack(const int64_t size, const int64_t alignment);
+  BufferStack &get_buffer_stack(int64_t size, int64_t alignment);
 };

 class LocalAllocatorSet : NonCopyable, NonMovable {
@@ -68,6 +78,48 @@ class LocalAllocatorSet : NonCopyable, NonMovable {
   LocalAllocator &local();
 };

+class ThreadedLocalAllocatorRef {
+ private:
+  LocalAllocatorSet &allocator_set_;
+
+ public:
+  ThreadedLocalAllocatorRef(LocalAllocator &allocator) : allocator_set_(allocator.owner_set())
+  {
+  }
+
+  void *allocate(const size_t size, const size_t alignment, const char * /*name*/)
+  {
+    LocalAllocator &allocator = allocator_set_.local();
+    return allocator.allocate_with_head(size, alignment);
+  }
+
+  void deallocate(void *ptr)
+  {
+    LocalAllocator &allocator = allocator_set_.local();
+    allocator.deallocate_with_head(ptr);
+  }
+};
+
+class LocalAllocatorRef {
+ private:
+  LocalAllocator &allocator_;
+
+ public:
+  LocalAllocatorRef(LocalAllocator &allocator) : allocator_(allocator)
+  {
+  }
+
+  void *allocate(const size_t size, const size_t alignment, const char * /*name*/)
+  {
+    return allocator_.allocate_with_head(size, alignment);
+  }
+
+  void deallocate(void *ptr)
+  {
+    allocator_.deallocate_with_head(ptr);
+  }
+};
+
 inline bool LocalAllocator::is_local() const
 {
   return this == &owner_set_.local();
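Note: the two `*Ref` classes above adapt `LocalAllocator` to the `allocate(size, alignment, name)` / `deallocate(ptr)` shape that Blender's containers expect from their `Allocator` template parameter (the same interface as `GuardedAllocator`). `ThreadedLocalAllocatorRef` re-resolves the calling thread's allocator on every call, which is what lets it be used from code that may run on different threads; `LocalAllocatorRef` skips that lookup but is tied to one allocator. A minimal usage sketch, assuming a task that may run on any worker thread; the function and variable names are illustrative, not part of this patch:

void append_on_worker_thread(LocalAllocatorSet &allocator_set)
{
  /* The ref looks up the thread-local allocator on each allocation. */
  ThreadedLocalAllocatorRef allocator_ref{allocator_set.local()};
  Vector<int, 4, ThreadedLocalAllocatorRef> values{allocator_ref};
  for (int i = 0; i < 100; i++) {
    values.append(i); /* Growth goes through the local allocator. */
  }
}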
@@ -78,6 +130,11 @@ inline LocalAllocator &LocalAllocator::local()
   return owner_set_.local();
 }

+inline LocalAllocatorSet &LocalAllocator::owner_set()
+{
+  return owner_set_;
+}
+
 inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment)
 {
   BLI_assert(size > 0);
@@ -138,6 +195,23 @@ inline LocalAllocator::BufferStack &LocalAllocator::get_buffer_stack(const int64
   });
 }

+inline void *LocalAllocator::allocate_with_head(int64_t size, int64_t alignment)
+{
+  const int64_t buffer_size = size + std::max<int64_t>(alignment, sizeof(Head));
+  const int64_t buffer_alignment = std::max<int64_t>(alignment, alignof(Head));
+  void *buffer = this->allocate(buffer_size, buffer_alignment);
+  Head *head = new (buffer) Head;
+  head->buffer_size = buffer_size;
+  head->buffer_alignment = buffer_alignment;
+  return head + 1;
+}
+
+inline void LocalAllocator::deallocate_with_head(const void *buffer)
+{
+  const Head *head = static_cast<const Head *>(buffer) - 1;
+  this->deallocate(head, head->buffer_size, head->buffer_alignment);
+}
+
 template<typename T, typename... Args> inline T &LocalAllocator::allocate_new(Args &&...args)
 {
   void *buffer = this->allocate(sizeof(T), alignof(T));
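The `*_with_head` pair exists because the container-facing interface above only gets the pointer back at deallocation time: `allocate_with_head()` stashes the size and alignment in a `Head` placed directly in front of the address it returns, and `deallocate_with_head()` steps back over it to recover both. Note that `head + 1` satisfies the requested alignment as long as `alignment <= sizeof(Head)` (16 bytes, a power of two per the `static_assert` above). A self-contained sketch of the same trick over `std::aligned_alloc`, independent of the patch:

#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <new>

struct Head {
  int64_t buffer_size;
  int64_t buffer_alignment;
};

void *allocate_with_head(const int64_t size, const int64_t alignment)
{
  /* Reserve room for the head in front of the user pointer. */
  int64_t buffer_size = size + std::max<int64_t>(alignment, int64_t(sizeof(Head)));
  const int64_t buffer_alignment = std::max<int64_t>(alignment, alignof(Head));
  /* std::aligned_alloc requires the size to be a multiple of the alignment. */
  buffer_size = (buffer_size + buffer_alignment - 1) / buffer_alignment * buffer_alignment;
  void *buffer = std::aligned_alloc(buffer_alignment, buffer_size);
  Head *head = new (buffer) Head{buffer_size, buffer_alignment};
  /* Correctly aligned for the caller while alignment <= sizeof(Head). */
  return head + 1;
}

void deallocate_with_head(const void *buffer)
{
  /* Step back over the head; the caller never passes size or alignment. */
  const Head *head = static_cast<const Head *>(buffer) - 1;
  std::free(const_cast<Head *>(head));
}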


@@ -444,9 +444,7 @@ class Executor {
     /* Used for a search through all nodes that outputs depend on. */
     Stack<const Node *, 100> reachable_nodes_to_check;
-    MutableSpan<bool> reachable_node_flags = allocator.allocate_new_array<bool>(all_nodes.size());
-    BLI_SCOPED_DEFER([&]() { allocator.destruct_free_array(reachable_node_flags); });
-    reachable_node_flags.fill(false);
+    Array<bool, 16, LocalAllocatorRef> reachable_node_flags{all_nodes.size(), false, allocator};
     /* Graph outputs are always reachable. */
     for (const InputSocket *socket : self_.graph_outputs_) {
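With the ref in place, the executor's manual triple of `allocate_new_array()`, deferred `destruct_free_array()`, and `fill(false)` collapses into one `Array`: construction value-fills the flags and destruction returns the memory through the same allocator, while the inline capacity of 16 avoids touching the allocator for small graphs. Passing `allocator` directly relies on `LocalAllocatorRef`'s non-explicit converting constructor; the spelled-out equivalent would be:

Array<bool, 16, LocalAllocatorRef> reachable_node_flags{
    all_nodes.size(), false, LocalAllocatorRef{allocator}};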