WIP: Functions: new local allocator for better memory reuse and performance #104630
|
@ -19,6 +19,17 @@
|
||||||
namespace blender {
|
namespace blender {
|
||||||
|
|
||||||
class LocalAllocatorSet;
|
class LocalAllocatorSet;
|
||||||
|
class LocalAllocator;
|
||||||
|
class LocalAllocatorPool;
|
||||||
|
|
||||||
|
class LocalAllocatorPool : NonCopyable, NonMovable {
|
||||||
|
private:
|
||||||
|
Stack<void *> buffers;
|
||||||
|
int64_t element_size = -1;
|
||||||
|
int64_t alignment = -1;
|
||||||
|
|
||||||
|
friend LocalAllocator;
|
||||||
|
};
|
||||||
|
|
||||||
class LocalAllocator : NonCopyable, NonMovable {
|
class LocalAllocator : NonCopyable, NonMovable {
|
||||||
private:
|
private:
|
||||||
|
@ -26,20 +37,14 @@ class LocalAllocator : NonCopyable, NonMovable {
|
||||||
LocalAllocatorSet &owner_set_;
|
LocalAllocatorSet &owner_set_;
|
||||||
LinearAllocator<> linear_allocator_;
|
LinearAllocator<> linear_allocator_;
|
||||||
|
|
||||||
struct BufferStack {
|
|
||||||
Stack<void *, 0> stack;
|
|
||||||
int64_t element_size = -1;
|
|
||||||
int64_t alignment = -1;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Head {
|
struct Head {
|
||||||
int64_t buffer_size;
|
int64_t buffer_size;
|
||||||
int64_t buffer_alignment;
|
int64_t buffer_alignment;
|
||||||
};
|
};
|
||||||
static_assert(is_power_of_2_constexpr(sizeof(Head)));
|
static_assert(is_power_of_2_constexpr(sizeof(Head)));
|
||||||
|
|
||||||
std::array<BufferStack, 8> small_stacks_;
|
std::array<LocalAllocatorPool, 8> small_buffer_pools_;
|
||||||
Map<int, BufferStack> large_stacks_;
|
Map<int, std::unique_ptr<LocalAllocatorPool>> large_buffer_pools_;
|
||||||
|
|
||||||
friend LocalAllocatorSet;
|
friend LocalAllocatorSet;
|
||||||
|
|
||||||
|
@ -55,9 +60,14 @@ class LocalAllocator : NonCopyable, NonMovable {
|
||||||
void *allocate(int64_t size, int64_t alignment);
|
void *allocate(int64_t size, int64_t alignment);
|
||||||
void deallocate(const void *buffer, int64_t size, int64_t alignment);
|
void deallocate(const void *buffer, int64_t size, int64_t alignment);
|
||||||
|
|
||||||
|
void *allocate(LocalAllocatorPool &pool);
|
||||||
|
void deallocate(const void *buffer, LocalAllocatorPool &pool);
|
||||||
|
|
||||||
void *allocate_with_head(int64_t size, int64_t alignment);
|
void *allocate_with_head(int64_t size, int64_t alignment);
|
||||||
void deallocate_with_head(const void *buffer);
|
void deallocate_with_head(const void *buffer);
|
||||||
|
|
||||||
|
LocalAllocatorPool &get_pool(int64_t size, int64_t alignment);
|
||||||
|
|
||||||
template<typename T, typename... Args> T &allocate_new(Args &&...args);
|
template<typename T, typename... Args> T &allocate_new(Args &&...args);
|
||||||
template<typename T, typename... Args> void destruct_free(const T *value);
|
template<typename T, typename... Args> void destruct_free(const T *value);
|
||||||
template<typename T> MutableSpan<T> allocate_array(int64_t size);
|
template<typename T> MutableSpan<T> allocate_array(int64_t size);
|
||||||
|
@ -65,9 +75,6 @@ class LocalAllocator : NonCopyable, NonMovable {
|
||||||
MutableSpan<T> allocate_new_array(int64_t size, Args &&...args);
|
MutableSpan<T> allocate_new_array(int64_t size, Args &&...args);
|
||||||
template<typename T> void destruct_free_array(Span<T> data);
|
template<typename T> void destruct_free_array(Span<T> data);
|
||||||
template<typename T> void destruct_free_array(MutableSpan<T> data);
|
template<typename T> void destruct_free_array(MutableSpan<T> data);
|
||||||
|
|
||||||
private:
|
|
||||||
BufferStack &get_buffer_stack(int64_t size, int64_t alignment);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
class LocalAllocatorSet : NonCopyable, NonMovable {
|
class LocalAllocatorSet : NonCopyable, NonMovable {
|
||||||
|
@ -76,7 +83,7 @@ class LocalAllocatorSet : NonCopyable, NonMovable {
|
||||||
|
|
||||||
#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
|
#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
|
||||||
std::mutex debug_sizes_mutex_;
|
std::mutex debug_sizes_mutex_;
|
||||||
Map<const void *, std::pair<int64_t, int64_t>> debug_sizes_;
|
Map<const void *, int64_t> debug_sizes_;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
friend LocalAllocator;
|
friend LocalAllocator;
|
||||||
|
@ -147,47 +154,53 @@ inline LocalAllocatorSet &LocalAllocator::owner_set()
|
||||||
|
|
||||||
/**
 * Allocate a buffer for a request of \a size bytes with the given \a alignment.
 *
 * The request is mapped to a pool with #get_pool (which also validates the arguments in debug
 * builds) and the actual allocation is done by the pool based overload.
 */
BLI_NOINLINE inline void *LocalAllocator::allocate(const int64_t size, const int64_t alignment)
{
  LocalAllocatorPool &size_class_pool = get_pool(size, alignment);
  /* The chosen pool has to be able to satisfy the request. */
  BLI_assert(size_class_pool.element_size >= size);
  BLI_assert(size_class_pool.alignment >= alignment);

  return allocate(size_class_pool);
}
|
||||||
|
|
||||||
/**
 * Give back a buffer that was allocated for a request of \a size bytes with the given
 * \a alignment.
 *
 * The pool is looked up with #get_pool using the same arguments and the buffer is then handed to
 * the pool based overload.
 */
BLI_NOINLINE inline void LocalAllocator::deallocate(const void *buffer,
                                                    const int64_t size,
                                                    const int64_t alignment)
{
  LocalAllocatorPool &size_class_pool = get_pool(size, alignment);
  /* Sanity check that the pool matches the described buffer. */
  BLI_assert(size_class_pool.element_size >= size);
  BLI_assert(size_class_pool.alignment >= alignment);

  deallocate(buffer, size_class_pool);
}
|
||||||
|
|
||||||
|
/**
 * Allocate one buffer from \a pool.
 *
 * A buffer is popped from the pool's stack of reusable buffers when one is available, otherwise
 * a new one with the pool's element size and alignment is taken from the linear allocator.
 *
 * \param pool: Pool that determines the size and alignment of the returned buffer.
 * \return The buffer that was allocated.
 */
inline void *LocalAllocator::allocate(LocalAllocatorPool &pool)
{
  BLI_assert(this->is_local());

#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED
  /* This overload has no `size`/`alignment` parameters, so the pool's values have to be used.
   * They are the same values the non-guarded path below allocates with. */
  return MEM_mallocN_aligned(pool.element_size, pool.alignment, __func__);
#endif

  void *buffer;
  if (!pool.buffers.is_empty()) {
    /* Reuse a buffer that was deallocated earlier. */
    buffer = pool.buffers.pop();
    BLI_asan_unpoison(buffer, pool.element_size);
  }
  else {
    buffer = linear_allocator_.allocate(pool.element_size, pool.alignment);
  }

#ifdef BLI_LOCAL_ALLOCATOR_DEBUG_SIZES
  {
    /* Remember the size of every buffer that is handed out; the map lives in the owner set and
     * is protected by a mutex. */
    std::lock_guard lock{owner_set_.debug_sizes_mutex_};
    owner_set_.debug_sizes_.add_new(buffer, pool.element_size);
  }
#endif

  return buffer;
}
|
||||||
|
|
||||||
|
inline void LocalAllocator::deallocate(const void *buffer, LocalAllocatorPool &pool)
|
||||||
|
{
|
||||||
BLI_assert(this->is_local());
|
BLI_assert(this->is_local());
|
||||||
|
|
||||||
#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED
|
#ifdef BLI_LOCAL_ALLOCATOR_USE_GUARDED
|
||||||
|
@ -210,29 +223,32 @@ BLI_NOINLINE inline void LocalAllocator::deallocate(const void *buffer,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
memset(const_cast<void *>(buffer), -1, size);
|
memset(const_cast<void *>(buffer), -1, pool.element_size);
|
||||||
#endif
|
#endif
|
||||||
BLI_asan_poison(buffer, size);
|
|
||||||
|
|
||||||
BufferStack &buffer_stack = this->get_buffer_stack(size, alignment);
|
BLI_asan_poison(buffer, pool.element_size);
|
||||||
BLI_assert(buffer_stack.element_size >= size);
|
|
||||||
BLI_assert(buffer_stack.alignment >= alignment);
|
|
||||||
|
|
||||||
buffer_stack.stack.push(const_cast<void *>(buffer));
|
pool.buffers.push(const_cast<void *>(buffer));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline LocalAllocator::BufferStack &LocalAllocator::get_buffer_stack(const int64_t size,
|
inline LocalAllocatorPool &LocalAllocator::get_pool(const int64_t size, const int64_t alignment)
|
||||||
const int64_t /*alignment*/)
|
|
||||||
{
|
{
|
||||||
|
BLI_assert(size > 0);
|
||||||
|
BLI_assert(alignment <= size);
|
||||||
|
BLI_assert(alignment <= s_alignment);
|
||||||
|
BLI_assert(is_power_of_2_i(alignment));
|
||||||
|
UNUSED_VARS_NDEBUG(alignment);
|
||||||
|
|
||||||
|
BLI_assert(this->is_local());
|
||||||
if (size <= 64) {
|
if (size <= 64) {
|
||||||
return small_stacks_[(size - 1) >> 3];
|
return small_buffer_pools_[(size - 1) >> 3];
|
||||||
}
|
}
|
||||||
const int key = bitscan_reverse_uint64(uint64_t(size));
|
const int key = bitscan_reverse_uint64(uint64_t(size));
|
||||||
return large_stacks_.lookup_or_add_cb(key, [&]() {
|
return *large_buffer_pools_.lookup_or_add_cb(key, [&]() {
|
||||||
BufferStack buffer_stack;
|
auto pool = std::make_unique<LocalAllocatorPool>();
|
||||||
buffer_stack.element_size = int64_t(1) << (8 * sizeof(int64_t) - key);
|
pool->element_size = int64_t(1) << (8 * sizeof(int64_t) - key);
|
||||||
buffer_stack.alignment = s_alignment;
|
pool->alignment = s_alignment;
|
||||||
return buffer_stack;
|
return pool;
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,10 +13,10 @@ LocalAllocatorSet::~LocalAllocatorSet() = default;
|
||||||
|
|
||||||
/**
 * Set up the fixed pools for small buffers: the pool at index `i` gets an element size of
 * `8 * (i + 1)` bytes and the alignment that #power_of_2_min_u computes from that size.
 */
LocalAllocator::LocalAllocator(LocalAllocatorSet &owner_set) : owner_set_(owner_set)
{
  /* Running element size, grows by 8 bytes from one pool to the next. */
  int64_t next_element_size = 0;
  for (LocalAllocatorPool &small_pool : small_buffer_pools_) {
    next_element_size += 8;
    small_pool.element_size = next_element_size;
    small_pool.alignment = power_of_2_min_u(small_pool.element_size);
  }
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue