BLI: refactor IndexMask for better performance and memory usage #104629

Merged
Jacques Lucke merged 254 commits from JacquesLucke/blender:index-mask-refactor into main 2023-05-24 18:11:47 +02:00
3 changed files with 47 additions and 30 deletions
Showing only changes of commit 3d37fdb267

View File

@@ -16,7 +16,11 @@
namespace blender {
#define BLI_DEBUG_LINEAR_ALLOCATOR_SIZE
/**
* If enabled, #LinearAllocator keeps track of how much memory it owns and how much it has
* allocated.
*/
// #define BLI_DEBUG_LINEAR_ALLOCATOR_SIZE
template<typename Allocator = GuardedAllocator> class LinearAllocator : NonCopyable, NonMovable {
private:
@@ -207,20 +211,14 @@ template<typename Allocator = GuardedAllocator> class LinearAllocator : NonCopya
this->provide_buffer(aligned_buffer.ptr(), Size);
}
/**
* Pass ownership of a buffer to this allocator. It is freed when the allocator is freed. The
* buffer won't be used for further small allocations. For that purpose use #provide_buffer.
*/
void give_ownership_of_buffer(const void *buffer)
{
owned_buffers_.append(const_cast<void *>(buffer));
}
/**
* This allocator takes ownership of the buffers owned by `other`. Therefore, when `other` is
* destructed, memory allocated using it is not freed.
*
* Note that the caller is responsible for making sure that buffers passed into #provide_buffer
* of `other` live at least as long as this allocator.
*/
void give_ownership_of_buffers_in(LinearAllocator<> &other)
void transfer_ownership_from(LinearAllocator<> &other)
{
owned_buffers_.extend(other.owned_buffers_);
#ifdef BLI_DEBUG_LINEAR_ALLOCATOR_SIZE

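For context, the rename above makes the buffer hand-over a single call: `transfer_ownership_from` moves every buffer owned by `other` into this allocator, so allocations made through `other` outlive it. Below is a minimal sketch of that pattern; the helper name `build_values` and the sizes are invented, while the calls themselves are the ones shown in this diff and in the test added at the end of this commit.

#include "BLI_linear_allocator.hh"
#include "BLI_span.hh"

using blender::LinearAllocator;
using blender::MutableSpan;

/* Illustrative helper: allocate from a short-lived allocator, then hand the memory
 * over to a long-lived one so the returned span stays valid after `scratch` dies. */
static MutableSpan<int> build_values(LinearAllocator<> &long_lived)
{
  LinearAllocator<> scratch;
  MutableSpan<int> values = scratch.allocate_array<int>(100);
  values.fill(0);
  /* After this call, `long_lived` frees the buffer; destroying `scratch` does not. */
  long_lived.transfer_ownership_from(scratch);
  return values;
}

As the comment above notes, this only covers buffers the source allocator owns; buffers handed to #provide_buffer of `scratch` must still outlive `long_lived`.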
View File

@@ -51,7 +51,7 @@ struct GrainSize {
namespace blender::threading {
template<typename Range, typename Function>
void parallel_for_each(Range &&range, const Function &function)
inline void parallel_for_each(Range &&range, const Function &function)
{
#ifdef WITH_TBB
tbb::parallel_for_each(range, function);
@@ -111,10 +111,10 @@ inline IndexRange align_sub_range(const IndexRange unaligned_range,
* larger, which means that work is distributed less evenly.
*/
template<typename Function>
void parallel_for_aligned(const IndexRange range,
const int64_t grain_size,
const int64_t alignment,
const Function &function)
inline void parallel_for_aligned(const IndexRange range,
const int64_t grain_size,
const int64_t alignment,
const Function &function)
{
parallel_for(range, grain_size, [&](const IndexRange unaligned_range) {
const IndexRange aligned_range = align_sub_range(unaligned_range, alignment, range);
@@ -123,11 +123,11 @@ void parallel_for_aligned(const IndexRange range,
}
template<typename Value, typename Function, typename Reduction>
Value parallel_reduce(IndexRange range,
int64_t grain_size,
const Value &identity,
const Function &function,
const Reduction &reduction)
inline Value parallel_reduce(IndexRange range,
int64_t grain_size,
const Value &identity,
const Function &function,
const Reduction &reduction)
{
#ifdef WITH_TBB
if (range.size() >= grain_size) {
@@ -147,12 +147,12 @@ Value parallel_reduce(IndexRange range,
}
template<typename Value, typename Function, typename Reduction>
Value parallel_reduce_aligned(const IndexRange range,
const int64_t grain_size,
const int64_t alignment,
const Value &identity,
const Function &function,
const Reduction &reduction)
inline Value parallel_reduce_aligned(const IndexRange range,
const int64_t grain_size,
const int64_t alignment,
const Value &identity,
const Function &function,
const Reduction &reduction)
{
parallel_reduce(
range,
@@ -169,7 +169,7 @@ Value parallel_reduce_aligned(const IndexRange range,
* Execute all of the provided functions. The functions might be executed in parallel or in serial
* or some combination of both.
*/
template<typename... Functions> void parallel_invoke(Functions &&...functions)
template<typename... Functions> inline void parallel_invoke(Functions &&...functions)
{
#ifdef WITH_TBB
tbb::parallel_invoke(std::forward<Functions>(functions)...);
@@ -184,7 +184,7 @@ template<typename... Functions> void parallel_invoke(Functions &&...functions)
* tasks.
*/
template<typename... Functions>
void parallel_invoke(const bool use_threading, Functions &&...functions)
inline void parallel_invoke(const bool use_threading, Functions &&...functions)
{
if (use_threading) {
lazy_threading::send_hint();
@@ -196,7 +196,7 @@ void parallel_invoke(const bool use_threading, Functions &&...functions)
}
/** See #BLI_task_isolate for a description of what isolating a task means. */
template<typename Function> void isolate_task(const Function &function)
template<typename Function> inline void isolate_task(const Function &function)
{
#ifdef WITH_TBB
lazy_threading::ReceiverIsolation isolation;

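The hunks above only touch signatures (adding `inline`), so for readability here is a hedged usage sketch of the `parallel_invoke` overload whose doc comment is quoted above; the function and variable names are invented for illustration.

#include <algorithm>

#include "BLI_task.hh"
#include "BLI_vector.hh"

namespace blender {

/* Illustrative only: sort two independent arrays, possibly in parallel. */
static void sort_both(Vector<int> &a, Vector<int> &b)
{
  /* Request threading only when the input is large enough to be worth it; either
   * way, both lambdas have finished by the time the call returns. */
  const bool use_threading = a.size() + b.size() > 4096;
  threading::parallel_invoke(use_threading,
                             [&]() { std::sort(a.begin(), a.end()); },
                             [&]() { std::sort(b.begin(), b.end()); });
}

}  // namespace blender

When Blender is built without TBB (`WITH_TBB` off), these helpers fall back to serial execution, so callers do not need a separate non-threaded code path.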
View File

@@ -149,4 +149,23 @@ TEST(linear_allocator, ConstructArray)
}
}
TEST(linear_allocator, TransferOwnership)
{
LinearAllocator<> main_allocator;
MutableSpan<int> values;
/* Allocate a large buffer that is likely to be given back to the system when freed. This test
* essentially only fails by crashing with a segfault. */
const int size = 1'000'000;
const int value = 42;
const int index = 500'000;
{
LinearAllocator<> nested_allocator;
values = nested_allocator.allocate_array<int>(size);
values[index] = value;
main_allocator.transfer_ownership_from(nested_allocator);
}
EXPECT_EQ(values[index], value);
}
} // namespace blender::tests