Compare commits
13 Commits
refactor-m
...
devirtuali
Author | SHA1 | Date | |
---|---|---|---|
ba41c84d08 | |||
99ccdcf75f | |||
210e84234b | |||
9fd2e540ab | |||
5e4d36dbbe | |||
5749cb3221 | |||
f7908bda06 | |||
3e5b988272 | |||
96b9f0d021 | |||
0264b27c5e | |||
475685fe63 | |||
91fd4ff924 | |||
594596d5aa |
233
source/blender/blenlib/BLI_virtual_array_devirtualize.hh
Normal file
233
source/blender/blenlib/BLI_virtual_array_devirtualize.hh
Normal file
@@ -0,0 +1,233 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
#include <array>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>

#include "BLI_virtual_array.hh"
|
||||
|
||||
namespace blender {
|
||||
|
||||
/**
 * Non-template base of every #SingleInputTag. It allows checking whether a parameter tag
 * describes an input with `std::is_base_of_v<SingleInputTagBase, Tag>`, independent of the
 * element type.
 */
struct SingleInputTagBase {};

/**
 * Compile-time tag for a single input parameter whose elements have type `T`.
 * The tag carries no data; it only exposes the element type as `BaseType`.
 */
template<typename T> struct SingleInputTag : public SingleInputTagBase {
  using BaseType = T;
};
|
||||
/**
 * Non-template base of every #SingleOutputTag. It allows checking whether a parameter tag
 * describes an output with `std::is_base_of_v<SingleOutputTagBase, Tag>`, independent of the
 * element type.
 */
struct SingleOutputTagBase {};

/**
 * Compile-time tag for a single output parameter whose elements have type `T`.
 * The tag carries no data; it only exposes the element type as `BaseType`.
 */
template<typename T> struct SingleOutputTag : public SingleOutputTagBase {
  using BaseType = T;
};
|
||||
|
||||
template<typename T> struct ParamType {
|
||||
};
|
||||
|
||||
template<typename T> struct ParamType<SingleInputTag<T>> {
|
||||
using type = VArray<T>;
|
||||
};
|
||||
|
||||
template<typename T> struct ParamType<SingleOutputTag<T>> {
|
||||
using type = MutableSpan<T>;
|
||||
};
|
||||
|
||||
/**
 * Mode tags that select how a parameter is handed to the called function.
 * They are empty types that are only used for compile-time dispatch.
 */

/** Pass the parameter unchanged (for inputs: the virtual array itself). */
struct DevirtualizeNone {};

/** Pass an input through the span that is stored inside its virtual array. */
struct DevirtualizeSpan {};

/** Pass an input whose virtual array stores a single value. */
struct DevirtualizeSingle {};
|
||||
|
||||
/** Element type (`BaseType`) of the parameter tag stored at index `I` of the tuple `TagsTuple`. */
template<typename TagsTuple, size_t I>
using BaseType = typename std::tuple_element_t<I, TagsTuple>::BaseType;
|
||||
|
||||
template<typename Fn, typename... Args> class ArrayDevirtualizer {
|
||||
private:
|
||||
using TagsTuple = std::tuple<Args...>;
|
||||
|
||||
Fn fn_;
|
||||
IndexMask mask_;
|
||||
std::tuple<const typename ParamType<Args>::type *...> params_;
|
||||
|
||||
std::array<bool, sizeof...(Args)> varray_is_span_;
|
||||
std::array<bool, sizeof...(Args)> varray_is_single_;
|
||||
|
||||
bool executed_ = false;
|
||||
|
||||
public:
|
||||
ArrayDevirtualizer(Fn fn, const IndexMask *mask, const typename ParamType<Args>::type *...params)
|
||||
: fn_(std::move(fn)), mask_(*mask), params_{params...}
|
||||
{
|
||||
this->init(std::make_index_sequence<sizeof...(Args)>{});
|
||||
}
|
||||
|
||||
void execute_fallback()
|
||||
{
|
||||
BLI_assert(!executed_);
|
||||
this->execute_fallback_impl(std::make_index_sequence<sizeof...(Args)>{});
|
||||
}
|
||||
|
||||
bool try_execute_devirtualized()
|
||||
{
|
||||
BLI_assert(!executed_);
|
||||
return this->try_execute_devirtualized_impl();
|
||||
}
|
||||
|
||||
void execute_materialized()
|
||||
{
|
||||
BLI_assert(!executed_);
|
||||
this->execute_materialized_impl(std::make_index_sequence<sizeof...(Args)>{});
|
||||
}
|
||||
|
||||
private:
|
||||
template<size_t... I> void execute_materialized_impl(std::index_sequence<I...> /* indices */)
|
||||
{
|
||||
static constexpr int64_t MaxChunkSize = 32;
|
||||
const int64_t mask_size = mask_.size();
|
||||
std::tuple<TypedBuffer<BaseType<TagsTuple, I>, MaxChunkSize>...> buffers_owner;
|
||||
std::tuple<MutableSpan<BaseType<TagsTuple, I>>...> buffers = {
|
||||
MutableSpan{std::get<I>(buffers_owner).ptr(), std::min(mask_size, MaxChunkSize)}...};
|
||||
|
||||
(
|
||||
[&]() {
|
||||
using ParamTag = std::tuple_element_t<I, TagsTuple>;
|
||||
using T = typename ParamTag::BaseType;
|
||||
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
|
||||
MutableSpan in_chunk = std::get<I>(buffers);
|
||||
if (varray_is_single_[I]) {
|
||||
const VArray<T> *varray = std::get<I>(params_);
|
||||
const T in_single = varray->get_internal_single();
|
||||
in_chunk.fill(in_single);
|
||||
}
|
||||
}
|
||||
}(),
|
||||
...);
|
||||
|
||||
for (int64_t chunk_start = 0; chunk_start < mask_size; chunk_start += MaxChunkSize) {
|
||||
const int64_t chunk_size = std::min(mask_size - chunk_start, MaxChunkSize);
|
||||
const IndexMask sliced_mask = mask_.slice(chunk_start, chunk_size);
|
||||
const int64_t sliced_mask_size = sliced_mask.size();
|
||||
(
|
||||
[&]() {
|
||||
using ParamTag = std::tuple_element_t<I, TagsTuple>;
|
||||
using T = typename ParamTag::BaseType;
|
||||
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
|
||||
if (!varray_is_single_[I]) {
|
||||
MutableSpan in_chunk = std::get<I>(buffers).take_front(sliced_mask_size);
|
||||
const VArray<T> *varray = std::get<I>(params_);
|
||||
varray->materialize_compressed_to_uninitialized(sliced_mask, in_chunk);
|
||||
}
|
||||
}
|
||||
}(),
|
||||
...);
|
||||
|
||||
fn_(IndexRange(sliced_mask_size), sliced_mask, [&]() {
|
||||
using ParamTag = std::tuple_element_t<I, TagsTuple>;
|
||||
using T = typename ParamTag::BaseType;
|
||||
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
|
||||
MutableSpan<T> in_chunk = std::get<I>(buffers).take_front(sliced_mask_size);
|
||||
return in_chunk;
|
||||
}
|
||||
else if constexpr (std::is_base_of_v<SingleOutputTagBase, ParamTag>) {
|
||||
MutableSpan<T> out_span = *std::get<I>(params_);
|
||||
return out_span.data();
|
||||
}
|
||||
}()...);
|
||||
|
||||
(
|
||||
[&]() {
|
||||
using ParamTag = std::tuple_element_t<I, TagsTuple>;
|
||||
using T = typename ParamTag::BaseType;
|
||||
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
|
||||
MutableSpan<T> in_chunk = std::get<I>(buffers);
|
||||
destruct_n(in_chunk.data(), sliced_mask_size);
|
||||
}
|
||||
}(),
|
||||
...);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename... Mode> bool try_execute_devirtualized_impl()
|
||||
{
|
||||
if constexpr (sizeof...(Mode) == sizeof...(Args)) {
|
||||
this->try_execute_devirtualized_impl_call(std::tuple<Mode...>(),
|
||||
std::make_index_sequence<sizeof...(Args)>());
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
constexpr size_t I = sizeof...(Mode);
|
||||
using ParamTag = std::tuple_element_t<I, TagsTuple>;
|
||||
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
|
||||
if (varray_is_single_[I]) {
|
||||
return this->try_execute_devirtualized_impl<Mode..., DevirtualizeSingle>();
|
||||
}
|
||||
else if (varray_is_span_[I]) {
|
||||
return this->try_execute_devirtualized_impl<Mode..., DevirtualizeSpan>();
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
return this->try_execute_devirtualized_impl<Mode..., DevirtualizeNone>();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename... Mode, size_t... I>
|
||||
void try_execute_devirtualized_impl_call(std::tuple<Mode...> /* modes */,
|
||||
std::index_sequence<I...> /* indices */)
|
||||
{
|
||||
mask_.to_best_mask_type([&](auto mask) {
|
||||
fn_(mask,
|
||||
mask,
|
||||
this->get_execute_param<I, std::tuple_element_t<I, std::tuple<Mode...>>>()...);
|
||||
});
|
||||
executed_ = true;
|
||||
}
|
||||
|
||||
template<size_t... I> void init(std::index_sequence<I...> /* indices */)
|
||||
{
|
||||
varray_is_span_.fill(false);
|
||||
varray_is_single_.fill(false);
|
||||
(this->init_param<I>(), ...);
|
||||
}
|
||||
|
||||
template<size_t I> void init_param()
|
||||
{
|
||||
using ParamTag = std::tuple_element_t<I, TagsTuple>;
|
||||
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
|
||||
const typename ParamType<ParamTag>::type *varray = std::get<I>(params_);
|
||||
varray_is_span_[I] = varray->is_span();
|
||||
varray_is_single_[I] = varray->is_single();
|
||||
}
|
||||
}
|
||||
|
||||
template<size_t... I> void execute_fallback_impl(std::index_sequence<I...> /* indices */)
|
||||
{
|
||||
fn_(mask_, mask_, this->get_execute_param<I, DevirtualizeNone>()...);
|
||||
executed_ = true;
|
||||
}
|
||||
|
||||
template<size_t I, typename Mode> auto get_execute_param()
|
||||
{
|
||||
using ParamTag = std::tuple_element_t<I, TagsTuple>;
|
||||
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
|
||||
using T = typename ParamTag::BaseType;
|
||||
const VArray<T> *varray = std::get<I>(params_);
|
||||
if constexpr (std::is_same_v<Mode, DevirtualizeNone>) {
|
||||
return *varray;
|
||||
}
|
||||
else if constexpr (std::is_same_v<Mode, DevirtualizeSingle>) {
|
||||
return SingleAsSpan(*varray);
|
||||
}
|
||||
else if constexpr (std::is_same_v<Mode, DevirtualizeSpan>) {
|
||||
return varray->get_internal_span();
|
||||
}
|
||||
}
|
||||
else if constexpr (std::is_base_of_v<SingleOutputTagBase, ParamTag>) {
|
||||
return std::get<I>(params_)->data();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace blender
|
@@ -5,6 +5,7 @@
|
||||
#include "BLI_vector.hh"
|
||||
#include "BLI_vector_set.hh"
|
||||
#include "BLI_virtual_array.hh"
|
||||
#include "BLI_virtual_array_devirtualize.hh"
|
||||
#include "testing/testing.h"
|
||||
|
||||
namespace blender::tests {
|
||||
@@ -222,4 +223,44 @@ TEST(virtual_array, MaterializeCompressed)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Test function for the devirtualizer: adds two integer inputs element-wise.
 * For every position `i`, both inputs are read at `in_indices[i]` and the sum is written to
 * `out1[out_indices[i]]`. Both index containers are expected to have the same size.
 */
struct MyOperator {
  template<typename InIndices, typename OutIndices, typename In1Array, typename In2Array>
  void operator()(InIndices in_indices,
                  OutIndices out_indices,
                  In1Array in1,
                  In2Array in2,
                  int *__restrict out1)
  {
    const int64_t size = in_indices.size();
    for (int64_t i = 0; i < size; i++) {
      const int64_t in_i = in_indices[i];
      const int64_t out_i = out_indices[i];
      out1[out_i] = in1[in_i] + in2[in_i];
    }
  }
};
|
||||
|
||||
/**
 * Runs #MyOperator through the materialized execution path with two single-value inputs
 * (3 and 5) and checks that every masked output element contains their sum.
 */
TEST(virtual_array, Devirtualize)
{
  MyOperator fn;

  IndexMask mask(IndexRange(10));
  VArray<int> in1 = VArray<int>::ForSingle(3, 10);
  VArray<int> in2 = VArray<int>::ForSingle(5, 10);
  std::array<int, 10> out1_array;
  MutableSpan<int> out1 = out1_array;
  /* Start with a value that the operator can't produce, so unwritten elements are detected. */
  out1.fill(-1);

  ArrayDevirtualizer<decltype(fn), SingleInputTag<int>, SingleInputTag<int>, SingleOutputTag<int>>
      devirtualizer{fn, &mask, &in1, &in2, &out1};

  devirtualizer.execute_materialized();

  /* The mask covers the entire output, so every element has to be written. */
  for (const int value : out1) {
    EXPECT_EQ(value, 8);
  }
}
|
||||
|
||||
} // namespace blender::tests
|
||||
|
@@ -10,6 +10,8 @@
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "BLI_virtual_array_devirtualize.hh"
|
||||
|
||||
#include "FN_multi_function.hh"
|
||||
|
||||
namespace blender::fn {
|
||||
@@ -47,72 +49,23 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
|
||||
template<typename ElementFuncT> static FunctionT create_function(ElementFuncT element_fn)
|
||||
{
|
||||
return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) {
|
||||
if (in1.is_single()) {
|
||||
/* Only evaluate the function once when the input is a single value. */
|
||||
const In1 in1_single = in1.get_internal_single();
|
||||
const Out1 out1_single = element_fn(in1_single);
|
||||
out1.fill_indices(mask, out1_single);
|
||||
return;
|
||||
}
|
||||
|
||||
if (in1.is_span()) {
|
||||
const Span<In1> in1_span = in1.get_internal_span();
|
||||
mask.to_best_mask_type(
|
||||
[&](auto mask) { execute_SI_SO(element_fn, mask, in1_span, out1.data()); });
|
||||
return;
|
||||
}
|
||||
|
||||
/* The input is an unknown virtual array type. To avoid virtual function call overhead for
|
||||
* every element, elements are retrieved and processed in chunks. */
|
||||
|
||||
static constexpr int64_t MaxChunkSize = 32;
|
||||
TypedBuffer<In1, MaxChunkSize> in1_buffer_owner;
|
||||
MutableSpan<In1> in1_buffer{in1_buffer_owner.ptr(), MaxChunkSize};
|
||||
|
||||
const int64_t mask_size = mask.size();
|
||||
for (int64_t chunk_start = 0; chunk_start < mask_size; chunk_start += MaxChunkSize) {
|
||||
const int64_t chunk_size = std::min(mask_size - chunk_start, MaxChunkSize);
|
||||
const IndexMask sliced_mask = mask.slice(chunk_start, chunk_size);
|
||||
|
||||
/* Load input from the virtual array. */
|
||||
MutableSpan<In1> in1_chunk = in1_buffer.take_front(chunk_size);
|
||||
in1.materialize_compressed_to_uninitialized(sliced_mask, in1_chunk);
|
||||
|
||||
if (sliced_mask.is_range()) {
|
||||
execute_SI_SO(
|
||||
element_fn, IndexRange(chunk_size), in1_chunk, out1.data() + sliced_mask[0]);
|
||||
auto fn = [&](auto in_indices, auto out_indices, auto in1, Out1 *__restrict out1) {
|
||||
BLI_assert(in_indices.size() == out_indices.size());
|
||||
for (const int64_t i : IndexRange(in_indices.size())) {
|
||||
const int64_t in_index = in_indices[i];
|
||||
const int64_t out_index = out_indices[i];
|
||||
new (out1 + out_index) Out1(element_fn(in1[in_index]));
|
||||
}
|
||||
else {
|
||||
execute_SI_SO_compressed(element_fn, sliced_mask, in1_chunk, out1.data());
|
||||
}
|
||||
destruct_n(in1_chunk.data(), chunk_size);
|
||||
};
|
||||
|
||||
ArrayDevirtualizer<decltype(fn), SingleInputTag<In1>, SingleOutputTag<Out1>> devirtualizer{
|
||||
fn, &mask, &in1, &out1};
|
||||
if (!devirtualizer.try_execute_devirtualized()) {
|
||||
devirtualizer.execute_materialized();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template<typename ElementFuncT, typename MaskT, typename In1Array>
|
||||
BLI_NOINLINE static void execute_SI_SO(const ElementFuncT &element_fn,
|
||||
MaskT mask,
|
||||
const In1Array &in1,
|
||||
Out1 *__restrict r_out)
|
||||
{
|
||||
for (const int64_t i : mask) {
|
||||
new (r_out + i) Out1(element_fn(in1[i]));
|
||||
}
|
||||
}
|
||||
|
||||
/** Expects the input array to be "compressed", i.e. there are no gaps between the elements. */
|
||||
template<typename ElementFuncT, typename MaskT, typename In1Array>
|
||||
BLI_NOINLINE static void execute_SI_SO_compressed(const ElementFuncT &element_fn,
|
||||
MaskT mask,
|
||||
const In1Array &in1,
|
||||
Out1 *__restrict r_out)
|
||||
{
|
||||
for (const int64_t i : IndexRange(mask.size())) {
|
||||
new (r_out + mask[i]) Out1(element_fn(in1[i]));
|
||||
}
|
||||
}
|
||||
|
||||
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
|
||||
{
|
||||
const VArray<In1> &in1 = params.readonly_single_input<In1>(0);
|
||||
@@ -158,24 +111,24 @@ class CustomMF_SI_SI_SO : public MultiFunction {
|
||||
const VArray<In1> &in1,
|
||||
const VArray<In2> &in2,
|
||||
MutableSpan<Out1> out1) {
|
||||
/* Devirtualization results in a 2-3x speedup for some simple functions. */
|
||||
devirtualize_varray2(in1, in2, [&](const auto &in1, const auto &in2) {
|
||||
mask.to_best_mask_type(
|
||||
[&](const auto &mask) { execute_SI_SI_SO(element_fn, mask, in1, in2, out1.data()); });
|
||||
});
|
||||
};
|
||||
}
|
||||
auto fn = [&](auto in_indices, auto out_indices, auto in1, auto in2, Out1 *__restrict out1) {
|
||||
BLI_assert(in_indices.size() == out_indices.size());
|
||||
for (const int64_t i : IndexRange(in_indices.size())) {
|
||||
const int64_t in_index = in_indices[i];
|
||||
const int64_t out_index = out_indices[i];
|
||||
new (out1 + out_index) Out1(element_fn(in1[in_index], in2[in_index]));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename ElementFuncT, typename MaskT, typename In1Array, typename In2Array>
|
||||
BLI_NOINLINE static void execute_SI_SI_SO(const ElementFuncT &element_fn,
|
||||
MaskT mask,
|
||||
const In1Array &in1,
|
||||
const In2Array &in2,
|
||||
Out1 *__restrict r_out)
|
||||
{
|
||||
for (const int64_t i : mask) {
|
||||
new (r_out + i) Out1(element_fn(in1[i], in2[i]));
|
||||
}
|
||||
ArrayDevirtualizer<decltype(fn),
|
||||
SingleInputTag<In1>,
|
||||
SingleInputTag<In2>,
|
||||
SingleOutputTag<Out1>>
|
||||
devirtualizer{fn, &mask, &in1, &in2, &out1};
|
||||
if (!devirtualizer.try_execute_devirtualized()) {
|
||||
devirtualizer.execute_materialized();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
|
||||
@@ -230,27 +183,30 @@ class CustomMF_SI_SI_SI_SO : public MultiFunction {
|
||||
const VArray<In2> &in2,
|
||||
const VArray<In3> &in3,
|
||||
MutableSpan<Out1> out1) {
|
||||
/* Virtual arrays are not devirtualized yet, to avoid generating lots of code without further
|
||||
* consideration. */
|
||||
execute_SI_SI_SI_SO(element_fn, mask, in1, in2, in3, out1.data());
|
||||
};
|
||||
}
|
||||
auto fn = [&](auto in_indices,
|
||||
auto out_indices,
|
||||
auto in1,
|
||||
auto in2,
|
||||
auto in3,
|
||||
Out1 *__restrict out1) {
|
||||
BLI_assert(in_indices.size() == out_indices.size());
|
||||
for (const int64_t i : IndexRange(in_indices.size())) {
|
||||
const int64_t in_index = in_indices[i];
|
||||
const int64_t out_index = out_indices[i];
|
||||
new (out1 + out_index) Out1(element_fn(in1[in_index], in2[in_index], in3[in_index]));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename ElementFuncT,
|
||||
typename MaskT,
|
||||
typename In1Array,
|
||||
typename In2Array,
|
||||
typename In3Array>
|
||||
BLI_NOINLINE static void execute_SI_SI_SI_SO(const ElementFuncT &element_fn,
|
||||
MaskT mask,
|
||||
const In1Array &in1,
|
||||
const In2Array &in2,
|
||||
const In3Array &in3,
|
||||
Out1 *__restrict r_out)
|
||||
{
|
||||
for (const int64_t i : mask) {
|
||||
new (r_out + i) Out1(element_fn(in1[i], in2[i], in3[i]));
|
||||
}
|
||||
ArrayDevirtualizer<decltype(fn),
|
||||
SingleInputTag<In1>,
|
||||
SingleInputTag<In2>,
|
||||
SingleInputTag<In3>,
|
||||
SingleOutputTag<Out1>>
|
||||
devirtualizer{fn, &mask, &in1, &in2, &in3, &out1};
|
||||
if (!devirtualizer.try_execute_devirtualized()) {
|
||||
devirtualizer.execute_materialized();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
|
||||
|
Reference in New Issue
Block a user