1
1

Compare commits

...

13 Commits

Author SHA1 Message Date
ba41c84d08 use in CustomMF_SI_SI_SI_SO 2022-04-10 15:57:40 +02:00
99ccdcf75f cleanup 2022-04-10 15:47:56 +02:00
210e84234b use in CustomMF_SI_SI_SO 2022-04-10 15:47:38 +02:00
9fd2e540ab use in CustomMF_SI_SO 2022-04-10 15:37:26 +02:00
5e4d36dbbe avoid materializing single array many times 2022-04-10 15:25:55 +02:00
5749cb3221 make test run again 2022-04-10 15:19:35 +02:00
f7908bda06 support execute materialized 2022-04-10 15:17:57 +02:00
3e5b988272 cleanup 2022-04-10 14:45:14 +02:00
96b9f0d021 progress 2022-04-10 14:42:41 +02:00
0264b27c5e progress 2022-04-10 14:27:52 +02:00
475685fe63 progress 2022-04-10 13:21:54 +02:00
91fd4ff924 progress 2022-04-10 13:14:12 +02:00
594596d5aa initial devirtualizer 2022-04-10 13:03:28 +02:00
3 changed files with 328 additions and 98 deletions

View File

@@ -0,0 +1,233 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <algorithm>
#include <array>
#include <memory>
#include <tuple>
#include <type_traits>
#include <utility>

#include "BLI_virtual_array.hh"
namespace blender {
/** Common base of every single-input tag, so that a tag can be classified with `std::is_base_of`. */
struct SingleInputTagBase {};

/** Tag for an input parameter whose elements have type `T`. */
template<typename T> struct SingleInputTag : SingleInputTagBase {
  typedef T BaseType;
};

/** Common base of every single-output tag, so that a tag can be classified with `std::is_base_of`. */
struct SingleOutputTagBase {};

/** Tag for an output parameter whose elements have type `T`. */
template<typename T> struct SingleOutputTag : SingleOutputTagBase {
  typedef T BaseType;
};
/**
 * Maps a parameter tag to the type of the object that carries the parameter's data.
 * The primary template has no `type` member, so only the specializations below are usable.
 */
template<typename Tag> struct ParamType {};

/** Inputs are read through a virtual array. */
template<typename T> struct ParamType<SingleInputTag<T>> {
  typedef VArray<T> type;
};

/** Outputs are written through a mutable span. */
template<typename T> struct ParamType<SingleOutputTag<T>> {
  typedef MutableSpan<T> type;
};
/* Empty types that select, at compile time, how a parameter is handed to the called function. */

/** Pass the parameter as is (inputs stay virtual arrays). */
struct DevirtualizeNone {};

/** Pass an input as the span that backs its virtual array. */
struct DevirtualizeSpan {};

/** Pass an input that is a single value wrapped so that it can be indexed like a span. */
struct DevirtualizeSingle {};
/** Element type (`BaseType` member) of the tag stored at position `I` of the tuple `TagsTuple`. */
template<typename TagsTuple, size_t I>
using BaseType = typename std::tuple_element<I, TagsTuple>::type::BaseType;
template<typename Fn, typename... Args> class ArrayDevirtualizer {
private:
using TagsTuple = std::tuple<Args...>;
Fn fn_;
IndexMask mask_;
std::tuple<const typename ParamType<Args>::type *...> params_;
std::array<bool, sizeof...(Args)> varray_is_span_;
std::array<bool, sizeof...(Args)> varray_is_single_;
bool executed_ = false;
public:
ArrayDevirtualizer(Fn fn, const IndexMask *mask, const typename ParamType<Args>::type *...params)
: fn_(std::move(fn)), mask_(*mask), params_{params...}
{
this->init(std::make_index_sequence<sizeof...(Args)>{});
}
void execute_fallback()
{
BLI_assert(!executed_);
this->execute_fallback_impl(std::make_index_sequence<sizeof...(Args)>{});
}
bool try_execute_devirtualized()
{
BLI_assert(!executed_);
return this->try_execute_devirtualized_impl();
}
void execute_materialized()
{
BLI_assert(!executed_);
this->execute_materialized_impl(std::make_index_sequence<sizeof...(Args)>{});
}
private:
template<size_t... I> void execute_materialized_impl(std::index_sequence<I...> /* indices */)
{
static constexpr int64_t MaxChunkSize = 32;
const int64_t mask_size = mask_.size();
std::tuple<TypedBuffer<BaseType<TagsTuple, I>, MaxChunkSize>...> buffers_owner;
std::tuple<MutableSpan<BaseType<TagsTuple, I>>...> buffers = {
MutableSpan{std::get<I>(buffers_owner).ptr(), std::min(mask_size, MaxChunkSize)}...};
(
[&]() {
using ParamTag = std::tuple_element_t<I, TagsTuple>;
using T = typename ParamTag::BaseType;
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
MutableSpan in_chunk = std::get<I>(buffers);
if (varray_is_single_[I]) {
const VArray<T> *varray = std::get<I>(params_);
const T in_single = varray->get_internal_single();
in_chunk.fill(in_single);
}
}
}(),
...);
for (int64_t chunk_start = 0; chunk_start < mask_size; chunk_start += MaxChunkSize) {
const int64_t chunk_size = std::min(mask_size - chunk_start, MaxChunkSize);
const IndexMask sliced_mask = mask_.slice(chunk_start, chunk_size);
const int64_t sliced_mask_size = sliced_mask.size();
(
[&]() {
using ParamTag = std::tuple_element_t<I, TagsTuple>;
using T = typename ParamTag::BaseType;
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
if (!varray_is_single_[I]) {
MutableSpan in_chunk = std::get<I>(buffers).take_front(sliced_mask_size);
const VArray<T> *varray = std::get<I>(params_);
varray->materialize_compressed_to_uninitialized(sliced_mask, in_chunk);
}
}
}(),
...);
fn_(IndexRange(sliced_mask_size), sliced_mask, [&]() {
using ParamTag = std::tuple_element_t<I, TagsTuple>;
using T = typename ParamTag::BaseType;
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
MutableSpan<T> in_chunk = std::get<I>(buffers).take_front(sliced_mask_size);
return in_chunk;
}
else if constexpr (std::is_base_of_v<SingleOutputTagBase, ParamTag>) {
MutableSpan<T> out_span = *std::get<I>(params_);
return out_span.data();
}
}()...);
(
[&]() {
using ParamTag = std::tuple_element_t<I, TagsTuple>;
using T = typename ParamTag::BaseType;
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
MutableSpan<T> in_chunk = std::get<I>(buffers);
destruct_n(in_chunk.data(), sliced_mask_size);
}
}(),
...);
}
}
template<typename... Mode> bool try_execute_devirtualized_impl()
{
if constexpr (sizeof...(Mode) == sizeof...(Args)) {
this->try_execute_devirtualized_impl_call(std::tuple<Mode...>(),
std::make_index_sequence<sizeof...(Args)>());
return true;
}
else {
constexpr size_t I = sizeof...(Mode);
using ParamTag = std::tuple_element_t<I, TagsTuple>;
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
if (varray_is_single_[I]) {
return this->try_execute_devirtualized_impl<Mode..., DevirtualizeSingle>();
}
else if (varray_is_span_[I]) {
return this->try_execute_devirtualized_impl<Mode..., DevirtualizeSpan>();
}
else {
return false;
}
}
else {
return this->try_execute_devirtualized_impl<Mode..., DevirtualizeNone>();
}
}
}
template<typename... Mode, size_t... I>
void try_execute_devirtualized_impl_call(std::tuple<Mode...> /* modes */,
std::index_sequence<I...> /* indices */)
{
mask_.to_best_mask_type([&](auto mask) {
fn_(mask,
mask,
this->get_execute_param<I, std::tuple_element_t<I, std::tuple<Mode...>>>()...);
});
executed_ = true;
}
template<size_t... I> void init(std::index_sequence<I...> /* indices */)
{
varray_is_span_.fill(false);
varray_is_single_.fill(false);
(this->init_param<I>(), ...);
}
template<size_t I> void init_param()
{
using ParamTag = std::tuple_element_t<I, TagsTuple>;
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
const typename ParamType<ParamTag>::type *varray = std::get<I>(params_);
varray_is_span_[I] = varray->is_span();
varray_is_single_[I] = varray->is_single();
}
}
template<size_t... I> void execute_fallback_impl(std::index_sequence<I...> /* indices */)
{
fn_(mask_, mask_, this->get_execute_param<I, DevirtualizeNone>()...);
executed_ = true;
}
template<size_t I, typename Mode> auto get_execute_param()
{
using ParamTag = std::tuple_element_t<I, TagsTuple>;
if constexpr (std::is_base_of_v<SingleInputTagBase, ParamTag>) {
using T = typename ParamTag::BaseType;
const VArray<T> *varray = std::get<I>(params_);
if constexpr (std::is_same_v<Mode, DevirtualizeNone>) {
return *varray;
}
else if constexpr (std::is_same_v<Mode, DevirtualizeSingle>) {
return SingleAsSpan(*varray);
}
else if constexpr (std::is_same_v<Mode, DevirtualizeSpan>) {
return varray->get_internal_span();
}
}
else if constexpr (std::is_base_of_v<SingleOutputTagBase, ParamTag>) {
return std::get<I>(params_)->data();
}
}
};
} // namespace blender

View File

@@ -5,6 +5,7 @@
#include "BLI_vector.hh"
#include "BLI_vector_set.hh"
#include "BLI_virtual_array.hh"
#include "BLI_virtual_array_devirtualize.hh"
#include "testing/testing.h"
namespace blender::tests {
@@ -222,4 +223,44 @@ TEST(virtual_array, MaterializeCompressed)
}
}
/**
 * Callable used by the devirtualizer test. For every position it reads both inputs at the input
 * index and stores their sum in `out1` at the output index.
 */
struct MyOperator {
  template<typename InIndices, typename OutIndices, typename In1Array, typename In2Array>
  void operator()(InIndices in_indices,
                  OutIndices out_indices,
                  In1Array in1,
                  In2Array in2,
                  int *__restrict out1)
  {
    /* Both index containers are walked in lockstep, the first one determines the length. */
    for (const int64_t pos : IndexRange(in_indices.size())) {
      const int64_t src = in_indices[pos];
      const int64_t dst = out_indices[pos];
      out1[dst] = in1[src] + in2[src];
    }
  }
};
/** Adds two single-value virtual arrays through the materialized code path of the devirtualizer. */
TEST(virtual_array, Devirtualize)
{
  MyOperator add_fn;
  IndexMask mask(IndexRange(10));

  VArray<int> lhs = VArray<int>::ForSingle(3, 10);
  VArray<int> rhs = VArray<int>::ForSingle(5, 10);
  /* Disabled alternatives for the second input: */
  // VArray<int> rhs = VArray<int>::ForContainer(Array<int>(10, 5));
  // VArray<int> rhs = VArray<int>::ForFunc(10, [](int64_t i) { return (int)i; });

  std::array<int, 10> result_storage;
  MutableSpan<int> result = result_storage;
  /* Start from a value that the operator can not produce for these inputs. */
  result.fill(-1);

  using Devirtualizer = ArrayDevirtualizer<decltype(add_fn),
                                           SingleInputTag<int>,
                                           SingleInputTag<int>,
                                           SingleOutputTag<int>>;
  Devirtualizer devirtualizer{add_fn, &mask, &lhs, &rhs, &result};
  devirtualizer.execute_materialized();
  /* Disabled: exercise the devirtualized path instead. */
  // if (!devirtualizer.try_execute_devirtualized()) {
  // }

  EXPECT_EQ(result[0], 8);
  EXPECT_EQ(result[1], 8);
}
} // namespace blender::tests

View File

@@ -10,6 +10,8 @@
#include <functional>
#include "BLI_virtual_array_devirtualize.hh"
#include "FN_multi_function.hh"
namespace blender::fn {
@@ -47,72 +49,23 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
template<typename ElementFuncT> static FunctionT create_function(ElementFuncT element_fn)
{
return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) {
if (in1.is_single()) {
/* Only evaluate the function once when the input is a single value. */
const In1 in1_single = in1.get_internal_single();
const Out1 out1_single = element_fn(in1_single);
out1.fill_indices(mask, out1_single);
return;
}
if (in1.is_span()) {
const Span<In1> in1_span = in1.get_internal_span();
mask.to_best_mask_type(
[&](auto mask) { execute_SI_SO(element_fn, mask, in1_span, out1.data()); });
return;
}
/* The input is an unknown virtual array type. To avoid virtual function call overhead for
* every element, elements are retrieved and processed in chunks. */
static constexpr int64_t MaxChunkSize = 32;
TypedBuffer<In1, MaxChunkSize> in1_buffer_owner;
MutableSpan<In1> in1_buffer{in1_buffer_owner.ptr(), MaxChunkSize};
const int64_t mask_size = mask.size();
for (int64_t chunk_start = 0; chunk_start < mask_size; chunk_start += MaxChunkSize) {
const int64_t chunk_size = std::min(mask_size - chunk_start, MaxChunkSize);
const IndexMask sliced_mask = mask.slice(chunk_start, chunk_size);
/* Load input from the virtual array. */
MutableSpan<In1> in1_chunk = in1_buffer.take_front(chunk_size);
in1.materialize_compressed_to_uninitialized(sliced_mask, in1_chunk);
if (sliced_mask.is_range()) {
execute_SI_SO(
element_fn, IndexRange(chunk_size), in1_chunk, out1.data() + sliced_mask[0]);
auto fn = [&](auto in_indices, auto out_indices, auto in1, Out1 *__restrict out1) {
BLI_assert(in_indices.size() == out_indices.size());
for (const int64_t i : IndexRange(in_indices.size())) {
const int64_t in_index = in_indices[i];
const int64_t out_index = out_indices[i];
new (out1 + out_index) Out1(element_fn(in1[in_index]));
}
else {
execute_SI_SO_compressed(element_fn, sliced_mask, in1_chunk, out1.data());
}
destruct_n(in1_chunk.data(), chunk_size);
};
ArrayDevirtualizer<decltype(fn), SingleInputTag<In1>, SingleOutputTag<Out1>> devirtualizer{
fn, &mask, &in1, &out1};
if (!devirtualizer.try_execute_devirtualized()) {
devirtualizer.execute_materialized();
}
};
}
template<typename ElementFuncT, typename MaskT, typename In1Array>
BLI_NOINLINE static void execute_SI_SO(const ElementFuncT &element_fn,
MaskT mask,
const In1Array &in1,
Out1 *__restrict r_out)
{
for (const int64_t i : mask) {
new (r_out + i) Out1(element_fn(in1[i]));
}
}
/** Expects the input array to be "compressed", i.e. there are no gaps between the elements. */
template<typename ElementFuncT, typename MaskT, typename In1Array>
BLI_NOINLINE static void execute_SI_SO_compressed(const ElementFuncT &element_fn,
MaskT mask,
const In1Array &in1,
Out1 *__restrict r_out)
{
for (const int64_t i : IndexRange(mask.size())) {
new (r_out + mask[i]) Out1(element_fn(in1[i]));
}
}
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
{
const VArray<In1> &in1 = params.readonly_single_input<In1>(0);
@@ -158,24 +111,24 @@ class CustomMF_SI_SI_SO : public MultiFunction {
const VArray<In1> &in1,
const VArray<In2> &in2,
MutableSpan<Out1> out1) {
/* Devirtualization results in a 2-3x speedup for some simple functions. */
devirtualize_varray2(in1, in2, [&](const auto &in1, const auto &in2) {
mask.to_best_mask_type(
[&](const auto &mask) { execute_SI_SI_SO(element_fn, mask, in1, in2, out1.data()); });
});
};
}
auto fn = [&](auto in_indices, auto out_indices, auto in1, auto in2, Out1 *__restrict out1) {
BLI_assert(in_indices.size() == out_indices.size());
for (const int64_t i : IndexRange(in_indices.size())) {
const int64_t in_index = in_indices[i];
const int64_t out_index = out_indices[i];
new (out1 + out_index) Out1(element_fn(in1[in_index], in2[in_index]));
}
};
template<typename ElementFuncT, typename MaskT, typename In1Array, typename In2Array>
BLI_NOINLINE static void execute_SI_SI_SO(const ElementFuncT &element_fn,
MaskT mask,
const In1Array &in1,
const In2Array &in2,
Out1 *__restrict r_out)
{
for (const int64_t i : mask) {
new (r_out + i) Out1(element_fn(in1[i], in2[i]));
}
ArrayDevirtualizer<decltype(fn),
SingleInputTag<In1>,
SingleInputTag<In2>,
SingleOutputTag<Out1>>
devirtualizer{fn, &mask, &in1, &in2, &out1};
if (!devirtualizer.try_execute_devirtualized()) {
devirtualizer.execute_materialized();
}
};
}
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override
@@ -230,27 +183,30 @@ class CustomMF_SI_SI_SI_SO : public MultiFunction {
const VArray<In2> &in2,
const VArray<In3> &in3,
MutableSpan<Out1> out1) {
/* Virtual arrays are not devirtualized yet, to avoid generating lots of code without further
* consideration. */
execute_SI_SI_SI_SO(element_fn, mask, in1, in2, in3, out1.data());
};
}
auto fn = [&](auto in_indices,
auto out_indices,
auto in1,
auto in2,
auto in3,
Out1 *__restrict out1) {
BLI_assert(in_indices.size() == out_indices.size());
for (const int64_t i : IndexRange(in_indices.size())) {
const int64_t in_index = in_indices[i];
const int64_t out_index = out_indices[i];
new (out1 + out_index) Out1(element_fn(in1[in_index], in2[in_index], in3[in_index]));
}
};
template<typename ElementFuncT,
typename MaskT,
typename In1Array,
typename In2Array,
typename In3Array>
BLI_NOINLINE static void execute_SI_SI_SI_SO(const ElementFuncT &element_fn,
MaskT mask,
const In1Array &in1,
const In2Array &in2,
const In3Array &in3,
Out1 *__restrict r_out)
{
for (const int64_t i : mask) {
new (r_out + i) Out1(element_fn(in1[i], in2[i], in3[i]));
}
ArrayDevirtualizer<decltype(fn),
SingleInputTag<In1>,
SingleInputTag<In2>,
SingleInputTag<In3>,
SingleOutputTag<Out1>>
devirtualizer{fn, &mask, &in1, &in2, &in3, &out1};
if (!devirtualizer.try_execute_devirtualized()) {
devirtualizer.execute_materialized();
}
};
}
void call(IndexMask mask, MFParams params, MFContext UNUSED(context)) const override