Functions: improve devirtualization in multi-function builder

This refactors how devirtualization is done in general and how
multi-functions use it.

* The old `Devirtualizer` class has been removed in favor of a simpler
  solution. It is also more general in the sense that it is not coupled
  with `IndexMask` and `VArray`. Instead there is a function that has
  inputs which control how different types are devirtualized. The
  new implementation is currently less general with regard to the number
  of parameters it supports. This can be changed in the future, but
  does not seem necessary now and would make the code less obvious.
* Devirtualizers for different types are now defined in their respective
  headers.
* The multi-function builder works with the `GVArray` stored in `MFParams`
  directly now, instead of first converting it to a `VArray<T>`. This reduces
  some constant overhead, which makes the multi-function slightly
  faster. This is only noticeable when very few elements are processed though.

No functional changes or performance regressions are expected.
This commit is contained in:
2023-01-07 12:55:48 +01:00
parent 1942d55c07
commit 1bbf1ed03c
14 changed files with 315 additions and 366 deletions

View File

@@ -26,302 +26,139 @@
* times and binary sizes, depending on the number of parameters that are devirtualized separately.
* So there is always a trade-off between run-time performance and compile-time/binary-size.
*
* This file provides a utility to devirtualize array parameters to a function using a high level
* API. This makes it easy to experiment with different extremes of the mentioned trade-off and
* allows finding a good compromise for each function.
* This file provides a utility to devirtualize function parameters using a high level API. This
* makes it easy to experiment with different extremes of the mentioned trade-off and allows
* finding a good compromise for each function.
*/
#include "BLI_parameter_pack_utils.hh"
#include "BLI_virtual_array.hh"
namespace blender::devirtualize_parameters {
/**
* Bit flag that specifies how an individual parameter is or can be devirtualized.
*
* Every enumerator except #None occupies its own bit, so several modes can be combined into one
* "allowed modes" value per parameter (e.g. `DeviMode::Single | DeviMode::Span`) and tested
* individually with `&`.
*/
enum class DeviMode {
/* This is used as zero-value to compare to, to avoid casting to int. */
None = 0,
/* Don't use devirtualization for that parameter, just pass it along. */
Keep = (1 << 0),
/* Devirtualize #VArray as #Span. For an #IndexMask parameter this mode instead means that the
* mask is passed along unchanged. */
Span = (1 << 1),
/* Devirtualize #VArray as #SingleAsSpan. */
Single = (1 << 2),
/* Devirtualize #IndexMask as #IndexRange. */
Range = (1 << 3),
};
/* NOTE(review): presumably this defines the bitwise operators (`|`, `&`, ...) for #DeviMode, with
* #DeviMode::Range passed as the largest flag value. The macro is defined elsewhere, confirm. */
ENUM_OPERATORS(DeviMode, DeviMode::Range);
/**
* Utility to encode multiple #DeviMode in a type. Each element is the (possibly combined) bit flag
* belonging to the parameter at the same position.
*/
template<DeviMode... Mode> using DeviModeSequence = ValueSequence<DeviMode, Mode...>;
/**
* Main class that performs the devirtualization.
*
* \tparam Fn: Callable that is invoked with one argument per source. Each argument is either the
* original source or its devirtualized replacement.
* \tparam SourceTypes: Types of the not-yet-devirtualized parameters. #VArray and #IndexMask get
* special handling, every other type is always passed along unchanged.
*/
template<typename Fn, typename... SourceTypes> class Devirtualizer {
private:
/** Utility to get the tag of the I-th source type. */
template<size_t I>
using type_at_index = typename TypeSequence<SourceTypes...>::template at_index<I>;
/** Number of parameters that are passed to #fn_. */
static constexpr size_t SourceTypesNum = sizeof...(SourceTypes);
/** Function to devirtualize. */
Fn fn_;
/**
* Source values that will be devirtualized. Note that these are stored as pointers to avoid
* unnecessary copies. The caller is responsible for keeping the memory alive.
*/
std::tuple<const SourceTypes *...> sources_;
/** Keeps track of whether #fn_ has been called already to avoid calling it twice. */
bool executed_ = false;
public:
/**
* \param fn: Function to call, stored by value (moved into the devirtualizer).
* \param sources: One pointer per source type. The pointers are dereferenced later without a
* null check, so they have to point to valid objects.
*/
Devirtualizer(Fn fn, const SourceTypes *...sources) : fn_(std::move(fn)), sources_{sources...}
{
}
/**
* Return true when the function passed to the constructor has been called already.
*/
bool executed() const
{
return executed_;
}
/**
* At compile time, generates multiple variants of the function, each optimized for a different
* combination of devirtualized parameters. For every parameter, a bit flag is passed that
* determines how it will be devirtualized. At run-time, if possible, one of the generated
* functions is picked and executed.
*
* To check whether the function was called successfully, call #executed() afterwards.
*
* \note This generates an exponential amount of code in the final binary, depending on how many
* to-be-virtualized parameters there are.
*/
template<DeviMode... AllowedModes>
void try_execute_devirtualized(DeviModeSequence<AllowedModes...> /* allowed_modes */)
{
BLI_assert(!executed_);
/* Exactly one bit flag per source is required. */
static_assert(sizeof...(AllowedModes) == SourceTypesNum);
/* The boolean result is intentionally not used here, success is reported via #executed(). */
this->try_execute_devirtualized_impl(DeviModeSequence<>(),
DeviModeSequence<AllowedModes...>());
}
/**
* Execute the function and pass in the original parameters without doing any devirtualization.
*/
void execute_without_devirtualization()
{
BLI_assert(!executed_);
/* Use #DeviMode::Keep for every parameter. */
this->try_execute_devirtualized_impl_call(
make_value_sequence<DeviMode, DeviMode::Keep, SourceTypesNum>(),
std::make_index_sequence<SourceTypesNum>());
}
private:
/**
* A recursive method that generates all the combinations of devirtualized parameters that the
* caller requested. A recursive function is necessary to achieve generating an exponential
* number of function calls (which has to be used with care, but is expected here).
*
* At every recursive step, the #DeviMode of one parameter is determined. This is achieved by
* extending #DeviModeSequence<Mode...> by one element in each step. The recursion ends once all
* parameters are handled.
*
* When a deeper recursion step reports failure, the next allowed mode of the current parameter
* is tried. For #VArray the order is Single, Span, Keep. For #IndexMask it is Range, Span.
*
* \return True when the function has been executed.
*/
template<DeviMode... Mode, DeviMode... AllowedModes>
bool try_execute_devirtualized_impl(
/* Initially empty, but then extended by one element in each recursive step. */
DeviModeSequence<Mode...> /* modes */,
/* Bit flag for every parameter. */
DeviModeSequence<AllowedModes...> /* allowed_modes */)
{
static_assert(SourceTypesNum == sizeof...(AllowedModes));
if constexpr (SourceTypesNum == sizeof...(Mode)) {
/* End of recursion, now call the function with the determined #DeviModes. */
this->try_execute_devirtualized_impl_call(DeviModeSequence<Mode...>(),
std::make_index_sequence<SourceTypesNum>());
return true;
}
else {
/* Index of the parameter that is checked in the current recursive step. */
constexpr size_t I = sizeof...(Mode);
/* Non-devirtualized parameter type. */
using SourceType = type_at_index<I>;
/* A bit flag indicating what devirtualizations are allowed in this step. */
[[maybe_unused]] constexpr DeviMode allowed_modes =
DeviModeSequence<AllowedModes...>::template at_index<I>();
/* Handle #VArray types. */
if constexpr (is_VArray_v<SourceType>) {
/* The actual virtual array, used for dynamic dispatch at run-time. */
const SourceType &varray = *std::get<I>(sources_);
/* Check if the virtual array is a single value. */
if constexpr ((allowed_modes & DeviMode::Single) != DeviMode::None) {
if (varray.is_single()) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Single>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Check if the virtual array is a span. */
if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
if (varray.is_span()) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Check if it is ok if the virtual array is not devirtualized. */
if constexpr ((allowed_modes & DeviMode::Keep) != DeviMode::None) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Handle #IndexMask. Note that #DeviMode::Keep is never tried in this branch, so a mask needs
* #DeviMode::Span in its allowed modes to be passed along unchanged. */
else if constexpr (std::is_same_v<IndexMask, SourceType>) {
/* Check if the mask is actually a contiguous range. */
if constexpr ((allowed_modes & DeviMode::Range) != DeviMode::None) {
/* The actual mask used for dynamic dispatch at run-time. */
const IndexMask &mask = *std::get<I>(sources_);
if (mask.is_range()) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Range>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Check if mask is also allowed to stay a span. */
if constexpr ((allowed_modes & DeviMode::Span) != DeviMode::None) {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Span>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* Handle unknown types. These are always kept as is, independent of the allowed modes. */
else {
if (this->try_execute_devirtualized_impl(DeviModeSequence<Mode..., DeviMode::Keep>(),
DeviModeSequence<AllowedModes...>())) {
return true;
}
}
}
/* None of the allowed modes matched the run-time state of the parameters. */
return false;
}
/**
* Actually call the function with devirtualized parameters.
*
* `Mode` and `I` are expanded together, so the I-th argument is created using the I-th mode.
*/
template<DeviMode... Mode, size_t... I>
void try_execute_devirtualized_impl_call(DeviModeSequence<Mode...> /* modes */,
std::index_sequence<I...> /* indices */)
{
BLI_assert(!executed_);
fn_(this->get_devirtualized_parameter<I, Mode>()...);
executed_ = true;
}
/**
* Return the I-th parameter devirtualized using the passed in #DeviMode. This has different
* return types based on the template parameters.
*
* \note It is expected that the caller already knows that the parameter can be devirtualized
* with the given mode.
*
* \note For combinations that are not handled below (e.g. a #VArray with #DeviMode::Range), no
* return statement is instantiated at all.
*/
template<size_t I, DeviMode Mode> decltype(auto) get_devirtualized_parameter()
{
using SourceType = type_at_index<I>;
static_assert(Mode != DeviMode::None);
if constexpr (Mode == DeviMode::Keep) {
/* Don't change the original parameter at all. */
return *std::get<I>(sources_);
}
/* The branches below never return for #DeviMode::Keep, so the return above is the only one that
* is instantiated in that case. */
if constexpr (is_VArray_v<SourceType>) {
const SourceType &varray = *std::get<I>(sources_);
if constexpr (Mode == DeviMode::Single) {
/* Devirtualize virtual array as single value. */
return SingleAsSpan(varray);
}
else if constexpr (Mode == DeviMode::Span) {
/* Devirtualize virtual array as span. */
return varray.get_internal_span();
}
}
else if constexpr (std::is_same_v<IndexMask, SourceType>) {
const IndexMask &mask = *std::get<I>(sources_);
/* NOTE(review): `ELEM` is defined elsewhere, with a single candidate this is presumably the
* same as `Mode == DeviMode::Span`. */
if constexpr (ELEM(Mode, DeviMode::Span)) {
/* Don't devirtualize mask, it's still a span. */
return mask;
}
else if constexpr (Mode == DeviMode::Range) {
/* Devirtualize the mask as range. */
return mask.as_range();
}
}
}
};
} // namespace blender::devirtualize_parameters
namespace blender {
/**
* Generate multiple versions of the given function optimized for different virtual arrays.
* One has to be careful with nesting multiple devirtualizations, because that results in an
* exponential number of function instantiations (increasing compile time and binary size).
* Calls the given function with devirtualized parameters if possible. Note that using many
* non-trivial devirtualizers results in exponential code growth.
*
* Generally, this function should only be used when the virtual method call overhead to get an
* element from a virtual array is significant.
* \return True if the function has been called.
*
* Every devirtualizer is expected to have a `devirtualize(auto fn) -> bool` method.
* This method is expected to do one of two things:
* - Call `fn` with the devirtualized argument and return what `fn` returns.
* - Don't call `fn` (because the devirtualization failed) and return false.
*
* Examples for devirtualizers: #BasicDevirtualizer, #IndexMaskDevirtualizer, #VArrayDevirtualizer.
*/
template<typename T, typename Func>
inline void devirtualize_varray(const VArray<T> &varray, const Func &func, bool enable = true)
template<typename Fn, typename... Devirtualizers>
inline bool call_with_devirtualized_parameters(const std::tuple<Devirtualizers...> &devis,
const Fn &fn)
{
using namespace devirtualize_parameters;
if (enable) {
Devirtualizer<decltype(func), VArray<T>> devirtualizer(func, &varray);
constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode>());
if (devirtualizer.executed()) {
return;
}
/* In theory the code below could be generalized to avoid code duplication. However, the maximum
* number of parameters is expected to be relatively low. Explicitly implementing the different
* cases makes it more obvious to see what is going on and also makes inlining everything easier
* for the compiler. */
constexpr size_t DeviNum = sizeof...(Devirtualizers);
if constexpr (DeviNum == 0) {
fn();
return true;
}
func(varray);
if constexpr (DeviNum == 1) {
return std::get<0>(devis).devirtualize([&](auto param0) {
fn(param0);
return true;
});
}
if constexpr (DeviNum == 2) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
fn(param0, param1);
return true;
});
});
}
if constexpr (DeviNum == 3) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
fn(param0, param1, param2);
return true;
});
});
});
}
if constexpr (DeviNum == 4) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
return std::get<3>(devis).devirtualize([&](auto &&param3) {
fn(param0, param1, param2, param3);
return true;
});
});
});
});
}
if constexpr (DeviNum == 5) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
return std::get<3>(devis).devirtualize([&](auto &&param3) {
return std::get<4>(devis).devirtualize([&](auto &&param4) {
fn(param0, param1, param2, param3, param4);
return true;
});
});
});
});
});
}
if constexpr (DeviNum == 6) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
return std::get<3>(devis).devirtualize([&](auto &&param3) {
return std::get<4>(devis).devirtualize([&](auto &&param4) {
return std::get<5>(devis).devirtualize([&](auto &&param5) {
fn(param0, param1, param2, param3, param4, param5);
return true;
});
});
});
});
});
});
}
if constexpr (DeviNum == 7) {
return std::get<0>(devis).devirtualize([&](auto &&param0) {
return std::get<1>(devis).devirtualize([&](auto &&param1) {
return std::get<2>(devis).devirtualize([&](auto &&param2) {
return std::get<3>(devis).devirtualize([&](auto &&param3) {
return std::get<4>(devis).devirtualize([&](auto &&param4) {
return std::get<5>(devis).devirtualize([&](auto &&param5) {
return std::get<6>(devis).devirtualize([&](auto &&param6) {
fn(param0, param1, param2, param3, param4, param5, param6);
return true;
});
});
});
});
});
});
});
}
return false;
}
/**
* Same as `devirtualize_varray`, but devirtualizes two virtual arrays at the same time.
* This is better than nesting two calls to `devirtualize_varray`, because it instantiates fewer
* cases.
* A devirtualizer to be used with #call_with_devirtualized_parameters.
*
* This one is very simple, it does not perform any actual devirtualization. It can be used to pass
* parameters to the function that shouldn't be devirtualized.
*/
template<typename T1, typename T2, typename Func>
inline void devirtualize_varray2(const VArray<T1> &varray1,
const VArray<T2> &varray2,
const Func &func,
bool enable = true)
{
using namespace devirtualize_parameters;
if (enable) {
Devirtualizer<decltype(func), VArray<T1>, VArray<T2>> devirtualizer(func, &varray1, &varray2);
constexpr DeviMode devi_mode = DeviMode::Single | DeviMode::Span;
devirtualizer.try_execute_devirtualized(DeviModeSequence<devi_mode, devi_mode>());
if (devirtualizer.executed()) {
return;
}
template<typename T> struct BasicDevirtualizer {
const T value;
template<typename Fn> bool devirtualize(const Fn &fn) const
{
return fn(this->value);
}
func(varray1, varray2);
}
};
} // namespace blender