Functions: devirtualize virtual arrays in simple functions
In some multi-functions (such as a simple add function), the virtual method calls needed to access array elements add significant overhead. For these simple functions it makes sense to generate optimized versions for different types of virtual arrays. This is done by giving the compiler all the information it needs to devirtualize virtual arrays. In my benchmark this speeds up processing a lot of data with small functions by 2-3x. This devirtualization should not be done for larger functions, because it increases compile time and binary size while providing a negligible performance benefit.
This commit is contained in:
@@ -61,8 +61,11 @@ template<typename In1, typename Out1> class CustomMF_SI_SO : public MultiFunctio
|
||||
template<typename ElementFuncT> static FunctionT create_function(ElementFuncT element_fn)
|
||||
{
|
||||
return [=](IndexMask mask, const VArray<In1> &in1, MutableSpan<Out1> out1) {
|
||||
mask.foreach_index(
|
||||
[&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i])); });
|
||||
/* Devirtualization results in a 2-3x speedup for some simple functions. */
|
||||
devirtualize_varray(in1, [&](const auto &in1) {
|
||||
mask.foreach_index(
|
||||
[&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i])); });
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
@@ -111,8 +114,11 @@ class CustomMF_SI_SI_SO : public MultiFunction {
|
||||
const VArray<In1> &in1,
|
||||
const VArray<In2> &in2,
|
||||
MutableSpan<Out1> out1) {
|
||||
mask.foreach_index(
|
||||
[&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i])); });
|
||||
/* Devirtualization results in a 2-3x speedup for some simple functions. */
|
||||
devirtualize_varray2(in1, in2, [&](const auto &in1, const auto &in2) {
|
||||
mask.foreach_index(
|
||||
[&](int i) { new (static_cast<void *>(&out1[i])) Out1(element_fn(in1[i], in2[i])); });
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user