This repository has been archived on 2023-10-09. You can view files and clone it, but cannot push or open issues or pull requests.
Files
blender-archive/source/blender/functions/intern/multi_function.cc

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

140 lines
4.4 KiB
C++
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "FN_multi_function.hh"
#include "BLI_task.hh"
#include "BLI_threads.h"
2020-07-03 14:25:20 +02:00
namespace blender::fn {
using ExecutionHints = MultiFunction::ExecutionHints;
ExecutionHints MultiFunction::execution_hints() const
{
return this->get_execution_hints();
}
ExecutionHints MultiFunction::get_execution_hints() const
{
return ExecutionHints{};
}
static bool supports_threading_by_slicing_params(const MultiFunction &fn)
{
for (const int i : fn.param_indices()) {
const MFParamType param_type = fn.param_type(i);
if (ELEM(param_type.interface_type(),
MFParamType::InterfaceType::Mutable,
MFParamType::InterfaceType::Output)) {
if (param_type.data_type().is_vector()) {
return false;
}
}
}
return true;
}
static int64_t compute_grain_size(const ExecutionHints &hints, const IndexMask mask)
{
int64_t grain_size = hints.min_grain_size;
if (hints.uniform_execution_time) {
const int thread_count = BLI_system_thread_count();
/* Avoid using a small grain size even if it is not necessary. */
const int64_t thread_based_grain_size = mask.size() / thread_count / 4;
grain_size = std::max(grain_size, thread_based_grain_size);
}
if (hints.allocates_array) {
const int64_t max_grain_size = 10000;
/* Avoid allocating many large intermediate arrays. Better process data in smaller chunks to
* keep peak memory usage lower. */
grain_size = std::min(grain_size, max_grain_size);
}
return grain_size;
}
void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context) const
{
if (mask.is_empty()) {
return;
}
const ExecutionHints hints = this->execution_hints();
const int64_t grain_size = compute_grain_size(hints, mask);
if (mask.size() <= grain_size) {
this->call(mask, params, context);
return;
}
const bool supports_threading = supports_threading_by_slicing_params(*this);
if (!supports_threading) {
this->call(mask, params, context);
return;
}
threading::parallel_for(mask.index_range(), grain_size, [&](const IndexRange sub_range) {
const IndexMask sliced_mask = mask.slice(sub_range);
if (!hints.allocates_array) {
/* There is no benefit to changing indices in this case. */
this->call(sliced_mask, params, context);
return;
}
if (sliced_mask[0] < grain_size) {
/* The indices are low, no need to offset them. */
this->call(sliced_mask, params, context);
return;
}
const int64_t input_slice_start = sliced_mask[0];
const int64_t input_slice_size = sliced_mask.last() - input_slice_start + 1;
const IndexRange input_slice_range{input_slice_start, input_slice_size};
Vector<int64_t> offset_mask_indices;
const IndexMask offset_mask = mask.slice_and_offset(sub_range, offset_mask_indices);
MFParamsBuilder offset_params{*this, offset_mask.min_array_size()};
/* Slice all parameters so that for the actual function call. */
for (const int param_index : this->param_indices()) {
const MFParamType param_type = this->param_type(param_index);
switch (param_type.category()) {
Geometry Nodes: refactor array devirtualization Goals: * Better high level control over where devirtualization occurs. There is always a trade-off between performance and compile-time/binary-size. * Simplify using array devirtualization. * Better performance for cases where devirtualization wasn't used before. Many geometry nodes accept fields as inputs. Internally, that means that the execution functions have to accept so called "virtual arrays" as inputs. Those can be e.g. actual arrays, just single values, or lazily computed arrays. Due to these different possible virtual arrays implementations, access to individual elements is slower than it would be if everything was just a normal array (access does through a virtual function call). For more complex execution functions, this overhead does not matter, but for small functions (like a simple addition) it very much does. The virtual function call also prevents the compiler from doing some optimizations (e.g. loop unrolling and inserting simd instructions). The solution is to "devirtualize" the virtual arrays for small functions where the overhead is measurable. Essentially, the function is generated many times with different array types as input. Then there is a run-time dispatch that calls the best implementation. We have been doing devirtualization in e.g. math nodes for a long time already. This patch just generalizes the concept and makes it easier to control. It also makes it easier to investigate the different trade-offs when it comes to devirtualization. Nodes that we've optimized using devirtualization before didn't get a speedup. However, a couple of nodes are using devirtualization now, that didn't before. Those got a 2-4x speedup in common cases. * Map Range * Random Value * Switch * Combine XYZ Differential Revision: https://developer.blender.org/D14628
2022-04-26 17:12:34 +02:00
case MFParamCategory::SingleInput: {
const GVArray &varray = params.readonly_single_input(param_index);
offset_params.add_readonly_single_input(varray.slice(input_slice_range));
break;
}
Geometry Nodes: refactor array devirtualization Goals: * Better high level control over where devirtualization occurs. There is always a trade-off between performance and compile-time/binary-size. * Simplify using array devirtualization. * Better performance for cases where devirtualization wasn't used before. Many geometry nodes accept fields as inputs. Internally, that means that the execution functions have to accept so called "virtual arrays" as inputs. Those can be e.g. actual arrays, just single values, or lazily computed arrays. Due to these different possible virtual arrays implementations, access to individual elements is slower than it would be if everything was just a normal array (access does through a virtual function call). For more complex execution functions, this overhead does not matter, but for small functions (like a simple addition) it very much does. The virtual function call also prevents the compiler from doing some optimizations (e.g. loop unrolling and inserting simd instructions). The solution is to "devirtualize" the virtual arrays for small functions where the overhead is measurable. Essentially, the function is generated many times with different array types as input. Then there is a run-time dispatch that calls the best implementation. We have been doing devirtualization in e.g. math nodes for a long time already. This patch just generalizes the concept and makes it easier to control. It also makes it easier to investigate the different trade-offs when it comes to devirtualization. Nodes that we've optimized using devirtualization before didn't get a speedup. However, a couple of nodes are using devirtualization now, that didn't before. Those got a 2-4x speedup in common cases. * Map Range * Random Value * Switch * Combine XYZ Differential Revision: https://developer.blender.org/D14628
2022-04-26 17:12:34 +02:00
case MFParamCategory::SingleMutable: {
const GMutableSpan span = params.single_mutable(param_index);
const GMutableSpan sliced_span = span.slice(input_slice_range);
offset_params.add_single_mutable(sliced_span);
break;
}
Geometry Nodes: refactor array devirtualization Goals: * Better high level control over where devirtualization occurs. There is always a trade-off between performance and compile-time/binary-size. * Simplify using array devirtualization. * Better performance for cases where devirtualization wasn't used before. Many geometry nodes accept fields as inputs. Internally, that means that the execution functions have to accept so called "virtual arrays" as inputs. Those can be e.g. actual arrays, just single values, or lazily computed arrays. Due to these different possible virtual arrays implementations, access to individual elements is slower than it would be if everything was just a normal array (access does through a virtual function call). For more complex execution functions, this overhead does not matter, but for small functions (like a simple addition) it very much does. The virtual function call also prevents the compiler from doing some optimizations (e.g. loop unrolling and inserting simd instructions). The solution is to "devirtualize" the virtual arrays for small functions where the overhead is measurable. Essentially, the function is generated many times with different array types as input. Then there is a run-time dispatch that calls the best implementation. We have been doing devirtualization in e.g. math nodes for a long time already. This patch just generalizes the concept and makes it easier to control. It also makes it easier to investigate the different trade-offs when it comes to devirtualization. Nodes that we've optimized using devirtualization before didn't get a speedup. However, a couple of nodes are using devirtualization now, that didn't before. Those got a 2-4x speedup in common cases. * Map Range * Random Value * Switch * Combine XYZ Differential Revision: https://developer.blender.org/D14628
2022-04-26 17:12:34 +02:00
case MFParamCategory::SingleOutput: {
const GMutableSpan span = params.uninitialized_single_output_if_required(param_index);
if (span.is_empty()) {
offset_params.add_ignored_single_output();
}
else {
const GMutableSpan sliced_span = span.slice(input_slice_range);
offset_params.add_uninitialized_single_output(sliced_span);
}
break;
}
Geometry Nodes: refactor array devirtualization Goals: * Better high level control over where devirtualization occurs. There is always a trade-off between performance and compile-time/binary-size. * Simplify using array devirtualization. * Better performance for cases where devirtualization wasn't used before. Many geometry nodes accept fields as inputs. Internally, that means that the execution functions have to accept so called "virtual arrays" as inputs. Those can be e.g. actual arrays, just single values, or lazily computed arrays. Due to these different possible virtual arrays implementations, access to individual elements is slower than it would be if everything was just a normal array (access does through a virtual function call). For more complex execution functions, this overhead does not matter, but for small functions (like a simple addition) it very much does. The virtual function call also prevents the compiler from doing some optimizations (e.g. loop unrolling and inserting simd instructions). The solution is to "devirtualize" the virtual arrays for small functions where the overhead is measurable. Essentially, the function is generated many times with different array types as input. Then there is a run-time dispatch that calls the best implementation. We have been doing devirtualization in e.g. math nodes for a long time already. This patch just generalizes the concept and makes it easier to control. It also makes it easier to investigate the different trade-offs when it comes to devirtualization. Nodes that we've optimized using devirtualization before didn't get a speedup. However, a couple of nodes are using devirtualization now, that didn't before. Those got a 2-4x speedup in common cases. * Map Range * Random Value * Switch * Combine XYZ Differential Revision: https://developer.blender.org/D14628
2022-04-26 17:12:34 +02:00
case MFParamCategory::VectorInput:
case MFParamCategory::VectorMutable:
case MFParamCategory::VectorOutput: {
BLI_assert_unreachable();
break;
}
}
}
this->call(offset_mask, offset_params, context);
});
}
std::string MultiFunction::debug_name() const
{
return signature_ref_->function_name;
}
2020-07-03 14:25:20 +02:00
} // namespace blender::fn