Geometry Nodes: refactor multi-threading in field evaluation
Previously, there was a fixed grain size for all multi-functions. That was
not sufficient because some functions could benefit a lot from smaller
grain sizes.
This refactor adds a new `MultiFunction::call_auto` method which has the
same effect as just calling `MultiFunction::call` but additionally figures
out how to execute the specific multi-function efficiently. It determines
a good grain size and decides whether the mask indices should be shifted
or not.
Most multi-function evaluations benefit from this, but medium-sized
workloads (1000 - 50000 elements) benefit from it the most, especially when
expensive multi-functions (e.g. noise) are involved. This is because
threading is rarely used for smaller workloads, and for larger workloads
threading already worked fine before.
With this patch, multi-functions can specify execution hints that allow
the caller to execute them most efficiently. These execution hints still
have to be added to more functions.
Some performance measurements of a field evaluation involving noise and
math nodes, ordered by the number of elements being evaluated:
```
1,000,000: 133 ms   -> 120 ms
  100,000:  30 ms   ->  18 ms
   10,000:  20 ms   ->   2.7 ms
    1,000:   4 ms   ->   0.5 ms
      100:   0.5 ms ->   0.4 ms
```
			
			
This commit is contained in:
		@@ -16,8 +16,141 @@
 | 
			
		||||
 | 
			
		||||
#include "FN_multi_function.hh"
 | 
			
		||||
 | 
			
		||||
#include "BLI_task.hh"
 | 
			
		||||
#include "BLI_threads.h"
 | 
			
		||||
 | 
			
		||||
namespace blender::fn {
 | 
			
		||||
 | 
			
		||||
using ExecutionHints = MultiFunction::ExecutionHints;
 | 
			
		||||
 | 
			
		||||
/**
 * Returns the execution hints of this multi-function by forwarding to #get_execution_hints.
 */
ExecutionHints MultiFunction::execution_hints() const
{
  ExecutionHints hints = this->get_execution_hints();
  return hints;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Base implementation: returns a value-initialized #ExecutionHints object.
 */
ExecutionHints MultiFunction::get_execution_hints() const
{
  ExecutionHints default_hints{};
  return default_hints;
}
 | 
			
		||||
 | 
			
		||||
static bool supports_threading_by_slicing_params(const MultiFunction &fn)
 | 
			
		||||
{
 | 
			
		||||
  for (const int i : fn.param_indices()) {
 | 
			
		||||
    const MFParamType param_type = fn.param_type(i);
 | 
			
		||||
    if (ELEM(param_type.interface_type(),
 | 
			
		||||
             MFParamType::InterfaceType::Mutable,
 | 
			
		||||
             MFParamType::InterfaceType::Output)) {
 | 
			
		||||
      if (param_type.data_type().is_vector()) {
 | 
			
		||||
        return false;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int64_t compute_grain_size(const ExecutionHints &hints, const IndexMask mask)
 | 
			
		||||
{
 | 
			
		||||
  int64_t grain_size = hints.min_grain_size;
 | 
			
		||||
  if (hints.uniform_execution_time) {
 | 
			
		||||
    const int thread_count = BLI_system_thread_count();
 | 
			
		||||
    /* Avoid using a small grain size even if it is not necessary. */
 | 
			
		||||
    const int64_t thread_based_grain_size = mask.size() / thread_count / 4;
 | 
			
		||||
    grain_size = std::max(grain_size, thread_based_grain_size);
 | 
			
		||||
  }
 | 
			
		||||
  if (hints.allocates_array) {
 | 
			
		||||
    const int64_t max_grain_size = 10000;
 | 
			
		||||
    /* Avoid allocating many large intermediate arrays. Better process data in smaller chunks to
 | 
			
		||||
     * keep peak memory usage lower. */
 | 
			
		||||
    grain_size = std::min(grain_size, max_grain_size);
 | 
			
		||||
  }
 | 
			
		||||
  return grain_size;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * The result is the same as using #call directly but this method has some additional features.
 | 
			
		||||
 * - Automatic multi-threading when possible and appropriate.
 | 
			
		||||
 * - Automatic index mask offsetting to avoid large temporary intermediate arrays that are mostly
 | 
			
		||||
 *   unused.
 | 
			
		||||
 */
 | 
			
		||||
void MultiFunction::call_auto(IndexMask mask, MFParams params, MFContext context) const
 | 
			
		||||
{
 | 
			
		||||
  if (mask.is_empty()) {
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
  const ExecutionHints hints = this->execution_hints();
 | 
			
		||||
  const int64_t grain_size = compute_grain_size(hints, mask);
 | 
			
		||||
 | 
			
		||||
  if (mask.size() <= grain_size) {
 | 
			
		||||
    this->call(mask, params, context);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  const bool supports_threading = supports_threading_by_slicing_params(*this);
 | 
			
		||||
  if (!supports_threading) {
 | 
			
		||||
    this->call(mask, params, context);
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  threading::parallel_for(mask.index_range(), grain_size, [&](const IndexRange sub_range) {
 | 
			
		||||
    const IndexMask sliced_mask = mask.slice(sub_range);
 | 
			
		||||
    if (!hints.allocates_array) {
 | 
			
		||||
      /* There is no benefit to changing indices in this case. */
 | 
			
		||||
      this->call(sliced_mask, params, context);
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
    if (sliced_mask[0] < grain_size) {
 | 
			
		||||
      /* The indices are low, no need to offset them. */
 | 
			
		||||
      this->call(sliced_mask, params, context);
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
    const int64_t input_slice_start = sliced_mask[0];
 | 
			
		||||
    const int64_t input_slice_size = sliced_mask.last() - input_slice_start + 1;
 | 
			
		||||
    const IndexRange input_slice_range{input_slice_start, input_slice_size};
 | 
			
		||||
 | 
			
		||||
    Vector<int64_t> offset_mask_indices;
 | 
			
		||||
    const IndexMask offset_mask = mask.slice_and_offset(sub_range, offset_mask_indices);
 | 
			
		||||
 | 
			
		||||
    MFParamsBuilder offset_params{*this, offset_mask.min_array_size()};
 | 
			
		||||
 | 
			
		||||
    /* Slice all parameters so that for the actual function call. */
 | 
			
		||||
    for (const int param_index : this->param_indices()) {
 | 
			
		||||
      const MFParamType param_type = this->param_type(param_index);
 | 
			
		||||
      switch (param_type.category()) {
 | 
			
		||||
        case MFParamType::SingleInput: {
 | 
			
		||||
          const GVArray &varray = params.readonly_single_input(param_index);
 | 
			
		||||
          offset_params.add_readonly_single_input(varray.slice(input_slice_range));
 | 
			
		||||
          break;
 | 
			
		||||
        }
 | 
			
		||||
        case MFParamType::SingleMutable: {
 | 
			
		||||
          const GMutableSpan span = params.single_mutable(param_index);
 | 
			
		||||
          const GMutableSpan sliced_span = span.slice(input_slice_range);
 | 
			
		||||
          offset_params.add_single_mutable(sliced_span);
 | 
			
		||||
          break;
 | 
			
		||||
        }
 | 
			
		||||
        case MFParamType::SingleOutput: {
 | 
			
		||||
          const GMutableSpan span = params.uninitialized_single_output_if_required(param_index);
 | 
			
		||||
          if (span.is_empty()) {
 | 
			
		||||
            offset_params.add_ignored_single_output();
 | 
			
		||||
          }
 | 
			
		||||
          else {
 | 
			
		||||
            const GMutableSpan sliced_span = span.slice(input_slice_range);
 | 
			
		||||
            offset_params.add_uninitialized_single_output(sliced_span);
 | 
			
		||||
          }
 | 
			
		||||
          break;
 | 
			
		||||
        }
 | 
			
		||||
        case MFParamType::VectorInput:
 | 
			
		||||
        case MFParamType::VectorMutable:
 | 
			
		||||
        case MFParamType::VectorOutput: {
 | 
			
		||||
          BLI_assert_unreachable();
 | 
			
		||||
          break;
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    this->call(offset_mask, offset_params, context);
 | 
			
		||||
  });
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::string MultiFunction::debug_name() const
 | 
			
		||||
{
 | 
			
		||||
  return signature_ref_->function_name;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user