2020-06-16 16:35:57 +02:00
|
|
|
/*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version 2
|
|
|
|
* of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software Foundation,
|
|
|
|
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
/** \file
|
|
|
|
* \ingroup fn
|
|
|
|
*
|
|
|
|
* A `MultiFunction` encapsulates a function that is optimized for throughput (instead of latency).
|
|
|
|
* The throughput is optimized by always processing many elements at once, instead of each element
|
|
|
|
* separately. This is ideal for functions that are evaluated often (e.g. for every particle).
|
|
|
|
*
|
|
|
|
* By processing a lot of data at once, individual functions become easier to optimize for humans
|
|
|
|
* and for the compiler. Furthermore, performance profiles become easier to understand and show
|
|
|
|
* better where bottlenecks are.
|
|
|
|
*
|
|
|
|
* Every multi-function has a name and an ordered list of parameters. Parameters are used for input
|
|
|
|
* and output. In fact, there are three kinds of parameters: inputs, outputs and mutable (which is
|
|
|
|
* a combination of input and output).
|
|
|
|
*
|
|
|
|
* To call a multi-function, one has to provide three things:
|
|
|
|
* - `MFParams`: This references the input and output arrays that the function works with. The
|
|
|
|
* arrays are not owned by MFParams.
|
|
|
|
* - `IndexMask`: An array of indices indicating which indices in the provided arrays should be
|
|
|
|
* touched/processed.
|
|
|
|
* - `MFContext`: Further information for the called function.
|
|
|
|
*
|
|
|
|
* A new multi-function is generally implemented as follows:
|
|
|
|
* 1. Create a new subclass of MultiFunction.
|
|
|
|
* 2. Implement a constructor that initializes the signature of the function.
|
|
|
|
* 3. Override the `call` function.
|
|
|
|
*/
|
|
|
|
|
2020-07-08 15:02:47 +02:00
|
|
|
#include "BLI_hash.hh"
|
|
|
|
|
2020-06-16 16:35:57 +02:00
|
|
|
#include "FN_multi_function_context.hh"
|
|
|
|
#include "FN_multi_function_params.hh"
|
|
|
|
|
2020-07-03 14:25:20 +02:00
|
|
|
namespace blender::fn {
|
2020-06-16 16:35:57 +02:00
|
|
|
|
|
|
|
class MultiFunction {
|
|
|
|
private:
|
2021-03-22 11:57:24 +01:00
|
|
|
const MFSignature *signature_ref_ = nullptr;
|
2020-06-16 16:35:57 +02:00
|
|
|
|
|
|
|
public:
|
|
|
|
virtual ~MultiFunction()
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2021-12-09 21:16:25 +11:00
|
|
|
/**
|
|
|
|
* The result is the same as using #call directly but this method has some additional features.
|
|
|
|
* - Automatic multi-threading when possible and appropriate.
|
|
|
|
* - Automatic index mask offsetting to avoid large temporary intermediate arrays that are mostly
|
|
|
|
* unused.
|
|
|
|
*/
|
Geometry Nodes: refactor multi-threading in field evaluation
Previously, there was a fixed grain size for all multi-functions. That was
not sufficient because some functions could benefit a lot from smaller
grain sizes.
This refactors adds a new `MultiFunction::call_auto` method which has the
same effect as just calling `MultiFunction::call` but additionally figures
out how to execute the specific multi-function efficiently. It determines
a good grain size and decides whether the mask indices should be shifted
or not.
Most multi-function evaluations benefit from this, but medium sized work
loads (1000 - 50000 elements) benefit from it the most. Especially when
expensive multi-functions (e.g. noise) is involved. This is because for
smaller work loads, threading is rarely used and for larger work loads
threading worked fine before already.
With this patch, multi-functions can specify execution hints, that allow
the caller to execute it most efficiently. These execution hints still
have to be added to more functions.
Some performance measurements of a field evaluation involving noise and
math nodes, ordered by the number of elements being evaluated:
```
1,000,000: 133 ms -> 120 ms
100,000: 30 ms -> 18 ms
10,000: 20 ms -> 2.7 ms
1,000: 4 ms -> 0.5 ms
100: 0.5 ms -> 0.4 ms
```
2021-11-26 11:05:47 +01:00
|
|
|
void call_auto(IndexMask mask, MFParams params, MFContext context) const;
|
2020-06-16 16:35:57 +02:00
|
|
|
virtual void call(IndexMask mask, MFParams params, MFContext context) const = 0;
|
|
|
|
|
2020-07-20 12:16:20 +02:00
|
|
|
virtual uint64_t hash() const
|
2020-07-08 15:02:47 +02:00
|
|
|
{
|
2021-03-25 16:01:28 +01:00
|
|
|
return get_default_hash(this);
|
2020-07-08 15:02:47 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
virtual bool equals(const MultiFunction &UNUSED(other)) const
|
|
|
|
{
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-07-20 12:16:20 +02:00
|
|
|
int param_amount() const
|
2020-07-12 12:38:03 +02:00
|
|
|
{
|
2021-03-22 11:57:24 +01:00
|
|
|
return signature_ref_->param_types.size();
|
2020-07-12 12:38:03 +02:00
|
|
|
}
|
|
|
|
|
2020-06-16 16:35:57 +02:00
|
|
|
IndexRange param_indices() const
|
|
|
|
{
|
2021-03-22 11:57:24 +01:00
|
|
|
return signature_ref_->param_types.index_range();
|
2020-06-16 16:35:57 +02:00
|
|
|
}
|
|
|
|
|
2020-07-20 12:16:20 +02:00
|
|
|
MFParamType param_type(int param_index) const
|
2020-06-16 16:35:57 +02:00
|
|
|
{
|
2021-03-22 11:57:24 +01:00
|
|
|
return signature_ref_->param_types[param_index];
|
2020-06-16 16:35:57 +02:00
|
|
|
}
|
|
|
|
|
2020-07-20 12:16:20 +02:00
|
|
|
StringRefNull param_name(int param_index) const
|
2020-06-16 16:35:57 +02:00
|
|
|
{
|
2021-03-22 11:57:24 +01:00
|
|
|
return signature_ref_->param_names[param_index];
|
2020-06-16 16:35:57 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
StringRefNull name() const
|
|
|
|
{
|
2021-03-22 11:57:24 +01:00
|
|
|
return signature_ref_->function_name;
|
2020-06-16 16:35:57 +02:00
|
|
|
}
|
|
|
|
|
2021-11-21 12:37:04 +01:00
|
|
|
virtual std::string debug_name() const;
|
|
|
|
|
2020-07-21 17:20:05 +02:00
|
|
|
bool depends_on_context() const
|
|
|
|
{
|
2021-03-22 11:57:24 +01:00
|
|
|
return signature_ref_->depends_on_context;
|
2020-07-21 17:20:05 +02:00
|
|
|
}
|
|
|
|
|
2020-06-16 16:35:57 +02:00
|
|
|
const MFSignature &signature() const
|
|
|
|
{
|
2021-03-22 11:57:24 +01:00
|
|
|
BLI_assert(signature_ref_ != nullptr);
|
|
|
|
return *signature_ref_;
|
2020-06-16 16:35:57 +02:00
|
|
|
}
|
|
|
|
|
Geometry Nodes: refactor multi-threading in field evaluation
Previously, there was a fixed grain size for all multi-functions. That was
not sufficient because some functions could benefit a lot from smaller
grain sizes.
This refactors adds a new `MultiFunction::call_auto` method which has the
same effect as just calling `MultiFunction::call` but additionally figures
out how to execute the specific multi-function efficiently. It determines
a good grain size and decides whether the mask indices should be shifted
or not.
Most multi-function evaluations benefit from this, but medium sized work
loads (1000 - 50000 elements) benefit from it the most. Especially when
expensive multi-functions (e.g. noise) is involved. This is because for
smaller work loads, threading is rarely used and for larger work loads
threading worked fine before already.
With this patch, multi-functions can specify execution hints, that allow
the caller to execute it most efficiently. These execution hints still
have to be added to more functions.
Some performance measurements of a field evaluation involving noise and
math nodes, ordered by the number of elements being evaluated:
```
1,000,000: 133 ms -> 120 ms
100,000: 30 ms -> 18 ms
10,000: 20 ms -> 2.7 ms
1,000: 4 ms -> 0.5 ms
100: 0.5 ms -> 0.4 ms
```
2021-11-26 11:05:47 +01:00
|
|
|
/**
|
|
|
|
* Information about how the multi-function behaves that help a caller to execute it efficiently.
|
|
|
|
*/
|
|
|
|
struct ExecutionHints {
|
|
|
|
/**
|
|
|
|
* Suggested minimum workload under which multi-threading does not really help.
|
|
|
|
* This should be lowered when the multi-function is doing something computationally expensive.
|
|
|
|
*/
|
|
|
|
int64_t min_grain_size = 10000;
|
|
|
|
/**
|
|
|
|
* Indicates that the multi-function will allocate an array large enough to hold all indices
|
|
|
|
* passed in as mask. This tells the caller that it would be preferable to pass in smaller
|
|
|
|
* indices. Also maybe the full mask should be split up into smaller segments to decrease peak
|
|
|
|
* memory usage.
|
|
|
|
*/
|
|
|
|
bool allocates_array = false;
|
|
|
|
/**
|
|
|
|
* Tells the caller that every execution takes about the same time. This helps making a more
|
|
|
|
* educated guess about a good grain size.
|
|
|
|
*/
|
|
|
|
bool uniform_execution_time = true;
|
|
|
|
};
|
|
|
|
|
|
|
|
ExecutionHints execution_hints() const;
|
|
|
|
|
2020-06-16 16:35:57 +02:00
|
|
|
protected:
|
2021-03-22 11:57:24 +01:00
|
|
|
/* Make the function use the given signature. This should be called once in the constructor of
|
|
|
|
* child classes. No copy of the signature is made, so the caller has to make sure that the
|
|
|
|
* signature lives as long as the multi function. It is ok to embed the signature into the child
|
|
|
|
* class. */
|
|
|
|
void set_signature(const MFSignature *signature)
|
2020-06-16 16:35:57 +02:00
|
|
|
{
|
2021-03-22 11:57:24 +01:00
|
|
|
/* Take a pointer as argument, so that it is more obvious that no copy is created. */
|
|
|
|
BLI_assert(signature != nullptr);
|
|
|
|
signature_ref_ = signature;
|
2020-06-16 16:35:57 +02:00
|
|
|
}
|
Geometry Nodes: refactor multi-threading in field evaluation
Previously, there was a fixed grain size for all multi-functions. That was
not sufficient because some functions could benefit a lot from smaller
grain sizes.
This refactors adds a new `MultiFunction::call_auto` method which has the
same effect as just calling `MultiFunction::call` but additionally figures
out how to execute the specific multi-function efficiently. It determines
a good grain size and decides whether the mask indices should be shifted
or not.
Most multi-function evaluations benefit from this, but medium sized work
loads (1000 - 50000 elements) benefit from it the most. Especially when
expensive multi-functions (e.g. noise) is involved. This is because for
smaller work loads, threading is rarely used and for larger work loads
threading worked fine before already.
With this patch, multi-functions can specify execution hints, that allow
the caller to execute it most efficiently. These execution hints still
have to be added to more functions.
Some performance measurements of a field evaluation involving noise and
math nodes, ordered by the number of elements being evaluated:
```
1,000,000: 133 ms -> 120 ms
100,000: 30 ms -> 18 ms
10,000: 20 ms -> 2.7 ms
1,000: 4 ms -> 0.5 ms
100: 0.5 ms -> 0.4 ms
```
2021-11-26 11:05:47 +01:00
|
|
|
|
|
|
|
virtual ExecutionHints get_execution_hints() const;
|
2020-06-16 16:35:57 +02:00
|
|
|
};
|
|
|
|
|
2021-09-14 14:52:44 +02:00
|
|
|
/* Delegates to the signature-based constructor, using the signature of `fn` and an `IndexMask`
 * constructed from `mask_size`. */
inline MFParamsBuilder::MFParamsBuilder(const MultiFunction &fn, int64_t mask_size)
    : MFParamsBuilder(fn.signature(), IndexMask(mask_size))
{
}
|
|
|
|
|
|
|
|
/* Delegates to the signature-based constructor, using the signature of `fn` and the mask that
 * `mask` points to. The pointer is dereferenced unconditionally, so it must not be null. */
inline MFParamsBuilder::MFParamsBuilder(const MultiFunction &fn, const IndexMask *mask)
    : MFParamsBuilder(fn.signature(), *mask)
{
}
|
|
|
|
|
2020-12-02 13:25:25 +01:00
|
|
|
namespace multi_function_types {
|
|
|
|
using fn::CPPType;
|
|
|
|
using fn::GMutableSpan;
|
|
|
|
using fn::GSpan;
|
|
|
|
using fn::MFContext;
|
|
|
|
using fn::MFContextBuilder;
|
|
|
|
using fn::MFDataType;
|
|
|
|
using fn::MFParams;
|
|
|
|
using fn::MFParamsBuilder;
|
|
|
|
using fn::MFParamType;
|
|
|
|
using fn::MultiFunction;
|
|
|
|
} // namespace multi_function_types
|
|
|
|
|
2020-07-03 14:25:20 +02:00
|
|
|
} // namespace blender::fn
|