WIP: Functions: new local allocator for better memory reuse and performance #104630

Draft
Jacques Lucke wants to merge 44 commits from JacquesLucke/blender:local-allocator into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
3 changed files with 35 additions and 26 deletions
Showing only changes of commit fb8948c7d5 - Show all commits

View File

@ -289,19 +289,24 @@ RenderWork Session::run_update_for_next_iteration()
RenderWork render_work;
thread_scoped_lock scene_lock(scene->mutex);
thread_scoped_lock reset_lock(delayed_reset_.mutex);
bool have_tiles = true;
bool switched_to_new_tile = false;
bool did_reset = false;
const bool did_reset = delayed_reset_.do_reset;
if (delayed_reset_.do_reset) {
thread_scoped_lock buffers_lock(buffers_mutex_);
do_delayed_reset();
/* Perform delayed reset if requested. */
{
thread_scoped_lock reset_lock(delayed_reset_.mutex);
if (delayed_reset_.do_reset) {
did_reset = true;
/* After reset make sure the tile manager is at the first big tile. */
have_tiles = tile_manager_.next();
switched_to_new_tile = true;
thread_scoped_lock buffers_lock(buffers_mutex_);
do_delayed_reset();
/* After reset make sure the tile manager is at the first big tile. */
have_tiles = tile_manager_.next();
switched_to_new_tile = true;
}
}
/* Update number of samples in the integrator.

View File

@ -1131,6 +1131,10 @@ bNodeSocket *node_find_indicated_socket(SpaceNode &snode,
snode.edittree->ensure_topology_cache();
const Span<float2> socket_locations = snode.runtime->all_socket_locations;
if (socket_locations.is_empty()) {
/* Sockets haven't been drawn yet, e.g. when the file is currently opening. */
return nullptr;
}
const Span<bNode *> nodes = snode.edittree->all_nodes();
for (int i = nodes.index_range().last(); i >= 0; i--) {

View File

@ -129,14 +129,14 @@ template<typename MaskT, typename... Args, typename... ParamTags, size_t... I, t
#if (defined(__GNUC__) && !defined(__clang__))
[[gnu::optimize("-funroll-loops")]] [[gnu::optimize("O3")]]
#endif
void execute_array(
TypeSequence<ParamTags...> /*param_tags*/,
std::index_sequence<I...> /*indices*/,
ElementFn element_fn,
MaskT mask,
/* Use restrict to tell the compiler that pointer inputs do not alias each
* other. This is important for some compiler optimizations. */
Args &&__restrict... args)
inline void
execute_array(TypeSequence<ParamTags...> /*param_tags*/,
std::index_sequence<I...> /*indices*/,
ElementFn element_fn,
MaskT mask,
/* Use restrict to tell the compiler that pointer inputs do not alias each
* other. This is important for some compiler optimizations. */
Args &&__restrict... args)
{
for (const int64_t i : mask) {
element_fn([&]() -> decltype(auto) {
@ -174,11 +174,11 @@ template<typename ParamTag> struct MaterializeArgInfo {
* Similar to #execute_array but accepts two mask inputs, one for inputs and one for outputs.
*/
template<typename... ParamTags, typename ElementFn, typename... Chunks>
void execute_materialized_impl(TypeSequence<ParamTags...> /*param_tags*/,
const ElementFn element_fn,
const IndexRange in_mask,
const IndexMask out_mask,
Chunks &&__restrict... chunks)
inline void execute_materialized_impl(TypeSequence<ParamTags...> /*param_tags*/,
const ElementFn element_fn,
const IndexRange in_mask,
const IndexMask out_mask,
Chunks &&__restrict... chunks)
{
BLI_assert(in_mask.size() == out_mask.size());
for (const int64_t i : IndexRange(in_mask.size())) {
@ -205,11 +205,11 @@ void execute_materialized_impl(TypeSequence<ParamTags...> /*param_tags*/,
* chunks, which reduces virtual function call overhead.
*/
template<typename... ParamTags, size_t... I, typename ElementFn, typename... LoadedParams>
void execute_materialized(TypeSequence<ParamTags...> /* param_tags */,
std::index_sequence<I...> /* indices */,
const ElementFn element_fn,
const IndexMask mask,
const std::tuple<LoadedParams...> &loaded_params)
inline void execute_materialized(TypeSequence<ParamTags...> /* param_tags */,
std::index_sequence<I...> /* indices */,
const ElementFn element_fn,
const IndexMask mask,
const std::tuple<LoadedParams...> &loaded_params)
{
/* In theory, all elements could be processed in one chunk. However, that has the disadvantage