WIP: Functions: new local allocator for better memory reuse and performance #104630
|
@ -289,19 +289,24 @@ RenderWork Session::run_update_for_next_iteration()
|
|||
RenderWork render_work;
|
||||
|
||||
thread_scoped_lock scene_lock(scene->mutex);
|
||||
thread_scoped_lock reset_lock(delayed_reset_.mutex);
|
||||
|
||||
bool have_tiles = true;
|
||||
bool switched_to_new_tile = false;
|
||||
bool did_reset = false;
|
||||
|
||||
const bool did_reset = delayed_reset_.do_reset;
|
||||
if (delayed_reset_.do_reset) {
|
||||
thread_scoped_lock buffers_lock(buffers_mutex_);
|
||||
do_delayed_reset();
|
||||
/* Perform delayed reset if requested. */
|
||||
{
|
||||
thread_scoped_lock reset_lock(delayed_reset_.mutex);
|
||||
if (delayed_reset_.do_reset) {
|
||||
did_reset = true;
|
||||
|
||||
/* After reset make sure the tile manager is at the first big tile. */
|
||||
have_tiles = tile_manager_.next();
|
||||
switched_to_new_tile = true;
|
||||
thread_scoped_lock buffers_lock(buffers_mutex_);
|
||||
do_delayed_reset();
|
||||
|
||||
/* After reset make sure the tile manager is at the first big tile. */
|
||||
have_tiles = tile_manager_.next();
|
||||
switched_to_new_tile = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update number of samples in the integrator.
|
||||
|
|
|
@ -1131,6 +1131,10 @@ bNodeSocket *node_find_indicated_socket(SpaceNode &snode,
|
|||
|
||||
snode.edittree->ensure_topology_cache();
|
||||
const Span<float2> socket_locations = snode.runtime->all_socket_locations;
|
||||
if (socket_locations.is_empty()) {
|
||||
/* Sockets haven't been drawn yet, e.g. when the file is currently opening. */
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const Span<bNode *> nodes = snode.edittree->all_nodes();
|
||||
for (int i = nodes.index_range().last(); i >= 0; i--) {
|
||||
|
|
|
@ -129,14 +129,14 @@ template<typename MaskT, typename... Args, typename... ParamTags, size_t... I, t
|
|||
#if (defined(__GNUC__) && !defined(__clang__))
|
||||
[[gnu::optimize("-funroll-loops")]] [[gnu::optimize("O3")]]
|
||||
#endif
|
||||
void execute_array(
|
||||
TypeSequence<ParamTags...> /*param_tags*/,
|
||||
std::index_sequence<I...> /*indices*/,
|
||||
ElementFn element_fn,
|
||||
MaskT mask,
|
||||
/* Use restrict to tell the compiler that pointer inputs do not alias each
|
||||
* other. This is important for some compiler optimizations. */
|
||||
Args &&__restrict... args)
|
||||
inline void
|
||||
execute_array(TypeSequence<ParamTags...> /*param_tags*/,
|
||||
std::index_sequence<I...> /*indices*/,
|
||||
ElementFn element_fn,
|
||||
MaskT mask,
|
||||
/* Use restrict to tell the compiler that pointer inputs do not alias each
|
||||
* other. This is important for some compiler optimizations. */
|
||||
Args &&__restrict... args)
|
||||
{
|
||||
for (const int64_t i : mask) {
|
||||
element_fn([&]() -> decltype(auto) {
|
||||
|
@ -174,11 +174,11 @@ template<typename ParamTag> struct MaterializeArgInfo {
|
|||
* Similar to #execute_array but accepts two mask inputs, one for inputs and one for outputs.
|
||||
*/
|
||||
template<typename... ParamTags, typename ElementFn, typename... Chunks>
|
||||
void execute_materialized_impl(TypeSequence<ParamTags...> /*param_tags*/,
|
||||
const ElementFn element_fn,
|
||||
const IndexRange in_mask,
|
||||
const IndexMask out_mask,
|
||||
Chunks &&__restrict... chunks)
|
||||
inline void execute_materialized_impl(TypeSequence<ParamTags...> /*param_tags*/,
|
||||
const ElementFn element_fn,
|
||||
const IndexRange in_mask,
|
||||
const IndexMask out_mask,
|
||||
Chunks &&__restrict... chunks)
|
||||
{
|
||||
BLI_assert(in_mask.size() == out_mask.size());
|
||||
for (const int64_t i : IndexRange(in_mask.size())) {
|
||||
|
@ -205,11 +205,11 @@ void execute_materialized_impl(TypeSequence<ParamTags...> /*param_tags*/,
|
|||
* chunks, which reduces virtual function call overhead.
|
||||
*/
|
||||
template<typename... ParamTags, size_t... I, typename ElementFn, typename... LoadedParams>
|
||||
void execute_materialized(TypeSequence<ParamTags...> /* param_tags */,
|
||||
std::index_sequence<I...> /* indices */,
|
||||
const ElementFn element_fn,
|
||||
const IndexMask mask,
|
||||
const std::tuple<LoadedParams...> &loaded_params)
|
||||
inline void execute_materialized(TypeSequence<ParamTags...> /* param_tags */,
|
||||
std::index_sequence<I...> /* indices */,
|
||||
const ElementFn element_fn,
|
||||
const IndexMask mask,
|
||||
const std::tuple<LoadedParams...> &loaded_params)
|
||||
{
|
||||
|
||||
/* In theory, all elements could be processed in one chunk. However, that has the disadvantage
|
||||
|
|
Loading…
Reference in New Issue