Geometry Nodes: avoid using enumerable thread specific on single thread
The geometry nodes evaluator supports "lazy threading", i.e. it starts out single-threaded and only switches to multi-threaded mode once it determines that multi-threading can be beneficial. Now it only creates an enumerable-thread-specific if it is actually using multiple threads. This results in a 6% speedup in my test file with many node groups and math nodes.
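In essence the change defers construction of the per-thread storage until multi-threading is actually enabled, so the single-threaded path only ever touches one allocator. Below is a minimal, self-contained sketch of that pattern, not the actual Blender code: `Allocator`, `ThreadLocalStorage` and `enable_multi_threading` are placeholder names, and the map-based thread-local container is a simplified stand-in for `threading::EnumerableThreadSpecific`.

// Sketch: keep a "main" allocator for the single-threaded case and create the
// per-thread storage lazily, only when multi-threading is actually enabled.
#include <atomic>
#include <memory>
#include <mutex>
#include <thread>
#include <unordered_map>

struct Allocator { /* stand-in for LinearAllocator<> */ };

/* Simplified stand-in for an enumerable-thread-specific container. */
class ThreadLocalStorage {
 public:
  Allocator &local()
  {
    std::lock_guard<std::mutex> lock(mutex_);
    /* References into the map stay valid even if it rehashes. */
    return per_thread_[std::this_thread::get_id()];
  }

 private:
  std::mutex mutex_;
  std::unordered_map<std::thread::id, Allocator> per_thread_;
};

class Executor {
 public:
  Allocator &get_main_or_local_allocator()
  {
    /* Only touch the lazily created thread-locals when actually multi-threaded. */
    if (use_multi_threading_.load(std::memory_order_relaxed)) {
      return thread_locals_->local();
    }
    return main_allocator_;
  }

  void enable_multi_threading()
  {
    /* Called from the main thread before any worker threads are spawned. */
    if (!thread_locals_) {
      thread_locals_ = std::make_unique<ThreadLocalStorage>();
    }
    use_multi_threading_.store(true, std::memory_order_relaxed);
  }

 private:
  Allocator main_allocator_;
  std::unique_ptr<ThreadLocalStorage> thread_locals_;
  std::atomic<bool> use_multi_threading_{false};
};

The actual commit below follows the same shape: a plain `main_allocator_` member for the single-threaded case, a `std::unique_ptr` around the enumerable-thread-specific that `ensure_thread_locals()` creates on demand, and a branch in `get_main_or_local_allocator()` on `use_multi_threading()`.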
@@ -245,8 +245,11 @@ class Executor {
    * A separate linear allocator for every thread. We could potentially reuse some memory, but that
    * doesn't seem worth it yet.
    */
-  threading::EnumerableThreadSpecific<LinearAllocator<>> local_allocators_;
-  LinearAllocator<> *main_local_allocator_ = nullptr;
+  struct ThreadLocalData {
+    LinearAllocator<> allocator;
+  };
+  std::unique_ptr<threading::EnumerableThreadSpecific<ThreadLocalData>> thread_locals_;
+  LinearAllocator<> main_allocator_;
   /**
    * Set to false when the first execution ends.
    */
@@ -259,7 +262,6 @@ class Executor {
   {
     /* The indices are necessary, because they are used as keys in #node_states_. */
     BLI_assert(self_.graph_.node_indices_are_valid());
-    main_local_allocator_ = &local_allocators_.local();
   }
 
   ~Executor()
@@ -338,17 +340,26 @@ class Executor {
     Span<const Node *> nodes = self_.graph_.nodes();
     node_states_.reinitialize(nodes.size());
 
-    /* Construct all node states in parallel. */
-    threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) {
-      LinearAllocator<> &allocator = local_allocators_.local();
+    auto construct_node_range = [&](const IndexRange range, LinearAllocator<> &allocator) {
       for (const int i : range) {
         const Node &node = *nodes[i];
         NodeState &node_state = *allocator.construct<NodeState>().release();
         node_states_[i] = &node_state;
         this->construct_initial_node_state(allocator, node, node_state);
       }
-    });
-  }
+    };
+    if (nodes.size() <= 256) {
+      construct_node_range(nodes.index_range(), main_allocator_);
+    }
+    else {
+      this->ensure_thread_locals();
+      /* Construct all node states in parallel. */
+      threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) {
+        LinearAllocator<> &allocator = this->get_main_or_local_allocator();
+        construct_node_range(range, allocator);
+      });
+    }
+  }
 
   void construct_initial_node_state(LinearAllocator<> &allocator,
                                     const Node &node,
@@ -1067,10 +1078,23 @@ class Executor {
     if (BLI_system_thread_count() <= 1) {
       return false;
     }
+    this->ensure_thread_locals();
     task_pool_.store(BLI_task_pool_create(this, TASK_PRIORITY_HIGH));
     return true;
   }
 
+  void ensure_thread_locals()
+  {
+#ifdef FN_LAZY_FUNCTION_DEBUG_THREADS
+    if (current_main_thread_ != std::this_thread::get_id()) {
+      BLI_assert_unreachable();
+    }
+#endif
+    if (!thread_locals_) {
+      thread_locals_ = std::make_unique<threading::EnumerableThreadSpecific<ThreadLocalData>>();
+    }
+  }
+
   /**
    * Allow other threads to steal all the nodes that are currently scheduled on this thread.
    */
@@ -1109,9 +1133,9 @@ class Executor {
   LinearAllocator<> &get_main_or_local_allocator()
   {
     if (this->use_multi_threading()) {
-      return local_allocators_.local();
+      return thread_locals_->local().allocator;
     }
-    return *main_local_allocator_;
+    return main_allocator_;
   }
 };
 