Geometry Nodes: avoid using enumerable thread specific on single thread

The geometry nodes evaluator supports "lazy threading": it starts out
single-threaded and only switches to multi-threaded mode once it determines
that multi-threading is likely to be beneficial.

Now it only creates an EnumerableThreadSpecific when it actually uses
multiple threads. This results in a 6% speedup in my test file with many
node groups and math nodes.
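
The pattern behind this change can be shown in isolation: the per-thread container is held behind a std::unique_ptr and only constructed once the executor commits to multi-threading, while the single-threaded path keeps using a plain member and never touches thread-local storage. Below is a minimal standalone sketch of that idea; the Allocator, ThreadLocalStore and Executor names are illustrative stand-ins, not Blender's threading::EnumerableThreadSpecific or LinearAllocator<> API.

  #include <memory>
  #include <mutex>
  #include <thread>
  #include <unordered_map>
  #include <vector>

  /* Stand-in for a linear allocator. */
  struct Allocator {
    std::vector<std::unique_ptr<int>> blocks;
  };

  /* Simplified stand-in for an enumerable-thread-specific container:
   * one Allocator per thread, created on first access. */
  class ThreadLocalStore {
   public:
    Allocator &local()
    {
      std::lock_guard<std::mutex> lock(mutex_);
      return map_[std::this_thread::get_id()];
    }

   private:
    std::mutex mutex_;
    std::unordered_map<std::thread::id, Allocator> map_;
  };

  class Executor {
   public:
    Allocator &get_main_or_local_allocator()
    {
      if (use_multi_threading_) {
        /* Only reached after enable_multi_threading() built the container. */
        return thread_locals_->local();
      }
      /* Single-threaded fast path: no thread-local lookup at all. */
      return main_allocator_;
    }

    void enable_multi_threading()
    {
      /* Build the per-thread container lazily, right before it is needed. */
      if (!thread_locals_) {
        thread_locals_ = std::make_unique<ThreadLocalStore>();
      }
      use_multi_threading_ = true;
    }

   private:
    bool use_multi_threading_ = false;
    std::unique_ptr<ThreadLocalStore> thread_locals_;
    Allocator main_allocator_;
  };

  int main()
  {
    Executor executor;
    executor.get_main_or_local_allocator();  /* uses main_allocator_, nothing extra allocated */
    executor.enable_multi_threading();
    executor.get_main_or_local_allocator();  /* now goes through the per-thread store */
  }

The stand-in ThreadLocalStore uses a mutex-protected map only to keep the sketch self-contained; the relevant point is that it is never even allocated while the evaluator stays single-threaded.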
commit dba2d82846
parent c744d5453f
Date: 2022-12-29 21:05:41 +01:00

@@ -245,8 +245,11 @@ class Executor {
    * A separate linear allocator for every thread. We could potentially reuse some memory, but that
    * doesn't seem worth it yet.
    */
-  threading::EnumerableThreadSpecific<LinearAllocator<>> local_allocators_;
-  LinearAllocator<> *main_local_allocator_ = nullptr;
+  struct ThreadLocalData {
+    LinearAllocator<> allocator;
+  };
+  std::unique_ptr<threading::EnumerableThreadSpecific<ThreadLocalData>> thread_locals_;
+  LinearAllocator<> main_allocator_;
   /**
    * Set to false when the first execution ends.
    */
@@ -259,7 +262,6 @@ class Executor {
   {
     /* The indices are necessary, because they are used as keys in #node_states_. */
     BLI_assert(self_.graph_.node_indices_are_valid());
-    main_local_allocator_ = &local_allocators_.local();
   }
 
   ~Executor()
@@ -338,17 +340,26 @@ class Executor {
     Span<const Node *> nodes = self_.graph_.nodes();
     node_states_.reinitialize(nodes.size());
 
-    /* Construct all node states in parallel. */
-    threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) {
-      LinearAllocator<> &allocator = local_allocators_.local();
+    auto construct_node_range = [&](const IndexRange range, LinearAllocator<> &allocator) {
       for (const int i : range) {
         const Node &node = *nodes[i];
         NodeState &node_state = *allocator.construct<NodeState>().release();
         node_states_[i] = &node_state;
         this->construct_initial_node_state(allocator, node, node_state);
       }
-    });
+    };
+    if (nodes.size() <= 256) {
+      construct_node_range(nodes.index_range(), main_allocator_);
+    }
+    else {
+      this->ensure_thread_locals();
+      /* Construct all node states in parallel. */
+      threading::parallel_for(nodes.index_range(), 256, [&](const IndexRange range) {
+        LinearAllocator<> &allocator = this->get_main_or_local_allocator();
+        construct_node_range(range, allocator);
+      });
+    }
   }
 
   void construct_initial_node_state(LinearAllocator<> &allocator,
                                     const Node &node,
@@ -1067,10 +1078,23 @@ class Executor {
if (BLI_system_thread_count() <= 1) { if (BLI_system_thread_count() <= 1) {
return false; return false;
} }
this->ensure_thread_locals();
task_pool_.store(BLI_task_pool_create(this, TASK_PRIORITY_HIGH)); task_pool_.store(BLI_task_pool_create(this, TASK_PRIORITY_HIGH));
return true; return true;
} }
void ensure_thread_locals()
{
#ifdef FN_LAZY_FUNCTION_DEBUG_THREADS
if (current_main_thread_ != std::this_thread::get_id()) {
BLI_assert_unreachable();
}
#endif
if (!thread_locals_) {
thread_locals_ = std::make_unique<threading::EnumerableThreadSpecific<ThreadLocalData>>();
}
}
/** /**
* Allow other threads to steal all the nodes that are currently scheduled on this thread. * Allow other threads to steal all the nodes that are currently scheduled on this thread.
*/ */
@@ -1109,9 +1133,9 @@ class Executor {
   LinearAllocator<> &get_main_or_local_allocator()
   {
     if (this->use_multi_threading()) {
-      return local_allocators_.local();
+      return thread_locals_->local().allocator;
     }
-    return *main_local_allocator_;
+    return main_allocator_;
   }
 };
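
The FN_LAZY_FUNCTION_DEBUG_THREADS block in ensure_thread_locals() is also worth noting: instead of locking, the lazy creation relies on the invariant that only the main thread ever builds the container, and debug builds turn that assumption into an assert. A generic version of that guard might look like the following sketch (standalone, with made-up names such as DEBUG_THREADS and LazyState; it is not Blender code):

  #include <cassert>
  #include <thread>

  /* Lazily initialized state that may only be built by the thread
   * that constructed the owning object. */
  class LazyState {
   public:
    LazyState() : owner_thread_(std::this_thread::get_id()) {}

    void ensure_initialized()
    {
  #ifdef DEBUG_THREADS
      /* In debug builds, verify the single-initializer-thread invariant
       * instead of paying for a lock in release builds. */
      assert(owner_thread_ == std::this_thread::get_id());
  #endif
      if (!initialized_) {
        initialized_ = true;  /* real code would build the expensive state here */
      }
    }

   private:
    std::thread::id owner_thread_;
    bool initialized_ = false;
  };

  int main()
  {
    LazyState state;
    state.ensure_initialized();  /* fine: called from the creating thread */
  }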