WIP: BLI: reduce idle threads when computing caches #111686

Draft
Jacques Lucke wants to merge 3 commits from JacquesLucke/blender:reduce-idle-threads-when-computing-caches into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
3 changed files with 59 additions and 11 deletions

View File

@ -63,14 +63,23 @@
#include "BLI_function_ref.hh"
namespace tbb {
class task_group;
}
namespace blender {
class CacheMutex {
private:
std::mutex mutex_;
std::atomic<bool> cache_valid_ = false;
bool is_computing_in_group_ = false;
std::unique_ptr<tbb::task_group> task_group_;
public:
CacheMutex();
~CacheMutex();
/**
* Make sure the cache exists and is up to date. This calls `compute_cache` once to update the
* cache (which is stored outside of this class) if it is dirty, otherwise it does nothing.
@ -78,7 +87,7 @@ class CacheMutex {
* This function is thread-safe under the assumption that the same parameters are passed from
* every thread.
*/
void ensure(FunctionRef<void()> compute_cache);
void ensure(FunctionRef<void()> compute_cache, bool is_expensive = false);
/**
* Reset the cache. The next time #ensure is called, it will recompute the cache.

View File

@ -57,9 +57,9 @@ template<typename T> class SharedCache {
* If the cache is dirty, trigger its computation with the provided function which should set
* the proper data.
*/
void ensure(FunctionRef<void(T &data)> compute_cache)
void ensure(FunctionRef<void(T &data)> compute_cache, const bool is_expensive = false)
{
cache_->mutex.ensure([&]() { compute_cache(this->cache_->data); });
cache_->mutex.ensure([&]() { compute_cache(this->cache_->data); }, is_expensive);
}
/**
@ -68,7 +68,7 @@ template<typename T> class SharedCache {
* the recalculation is only expected to make a small change to the cached data, since using
* #tag_dirty() and #ensure() separately may require rebuilding the cache from scratch.
*/
void update(FunctionRef<void(T &data)> compute_cache)
void update(FunctionRef<void(T &data)> compute_cache, const bool is_expensive = false)
{
if (cache_.unique()) {
cache_->mutex.tag_dirty();
@ -76,7 +76,7 @@ template<typename T> class SharedCache {
else {
cache_ = std::make_shared<CacheData>(cache_->data);
}
cache_->mutex.ensure([&]() { compute_cache(this->cache_->data); });
cache_->mutex.ensure([&]() { compute_cache(this->cache_->data); }, is_expensive);
}
/** Retrieve the cached data. */

View File

@ -5,23 +5,62 @@
#include "BLI_cache_mutex.hh"
#include "BLI_task.hh"
#ifdef WITH_TBB
# include <tbb/task_group.h>
#endif
namespace blender {
/* Defaulted here (not in the header) because `tbb::task_group` is only forward-declared in the
 * header; `std::unique_ptr` needs the complete type for destruction. */
CacheMutex::CacheMutex() = default;
CacheMutex::~CacheMutex() = default;

/**
 * Make sure the cache exists and is up to date, calling `compute_cache` if it is dirty.
 * Thread-safe under the assumption that all threads pass equivalent parameters.
 *
 * \param is_expensive: When true, the computation runs in a `tbb::task_group` so that other
 * threads needing the same cache can wait on the group (and steal work) instead of blocking
 * idly on the mutex.
 */
void CacheMutex::ensure(const FunctionRef<void()> compute_cache, const bool is_expensive)
{
  if (cache_valid_.load(std::memory_order_acquire)) {
    /* Fast path: the cache is computed already. */
    return;
  }
  mutex_.lock();
  /* Double-checked lock: another thread may have computed the cache while we waited for the
   * mutex. Relaxed order suffices because acquiring the mutex already synchronizes. */
  if (cache_valid_.load(std::memory_order_relaxed)) {
    mutex_.unlock();
    return;
  }
  if (is_computing_in_group_) {
    /* Another thread is already computing the cache inside `task_group_`. Wait on the task
     * group instead of the mutex, so this thread can steal work from somewhere else instead of
     * being idle until the computation is done. Accessing `task_group_` after unlocking is safe
     * because it is only ever created (never destroyed) while `is_computing_in_group_` is set.
     * NOTE(review): between the task finishing and `cache_valid_` being stored below, this loop
     * can spin with `wait()` returning immediately — confirm this brief busy-wait is acceptable. */
    mutex_.unlock();
    while (!cache_valid_.load(std::memory_order_acquire)) {
      task_group_->wait();
    }
    return;
  }
  if (is_expensive) {
    /* NOTE(review): `tbb::task_group` is used unconditionally here, but the header is only
     * included `#ifdef WITH_TBB` — confirm this path is compiled only when TBB is enabled. */
    if (!task_group_) {
      task_group_ = std::make_unique<tbb::task_group>();
    }
    is_computing_in_group_ = true;
    mutex_.unlock();
    /* Run the actual computation while the mutex is NOT locked, so that other threads can enter
     * this function and wait on the task group in the meantime. Task isolation is not necessary
     * because no mutex is held during the computation. */
    task_group_->run_and_wait(compute_cache);
    /* Publish the result only AFTER the computation has finished. Storing `cache_valid_` before
     * running `compute_cache` (as the draft did) would let other threads take the fast path
     * above and read an uncomputed cache. */
    std::scoped_lock lock{mutex_};
    is_computing_in_group_ = false;
    cache_valid_.store(true, std::memory_order_release);
  }
  else {
    /* Use task isolation because a mutex is locked and the cache computation might use
     * multi-threading. */
    threading::isolate_task(compute_cache);
    cache_valid_.store(true, std::memory_order_release);
    mutex_.unlock();
  }
}
} // namespace blender