WIP: BLI: reduce idle threads when computing caches #111686

Draft
Jacques Lucke wants to merge 3 commits from JacquesLucke/blender:reduce-idle-threads-when-computing-caches into main

When changing the target branch, be careful to rebase the branch in your fork to match. See documentation.
3 changed files with 59 additions and 11 deletions

View File

@ -63,14 +63,23 @@
#include "BLI_function_ref.hh"
namespace tbb {
class task_group;
}
namespace blender {
class CacheMutex {
private:
std::mutex mutex_;
std::atomic<bool> cache_valid_ = false;
bool is_computing_in_group_ = false;
std::unique_ptr<tbb::task_group> task_group_;
public:
CacheMutex();
~CacheMutex();
/**
* Make sure the cache exists and is up to date. This calls `compute_cache` once to update the
* cache (which is stored outside of this class) if it is dirty, otherwise it does nothing.
@ -78,7 +87,7 @@ class CacheMutex {
* This function is thread-safe under the assumption that the same parameters are passed from
* every thread.
*/
void ensure(FunctionRef<void()> compute_cache);
void ensure(FunctionRef<void()> compute_cache, bool is_expensive = false);
/**
* Reset the cache. The next time #ensure is called, it will recompute the cache.

View File

@ -57,9 +57,9 @@ template<typename T> class SharedCache {
* If the cache is dirty, trigger its computation with the provided function which should set
* the proper data.
*/
void ensure(FunctionRef<void(T &data)> compute_cache)
void ensure(FunctionRef<void(T &data)> compute_cache, const bool is_expensive = false)
{
cache_->mutex.ensure([&]() { compute_cache(this->cache_->data); });
cache_->mutex.ensure([&]() { compute_cache(this->cache_->data); }, is_expensive);
}
/**
@ -68,7 +68,7 @@ template<typename T> class SharedCache {
* the recalculation is only expected to make a small change to the cached data, since using
* #tag_dirty() and #ensure() separately may require rebuilding the cache from scratch.
*/
void update(FunctionRef<void(T &data)> compute_cache)
void update(FunctionRef<void(T &data)> compute_cache, const bool is_expensive = false)
{
if (cache_.unique()) {
cache_->mutex.tag_dirty();
@ -76,7 +76,7 @@ template<typename T> class SharedCache {
else {
cache_ = std::make_shared<CacheData>(cache_->data);
}
cache_->mutex.ensure([&]() { compute_cache(this->cache_->data); });
cache_->mutex.ensure([&]() { compute_cache(this->cache_->data); }, is_expensive);
}
/** Retrieve the cached data. */

View File

@ -5,23 +5,62 @@
#include "BLI_cache_mutex.hh"
#include "BLI_task.hh"
#ifdef WITH_TBB
# include <tbb/task_group.h>
#endif
namespace blender {
/* Defaulted here (not in the header) because `tbb::task_group` is only forward-declared in the
 * header; `std::unique_ptr` needs the complete type for destruction. */
CacheMutex::CacheMutex() = default;
CacheMutex::~CacheMutex() = default;

/**
 * Make sure the cache exists and is up to date, calling `compute_cache` if it is dirty.
 * Thread-safe under the assumption that all threads pass equivalent parameters.
 *
 * \param is_expensive: When true, the computation runs in a `tbb::task_group` so that other
 * threads needing the same cache can wait on the group (and steal work) instead of blocking
 * idly on the mutex.
 */
void CacheMutex::ensure(const FunctionRef<void()> compute_cache, const bool is_expensive)
{
  if (cache_valid_.load(std::memory_order_acquire)) {
    /* Fast path: the cache is computed already. */
    return;
  }
  mutex_.lock();
  /* Double-checked lock: another thread may have computed the cache while we waited for the
   * mutex. Relaxed order suffices because acquiring the mutex already synchronizes. */
  if (cache_valid_.load(std::memory_order_relaxed)) {
    mutex_.unlock();
    return;
  }
  if (is_computing_in_group_) {
    /* Another thread is already computing the cache inside `task_group_`. Wait on the task
     * group instead of the mutex, so this thread can steal work from somewhere else instead of
     * being idle until the computation is done. Accessing `task_group_` after unlocking is safe
     * because it is only ever created (never destroyed) while `is_computing_in_group_` is set.
     * NOTE(review): between the task finishing and `cache_valid_` being stored below, this loop
     * can spin with `wait()` returning immediately — confirm this brief busy-wait is acceptable. */
    mutex_.unlock();
    while (!cache_valid_.load(std::memory_order_acquire)) {
      task_group_->wait();
    }
    return;
  }
  if (is_expensive) {
    /* NOTE(review): `tbb::task_group` is used unconditionally here, but the header is only
     * included `#ifdef WITH_TBB` — confirm this path is compiled only when TBB is enabled. */
    if (!task_group_) {
      task_group_ = std::make_unique<tbb::task_group>();
    }
    is_computing_in_group_ = true;
    mutex_.unlock();
    /* Run the actual computation while the mutex is NOT locked, so that other threads can enter
     * this function and wait on the task group in the meantime. Task isolation is not necessary
     * because no mutex is held during the computation. */
    task_group_->run_and_wait(compute_cache);
    /* Publish the result only AFTER the computation has finished. Storing `cache_valid_` before
     * running `compute_cache` (as the draft did) would let other threads take the fast path
     * above and read an uncomputed cache. */
    std::scoped_lock lock{mutex_};
    is_computing_in_group_ = false;
    cache_valid_.store(true, std::memory_order_release);
  }
  else {
    /* Use task isolation because a mutex is locked and the cache computation might use
     * multi-threading. */
    threading::isolate_task(compute_cache);
    cache_valid_.store(true, std::memory_order_release);
    mutex_.unlock();
  }
}
} // namespace blender