From 43f1c3db2be563b6bd150792581cd006f574da44 Mon Sep 17 00:00:00 2001
From: Michael Parkin-White <mparkinwhite@apple.com>
Date: Thu, 20 Apr 2023 11:32:02 +0100
Subject: [PATCH] Metal: TF more optimal for hair refinement

Prefer Transform Feedback (TF) over compute for hair refinement on
Metal, as vertex work can be pipelined with existing rendering work
that is already in flight.

This approach is ~20% faster, depending on the scene. Note that the
current implementation only uses TF, as storage buffer support is
disabled. However, once storage buffer support is added, we should
still use the TF path.

Authored by Apple: Michael Parkin-White
---
 source/blender/draw/intern/draw_curves.cc | 7 ++++++-
 source/blender/draw/intern/draw_hair.cc   | 7 ++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/source/blender/draw/intern/draw_curves.cc b/source/blender/draw/intern/draw_curves.cc
index f44cd9ca0f4..ae3a908e483 100644
--- a/source/blender/draw/intern/draw_curves.cc
+++ b/source/blender/draw/intern/draw_curves.cc
@@ -33,7 +33,12 @@
 
 BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get()
 {
-  if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
+  /* NOTE: Curve refine is faster using transform feedback via vertex processing pipeline with
+   * Metal and Apple Silicon GPUs. This is also because vertex work can more easily be executed in
+   * parallel with fragment work, whereas compute inserts an explicit dependency,
+   * due to switching of command encoder types. */
+  if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support() &&
+      (GPU_backend_get_type() != GPU_BACKEND_METAL)) {
     return PART_REFINE_SHADER_COMPUTE;
   }
   if (GPU_transform_feedback_support()) {
diff --git a/source/blender/draw/intern/draw_hair.cc b/source/blender/draw/intern/draw_hair.cc
index a66a164f0fb..03a222a7157 100644
--- a/source/blender/draw/intern/draw_hair.cc
+++ b/source/blender/draw/intern/draw_hair.cc
@@ -36,7 +36,12 @@
 
 BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get()
 {
-  if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
+  /* NOTE: Hair refine is faster using transform feedback via vertex processing pipeline with Metal
+   * and Apple Silicon GPUs. This is also because vertex work can more easily be executed in
+   * parallel with fragment work, whereas compute inserts an explicit dependency,
+   * due to switching of command encoder types. */
+  if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support() &&
+      (GPU_backend_get_type() != GPU_BACKEND_METAL)) {
     return PART_REFINE_SHADER_COMPUTE;
   }
   if (GPU_transform_feedback_support()) {
-- 
2.30.2