Merge remote-tracking branch 'origin' into temp-pbvh-split

temp-pbvh-split: fix compile error
Enable USD Preview Surface import by default
2023-01-23 09:14:43 -08:00 · 2023-01-23 09:11:57 -08:00 · 2023-01-23 12:02:38 -05:00 · 2023-01-23 17:59:07 +01:00 · 2023-01-23 08:47:46 -08:00 · 2023-01-23 17:47:21 +01:00
36 changed files with 656 additions and 180 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -506,7 +506,7 @@ endif()
 if(NOT APPLE)
  option(WITH_CYCLES_DEVICE_HIP        "Enable Cycles AMD HIP support" ON)
  option(WITH_CYCLES_HIP_BINARIES      "Build Cycles AMD HIP binaries" OFF)
-  set(CYCLES_HIP_BINARIES_ARCH gfx900 gfx906 gfx90c gfx902 gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
+  set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1100 gfx1101 gfx1102 CACHE STRING "AMD HIP architectures to build binaries for")
  mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
  mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
 endif()
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -1671,10 +1671,10 @@ class CyclesPreferences(bpy.types.AddonPreferences):
            elif device_type == 'HIP':
                import sys
                if sys.platform[:3] == "win":
-                    col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
+                    col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
                    col.label(text="and AMD Radeon Pro 21.Q4 driver or newer", icon='BLANK1')
                elif sys.platform.startswith("linux"):
-                    col.label(text="Requires AMD GPU with Vega or RDNA architecture", icon='BLANK1')
+                    col.label(text="Requires AMD GPU with RDNA architecture", icon='BLANK1')
                    col.label(text="and AMD driver version 22.10 or newer", icon='BLANK1')
            elif device_type == 'ONEAPI':
                import sys
--- a/intern/cycles/device/hip/util.h
+++ b/intern/cycles/device/hip/util.h
@@ -51,7 +51,7 @@ static inline bool hipSupportsDevice(const int hipDevId)
  hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
  hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);

-  return (major >= 9);
+  return (major >= 10);
 }

 CCL_NAMESPACE_END
--- a/intern/ghost/intern/GHOST_ContextCGL.h
+++ b/intern/ghost/intern/GHOST_ContextCGL.h
@@ -23,6 +23,22 @@
@class NSView;

 class GHOST_ContextCGL : public GHOST_Context {
+
+ public:
+  /* Defines the number of simultaneous command buffers which can be in flight.
+   * The default limit of `64` is considered to be optimal for Blender. Too many command buffers
+   * will result in workload fragmentation and additional system-level overhead. This limit should
+   * also only be increased if the application is consistently exceeding the limit, and there are
+   * no command buffer leaks.
+   *
+   * If this limit is reached, starting a new command buffer will fail. The Metal backend will
+   * therefore stall until completion and log a warning when this limit is reached in order to
+   * ensure correct function of the app.
+   *
+   * It is generally preferable to reduce the prevalence of GPU_flush or GPU Context switches
+   * (which will both break command submissions), rather than increasing this limit. */
+  static const int max_command_buffer_count = 64;
+
 public:
  /**
   * Constructor.
--- a/intern/ghost/intern/GHOST_ContextCGL.mm
+++ b/intern/ghost/intern/GHOST_ContextCGL.mm
@@ -529,7 +529,8 @@ void GHOST_ContextCGL::metalInit()
    id<MTLDevice> device = m_metalLayer.device;

    /* Create a command queue for blit/present operation. */
-    m_metalCmdQueue = (MTLCommandQueue *)[device newCommandQueue];
+    m_metalCmdQueue = (MTLCommandQueue *)[device
+        newCommandQueueWithMaxCommandBufferCount:GHOST_ContextCGL::max_command_buffer_count];
    [m_metalCmdQueue retain];

    /* Create shaders for blit operation. */
--- a/release/datafiles/locale
+++ b/release/datafiles/locale
--- a/release/scripts/addons
+++ b/release/scripts/addons
--- a/source/blender/blenkernel/BKE_pbvh.h
+++ b/source/blender/blenkernel/BKE_pbvh.h
@@ -163,7 +163,8 @@ typedef enum {
  PBVH_UpdateTopology = 1 << 13,
  PBVH_UpdateColor = 1 << 14,
  PBVH_RebuildPixels = 1 << 15,
-  PBVH_TopologyUpdated = 1 << 16, /* Used internally by pbvh_bmesh.c */
+  PBVH_TexLeaf = 1 << 16,
+  PBVH_TopologyUpdated = 1 << 17, /* Used internally by pbvh_bmesh.c */

 } PBVHNodeFlags;

@@ -337,7 +338,12 @@ void BKE_pbvh_search_callback(PBVH *pbvh,

 void BKE_pbvh_search_gather(
    PBVH *pbvh, BKE_pbvh_SearchCallback scb, void *search_data, PBVHNode ***array, int *tot);
-
+void BKE_pbvh_search_gather_ex(PBVH *pbvh,
+                               BKE_pbvh_SearchCallback scb,
+                               void *search_data,
+                               PBVHNode ***r_array,
+                               int *r_tot,
+                               PBVHNodeFlags leaf_flag);
 /* Ray-cast
 * the hit callback is called for all leaf nodes intersecting the ray;
 * it's up to the callback to find the primitive within the leaves that is
--- a/source/blender/blenkernel/BKE_pbvh_pixels.hh
+++ b/source/blender/blenkernel/BKE_pbvh_pixels.hh
@@ -200,6 +200,10 @@ struct NodeData {
  {
    undo_regions.clear();
    for (UDIMTilePixels &tile : tiles) {
+      if (tile.pixel_rows.size() == 0) {
+        continue;
+      }
+
      rcti region;
      BLI_rcti_init_minmax(&region);
      for (PackedPixelRow &pixel_row : tile.pixel_rows) {
--- a/source/blender/blenkernel/intern/pbvh.c
+++ b/source/blender/blenkernel/intern/pbvh.c
@@ -1020,7 +1020,9 @@ void BKE_pbvh_free(PBVH *pbvh)
      if (node->bm_other_verts) {
        BLI_gset_free(node->bm_other_verts, NULL);
      }
+    }

+    if (node->flag & (PBVH_Leaf | PBVH_TexLeaf)) {
      pbvh_node_pixels_free(node);
    }
  }
@@ -1094,7 +1096,7 @@ static void pbvh_stack_push(PBVHIter *iter, PBVHNode *node, bool revisiting)
  iter->stacksize++;
 }

-static PBVHNode *pbvh_iter_next(PBVHIter *iter)
+static PBVHNode *pbvh_iter_next(PBVHIter *iter, PBVHNodeFlags leaf_flag)
 {
  /* purpose here is to traverse tree, visiting child nodes before their
   * parents, this order is necessary for e.g. computing bounding boxes */
@@ -1121,7 +1123,7 @@ static PBVHNode *pbvh_iter_next(PBVHIter *iter)
      continue; /* don't traverse, outside of search zone */
    }

-    if (node->flag & PBVH_Leaf) {
+    if (node->flag & leaf_flag) {
      /* immediately hit leaf node */
      return node;
    }
@@ -1166,8 +1168,12 @@ static PBVHNode *pbvh_iter_next_occluded(PBVHIter *iter)
  return NULL;
 }

-void BKE_pbvh_search_gather(
-    PBVH *pbvh, BKE_pbvh_SearchCallback scb, void *search_data, PBVHNode ***r_array, int *r_tot)
+void BKE_pbvh_search_gather_ex(PBVH *pbvh,
+                               BKE_pbvh_SearchCallback scb,
+                               void *search_data,
+                               PBVHNode ***r_array,
+                               int *r_tot,
+                               PBVHNodeFlags leaf_flag)
 {
  PBVHIter iter;
  PBVHNode **array = NULL, *node;
@@ -1175,8 +1181,8 @@ void BKE_pbvh_search_gather(

  pbvh_iter_begin(&iter, pbvh, scb, search_data);

-  while ((node = pbvh_iter_next(&iter))) {
-    if (node->flag & PBVH_Leaf) {
+  while ((node = pbvh_iter_next(&iter, leaf_flag))) {
+    if (node->flag & leaf_flag) {
      if (UNLIKELY(tot == space)) {
        /* resize array if needed */
        space = (tot == 0) ? 32 : space * 2;
@@ -1199,6 +1205,12 @@ void BKE_pbvh_search_gather(
  *r_tot = tot;
 }

+void BKE_pbvh_search_gather(
+    PBVH *pbvh, BKE_pbvh_SearchCallback scb, void *search_data, PBVHNode ***r_array, int *r_tot)
+{
+  BKE_pbvh_search_gather_ex(pbvh, scb, search_data, r_array, r_tot, PBVH_Leaf);
+}
+
 void BKE_pbvh_search_callback(PBVH *pbvh,
                              BKE_pbvh_SearchCallback scb,
                              void *search_data,
@@ -1210,7 +1222,7 @@ void BKE_pbvh_search_callback(PBVH *pbvh,

  pbvh_iter_begin(&iter, pbvh, scb, search_data);

-  while ((node = pbvh_iter_next(&iter))) {
+  while ((node = pbvh_iter_next(&iter, PBVH_Leaf))) {
    if (node->flag & PBVH_Leaf) {
      hcb(node, hit_data);
    }
@@ -1946,7 +1958,7 @@ void BKE_pbvh_redraw_BB(PBVH *pbvh, float bb_min[3], float bb_max[3])

  pbvh_iter_begin(&iter, pbvh, NULL, NULL);

-  while ((node = pbvh_iter_next(&iter))) {
+  while ((node = pbvh_iter_next(&iter, PBVH_Leaf))) {
    if (node->flag & PBVH_UpdateRedraw) {
      BB_expand_with_bb(&bb, &node->vb);
    }
@@ -1966,7 +1978,7 @@ void BKE_pbvh_get_grid_updates(PBVH *pbvh, bool clear, void ***r_gridfaces, int

  pbvh_iter_begin(&iter, pbvh, NULL, NULL);

-  while ((node = pbvh_iter_next(&iter))) {
+  while ((node = pbvh_iter_next(&iter, PBVH_Leaf))) {
    if (node->flag & PBVH_UpdateNormals) {
      for (uint i = 0; i < node->totprim; i++) {
        void *face = pbvh->gridfaces[node->prim_indices[i]];
@@ -3147,9 +3159,24 @@ void BKE_pbvh_draw_debug_cb(PBVH *pbvh,
                                            PBVHNodeFlags flag),
                            void *user_data)
 {
+  PBVHNodeFlags flag = PBVH_Leaf;
+
  for (int a = 0; a < pbvh->totnode; a++) {
    PBVHNode *node = &pbvh->nodes[a];

+    if (node->flag & PBVH_TexLeaf) {
+      flag = PBVH_TexLeaf;
+      break;
+    }
+  }
+
+  for (int a = 0; a < pbvh->totnode; a++) {
+    PBVHNode *node = &pbvh->nodes[a];
+
+    if (!(node->flag & flag)) {
+      continue;
+    }
+
    draw_fn(node, user_data, node->vb.bmin, node->vb.bmax, node->flag);
  }
 }
--- a/source/blender/blenkernel/intern/pbvh_intern.h
+++ b/source/blender/blenkernel/intern/pbvh_intern.h
@@ -150,6 +150,8 @@ struct PBVH {
  int faces_num; /* Do not use directly, use BKE_pbvh_num_faces. */

  int leaf_limit;
+  int pixel_leaf_limit;
+  int depth_limit;

  /* Mesh data */
  struct Mesh *mesh;
--- a/source/blender/blenkernel/intern/pbvh_pixels.cc
+++ b/source/blender/blenkernel/intern/pbvh_pixels.cc
@@ -15,7 +15,9 @@

 #include "BLI_math.h"
 #include "BLI_task.h"
+#include "PIL_time.h"

+#include "BKE_global.h"
 #include "BKE_image_wrappers.hh"

 #include "bmesh.h"
@@ -25,12 +27,6 @@

 namespace blender::bke::pbvh::pixels {

-/**
- * During debugging this check could be enabled.
- * It will write to each image pixel that is covered by the PBVH.
- */
-constexpr bool USE_WATERTIGHT_CHECK = false;
-
 /**
 * Calculate the delta of two neighbor UV coordinates in the given image buffer.
 */
@@ -57,6 +53,315 @@ static float2 calc_barycentric_delta_x(const ImBuf *image_buffer,
  return calc_barycentric_delta(uvs, start_uv, end_uv);
 }

+static int count_node_pixels(PBVHNode &node)
+{
+  if (!node.pixels.node_data) {
+    return 0; /* No pixel data attached to this node. */
+  }
+
+  NodeData &data = BKE_pbvh_pixels_node_data_get(node);
+
+  int totpixel = 0;
+  /* Total pixel count: sum `num_pixels` over all pixel rows of all tiles in the node. */
+  for (UDIMTilePixels &tile : data.tiles) {
+    for (PackedPixelRow &row : tile.pixel_rows) {
+      totpixel += row.num_pixels;
+    }
+  }
+
+  return totpixel;
+}
+
+struct SplitQueueData {
+  ThreadQueue *new_nodes; /* Every SplitNodePair created; drained by split_flush_final_nodes(). */
+  TaskPool *pool;         /* Pool that runs split_thread_job(). */
+  /* Arguments that split_thread_job() forwards to split_pixel_node(). */
+  PBVH *pbvh;
+  Mesh *mesh;
+  Image *image;
+  ImageUser *image_user;
+};
+
+struct SplitNodePair {
+  SplitNodePair *parent;   /* Split this one was created from; null for pre-existing nodes. */
+  PBVHNode node;           /* Working copy, written to `pbvh->nodes` by the final flush. */
+  int children_offset = 0; /* Index of the first child in `pbvh->nodes`; 0 while unassigned. */
+  int depth = 0;           /* Number of splits above this node. */
+  int source_index = -1;   /* Destination index in `pbvh->nodes`; -1 until assigned. */
+  bool is_old = false;     /* True for nodes that already existed in the PBVH. */
+  SplitQueueData *tdata = nullptr; /* Only assigned for pre-existing nodes; null otherwise. */
+  /* Zero-fills `node`; callers assign its flag, bounds and pixel data afterwards. */
+  SplitNodePair(SplitNodePair *node_parent = nullptr) : parent(node_parent)
+  {
+    memset(static_cast<void *>(&node), 0, sizeof(PBVHNode));
+  }
+};
+
+static void split_thread_job(TaskPool *__restrict pool, void *taskdata);
+
+static void split_pixel_node(PBVH *pbvh,
+                             SplitNodePair *split,
+                             Mesh *mesh,
+                             Image *image,
+                             ImageUser *image_user,
+                             SplitQueueData *tdata)
+{
+  BB cb;
+  PBVHNode *node = &split->node;
+  /* Split at the middle of the widest axis; pixel rows are divided over two children. */
+  cb = node->vb;
+  /* At or below the pixel limit, or at the depth limit: only rebuild the undo regions. */
+  if (count_node_pixels(*node) <= pbvh->pixel_leaf_limit || split->depth >= pbvh->depth_limit) {
+    BKE_pbvh_pixels_node_data_get(split->node).rebuild_undo_regions();
+    return;
+  }
+
+  /* Find widest axis and its midpoint */
+  const int axis = BB_widest_axis(&cb);
+  const float mid = (cb.bmax[axis] + cb.bmin[axis]) * 0.5f;
+  /* This node is being split, so it is no longer a texture leaf. */
+  node->flag = (PBVHNodeFlags)((int)node->flag & (int)~PBVH_TexLeaf);
+
+  SplitNodePair *split1 = MEM_new<SplitNodePair>("split_pixel_node split1", split);
+  SplitNodePair *split2 = MEM_new<SplitNodePair>("split_pixel_node split2", split);
+
+  split1->depth = split->depth + 1;
+  split2->depth = split->depth + 1;
+
+  PBVHNode *child1 = &split1->node;
+  PBVHNode *child2 = &split2->node;
+
+  child1->flag = PBVH_TexLeaf;
+  child2->flag = PBVH_TexLeaf;
+
+  child1->vb = cb;
+  child1->vb.bmax[axis] = mid;
+
+  child2->vb = cb;
+  child2->vb.bmin[axis] = mid;
+
+  NodeData &data = BKE_pbvh_pixels_node_data_get(split->node);
+
+  NodeData *data1 = MEM_new<NodeData>(__func__);
+  NodeData *data2 = MEM_new<NodeData>(__func__);
+  child1->pixels.node_data = static_cast<void *>(data1);
+  child2->pixels.node_data = static_cast<void *>(data2);
+  /* Children are assigned the parent's UV primitives and one tile per parent tile. */
+  data1->uv_primitives = data.uv_primitives;
+  data2->uv_primitives = data.uv_primitives;
+
+  data1->tiles.resize(data.tiles.size());
+  data2->tiles.resize(data.tiles.size());
+
+  for (int i : IndexRange(data.tiles.size())) {
+    UDIMTilePixels &tile = data.tiles[i];
+    UDIMTilePixels &tile1 = data1->tiles[i];
+    UDIMTilePixels &tile2 = data2->tiles[i];
+
+    tile1.tile_number = tile2.tile_number = tile.tile_number;
+    tile1.flags.dirty = tile2.flags.dirty = 0;
+  }
+
+  ImageUser image_user2 = *image_user;
+
+  for (int i : IndexRange(data.tiles.size())) {
+    const UDIMTilePixels &tile = data.tiles[i];
+
+    image_user2.tile = tile.tile_number;
+
+    ImBuf *image_buffer = BKE_image_acquire_ibuf(image, &image_user2, nullptr);
+    if (image_buffer == nullptr) {
+      continue;
+    }
+
+    const float(*vert_cos)[3] = BKE_pbvh_get_vert_positions(pbvh);
+    PBVHData &pbvh_data = BKE_pbvh_pixels_data_get(*pbvh);
+
+    for (const PackedPixelRow &row : tile.pixel_rows) {
+      UDIMTilePixels *tile1 = &data1->tiles[i];
+      UDIMTilePixels *tile2 = &data2->tiles[i];
+
+      UVPrimitivePaintInput &uv_prim = data.uv_primitives.paint_input[row.uv_primitive_index];
+      int3 tri = pbvh_data.geom_primitives.vert_indices[uv_prim.geometry_primitive_index];
+
+      float verts[3][3];
+
+      copy_v3_v3(verts[0], vert_cos[tri[0]]);
+      copy_v3_v3(verts[1], vert_cos[tri[1]]);
+      copy_v3_v3(verts[2], vert_cos[tri[2]]);
+
+      float2 delta = uv_prim.delta_barycentric_coord_u;
+      float2 uv1 = row.start_barycentric_coord;
+      float2 uv2 = row.start_barycentric_coord + delta * (float)row.num_pixels;
+
+      float co1[3];
+      float co2[3];
+
+      interp_barycentric_tri_v3(verts, uv1[0], uv1[1], co1);
+      interp_barycentric_tri_v3(verts, uv2[0], uv2[1], co2);
+
+      /* Are we spanning the midpoint? */
+      if ((co1[axis] <= mid) != (co2[axis] <= mid)) {
+        PackedPixelRow row1 = row;
+        float t;
+
+        if (mid < co1[axis]) {
+          t = 1.0f - (mid - co2[axis]) / (co1[axis] - co2[axis]);
+
+          SWAP(UDIMTilePixels *, tile1, tile2);
+        }
+        else {
+          t = (mid - co1[axis]) / (co2[axis] - co1[axis]);
+        }
+        /* Leading pixels stay on the side where the row starts (`tile1`). */
+        int num_pixels = (int)floorf((float)row.num_pixels * t);
+
+        if (num_pixels) {
+          row1.num_pixels = num_pixels;
+          tile1->pixel_rows.append(row1);
+        }
+
+        if (num_pixels != row.num_pixels) {
+          PackedPixelRow row2 = row;
+
+          row2.num_pixels = row.num_pixels - num_pixels;
+
+          row2.start_barycentric_coord = row.start_barycentric_coord +
+                                         uv_prim.delta_barycentric_coord_u * (float)num_pixels;
+          row2.start_image_coordinate = row.start_image_coordinate;
+          row2.start_image_coordinate[0] += num_pixels;
+
+          tile2->pixel_rows.append(row2);
+        }
+      }
+      else if (co1[axis] <= mid && co2[axis] <= mid) {
+        tile1->pixel_rows.append(row);
+      }
+      else {
+        tile2->pixel_rows.append(row);
+      }
+    }
+
+    BKE_image_release_ibuf(image, image_buffer, nullptr);
+  }
+
+  data.undo_regions.clear();
+  /* Original leaves keep their NodeData and clear it; split-created nodes free it. */
+  if (node->flag & PBVH_Leaf) {
+    data.clear_data();
+  }
+  else {
+    pbvh_node_pixels_free(node);
+  }
+  /* Queue both children for the final flush and schedule them for further splitting. */
+  BLI_thread_queue_push(tdata->new_nodes, static_cast<void *>(split1));
+  BLI_thread_queue_push(tdata->new_nodes, static_cast<void *>(split2));
+
+  BLI_task_pool_push(tdata->pool, split_thread_job, static_cast<void *>(split1), false, nullptr);
+  BLI_task_pool_push(tdata->pool, split_thread_job, static_cast<void *>(split2), false, nullptr);
+}
+
+static void split_flush_final_nodes(SplitQueueData *tdata)
+{
+  PBVH *pbvh = tdata->pbvh;
+  Vector<SplitNodePair *> splits;
+  /* Drain the queue and give each new split a destination index in `pbvh->nodes`. */
+  while (!BLI_thread_queue_is_empty(tdata->new_nodes)) {
+    SplitNodePair *newsplit = static_cast<SplitNodePair *>(BLI_thread_queue_pop(tdata->new_nodes));
+
+    splits.append(newsplit);
+
+    if (newsplit->is_old) {
+      continue; /* Pre-existing node: `source_index` was set when it was queued. */
+    }
+    /* The first sibling seen requests two more nodes; the second uses the slot after it. */
+    if (!newsplit->parent->children_offset) {
+      newsplit->parent->children_offset = pbvh->totnode;
+
+      pbvh_grow_nodes(pbvh, pbvh->totnode + 2);
+      newsplit->source_index = newsplit->parent->children_offset;
+    }
+    else {
+      newsplit->source_index = newsplit->parent->children_offset + 1;
+    }
+  }
+  /* Write the working copies into the PBVH now that all indices are assigned. */
+  for (SplitNodePair *split : splits) {
+    BLI_assert(split->source_index != -1);
+
+    split->node.children_offset = split->children_offset;
+    pbvh->nodes[split->source_index] = split->node;
+  }
+
+  for (SplitNodePair *split : splits) {
+    MEM_delete<SplitNodePair>(split);
+  }
+}
+
+static void split_thread_job(TaskPool *__restrict pool, void *taskdata)
+{
+  /* Task callback: `taskdata` is the SplitNodePair, shared state is the pool's user data. */
+  SplitQueueData *tdata = static_cast<SplitQueueData *>(BLI_task_pool_user_data(pool));
+  SplitNodePair *split = static_cast<SplitNodePair *>(taskdata);
+
+  split_pixel_node(tdata->pbvh, split, tdata->mesh, tdata->image, tdata->image_user, tdata);
+}
+
+static void split_pixel_nodes(PBVH *pbvh, Mesh *mesh, Image *image, ImageUser *image_user)
+{
+  if (G.debug_value == 891) {
+    return; /* Debug value 891 skips pixel node splitting. */
+  }
+  /* Use default limits when none are set (zero). */
+  if (!pbvh->depth_limit) {
+    pbvh->depth_limit = 40; /* TODO: move into a constant */
+  }
+
+  if (!pbvh->pixel_leaf_limit) {
+    pbvh->pixel_leaf_limit = 256 * 256; /* TODO: move into a constant */
+  }
+
+  SplitQueueData tdata;
+  TaskPool *pool = BLI_task_pool_create_suspended(&tdata, TASK_PRIORITY_HIGH);
+
+  tdata.pool = pool;
+  tdata.pbvh = pbvh;
+  tdata.mesh = mesh;
+  tdata.image = image;
+  tdata.image_user = image_user;
+
+  tdata.new_nodes = BLI_thread_queue_init();
+
+  /* Set up initial jobs before initializing threads. */
+  for (int i : IndexRange(pbvh->totnode)) {
+    if (pbvh->nodes[i].flag & PBVH_TexLeaf) {
+      SplitNodePair *split = MEM_new<SplitNodePair>("split_pixel_nodes split");
+
+      split->source_index = i;
+      split->is_old = true;
+      split->node = pbvh->nodes[i];
+      split->tdata = &tdata;
+
+      BLI_task_pool_push(pool, split_thread_job, static_cast<void *>(split), false, nullptr);
+
+      BLI_thread_queue_push(tdata.new_nodes, static_cast<void *>(split));
+    }
+  }
+  /* Run the jobs to completion, then copy the resulting nodes into the PBVH. */
+  BLI_task_pool_work_and_wait(pool);
+  BLI_task_pool_free(pool);
+
+  split_flush_final_nodes(&tdata);
+
+  BLI_thread_queue_free(tdata.new_nodes);
+}
+
+/**
+ * During debugging this check could be enabled.
+ * It will write to each image pixel that is covered by the PBVH.
+ */
+constexpr bool USE_WATERTIGHT_CHECK = false;
+
 static void extract_barycentric_pixels(UDIMTilePixels &tile_data,
                                       const ImBuf *image_buffer,
                                       const uv_islands::UVIslandsMask &uv_mask,
@@ -233,7 +538,10 @@ static void do_encode_pixels(void *__restrict userdata,

 static bool should_pixels_be_updated(PBVHNode *node)
 {
-  if ((node->flag & PBVH_Leaf) == 0) {
+  if ((node->flag & (PBVH_Leaf | PBVH_TexLeaf)) == 0) {
+    return false;
+  }
+  if (node->children_offset != 0) {
    return false;
  }
  if ((node->flag & PBVH_RebuildPixels) != 0) {
@@ -349,17 +657,17 @@ static void apply_watertight_check(PBVH *pbvh, Image *image, ImageUser *image_us
  BKE_image_partial_update_mark_full_update(image);
 }

-static void update_pixels(PBVH *pbvh, Mesh *mesh, Image *image, ImageUser *image_user)
+static bool update_pixels(PBVH *pbvh, Mesh *mesh, Image *image, ImageUser *image_user)
 {
  Vector<PBVHNode *> nodes_to_update;

  if (!find_nodes_to_update(pbvh, nodes_to_update)) {
-    return;
+    return false;
  }

  const StringRef active_uv_name = CustomData_get_active_layer_name(&mesh->ldata, CD_PROP_FLOAT2);
  if (active_uv_name.is_empty()) {
-    return;
+    return false;
  }

  const AttributeAccessor attributes = mesh->attributes();
@@ -422,6 +730,15 @@ static void update_pixels(PBVH *pbvh, Mesh *mesh, Image *image, ImageUser *image
    node->flag = static_cast<PBVHNodeFlags>(node->flag & ~PBVH_RebuildPixels);
  }

+  /* Add PBVH_TexLeaf flag */
+  for (int i : IndexRange(pbvh->totnode)) {
+    PBVHNode &node = pbvh->nodes[i];
+
+    if (node.flag & PBVH_Leaf) {
+      node.flag = (PBVHNodeFlags)((int)node.flag | (int)PBVH_TexLeaf);
+    }
+  }
+
 //#define DO_PRINT_STATISTICS
 #ifdef DO_PRINT_STATISTICS
  /* Print some statistics about compression ratio. */
@@ -434,7 +751,6 @@ static void update_pixels(PBVH *pbvh, Mesh *mesh, Image *image, ImageUser *image
        continue;
      }
      NodeData *node_data = static_cast<NodeData *>(node->pixels.node_data);
-      compressed_data_len += node_data->triangles.mem_size();
      for (const UDIMTilePixels &tile_data : node_data->tiles) {
        compressed_data_len += tile_data.encoded_pixels.size() * sizeof(PackedPixelRow);
        for (const PackedPixelRow &encoded_pixels : tile_data.encoded_pixels) {
@@ -448,6 +764,8 @@ static void update_pixels(PBVH *pbvh, Mesh *mesh, Image *image, ImageUser *image
           float(compressed_data_len) / num_pixels);
  }
 #endif
+
+  return true;
 }

 NodeData &BKE_pbvh_pixels_node_data_get(PBVHNode &node)
@@ -484,7 +802,6 @@ void BKE_pbvh_pixels_mark_image_dirty(PBVHNode &node, Image &image, ImageUser &i
    node_data->flags.dirty = false;
  }
 }
-
 }  // namespace blender::bke::pbvh::pixels

 extern "C" {
@@ -492,12 +809,19 @@ using namespace blender::bke::pbvh::pixels;

 void BKE_pbvh_build_pixels(PBVH *pbvh, Mesh *mesh, Image *image, ImageUser *image_user)
 {
-  update_pixels(pbvh, mesh, image, image_user);
+  if (update_pixels(pbvh, mesh, image, image_user)) {
+    split_pixel_nodes(pbvh, mesh, image, image_user);
+  }
 }

 void pbvh_node_pixels_free(PBVHNode *node)
 {
  NodeData *node_data = static_cast<NodeData *>(node->pixels.node_data);
+
+  if (!node_data) {
+    return;
+  }
+
  MEM_delete(node_data);
  node->pixels.node_data = nullptr;
 }
--- a/source/blender/draw/engines/basic/basic_engine.c
+++ b/source/blender/draw/engines/basic/basic_engine.c
@@ -10,9 +10,11 @@

 #include "DRW_render.h"

+#include "BKE_global.h"
 #include "BKE_object.h"
 #include "BKE_paint.h"
 #include "BKE_particle.h"
+#include "BKE_pbvh.h"

 #include "BLI_alloca.h"

@@ -219,6 +221,16 @@ static void basic_cache_populate(void *vedata, Object *ob)
        DRW_shgroup_call(shgrp, geom, ob);
      }
    }
+
+    if (G.debug_value == 889 && ob->sculpt && ob->sculpt->pbvh) {
+      int debug_node_nr = 0;
+      DRW_debug_modelmat(ob->object_to_world);
+      BKE_pbvh_draw_debug_cb(
+          ob->sculpt->pbvh,
+          (void (*)(void *d, const float min[3], const float max[3], PBVHNodeFlags f))
+              DRW_sculpt_debug_cb,
+          &debug_node_nr);
+    }
  }
 }

--- a/source/blender/draw/engines/eevee/eevee_lightcache.c
+++ b/source/blender/draw/engines/eevee/eevee_lightcache.c
@@ -395,7 +395,8 @@ static bool eevee_lightcache_static_load(LightCache *lcache)
  }

  if (lcache->grid_tx.tex == NULL) {
-    eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
+    eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT |
+                             GPU_TEXTURE_USAGE_HOST_READ;
    lcache->grid_tx.tex = GPU_texture_create_2d_array_ex("lightcache_irradiance",
                                                         UNPACK3(lcache->grid_tx.tex_size),
                                                         1,
@@ -716,7 +717,8 @@ static void eevee_lightbake_create_resources(EEVEE_LightBake *lbake)
                                                    lbake->irr_size[1],
                                                    lbake->irr_size[2],
                                                    IRRADIANCE_FORMAT,
-                                                    GPU_TEXTURE_USAGE_SHADER_READ,
+                                                    GPU_TEXTURE_USAGE_SHADER_READ |
+                                                        GPU_TEXTURE_USAGE_ATTACHMENT,
                                                    DRW_TEX_FILTER,
                                                    NULL);

--- a/source/blender/draw/engines/eevee/eevee_materials.c
+++ b/source/blender/draw/engines/eevee/eevee_materials.c
@@ -15,6 +15,7 @@
 #include "BLI_rand.h"
 #include "BLI_string_utils.h"

+#include "BKE_global.h"
 #include "BKE_paint.h"
 #include "BKE_particle.h"

@@ -883,6 +884,16 @@ void EEVEE_materials_cache_populate(EEVEE_Data *vedata,
            *cast_shadow = *cast_shadow || (matcache[i].shadow_grp != NULL);
          }
        }
+
+        if (G.debug_value == 889 && ob->sculpt && ob->sculpt->pbvh) {
+          int debug_node_nr = 0;
+          DRW_debug_modelmat(ob->object_to_world);
+          BKE_pbvh_draw_debug_cb(
+              ob->sculpt->pbvh,
+              (void (*)(void *d, const float min[3], const float max[3], PBVHNodeFlags f))
+                  DRW_sculpt_debug_cb,
+              &debug_node_nr);
+        }
      }

      /* Motion Blur Vectors. */
--- a/source/blender/draw/engines/workbench/shaders/infos/workbench_prepass_info.hh
+++ b/source/blender/draw/engines/workbench/shaders/infos/workbench_prepass_info.hh
@@ -126,18 +126,19 @@ GPU_SHADER_CREATE_INFO(workbench_next_prepass)

 GPU_SHADER_CREATE_INFO(workbench_color_material)
    .define("WORKBENCH_COLOR_MATERIAL")
-    .push_constant(Type::VEC4, "material_data");
+    .storage_buf(WB_MATERIAL_SLOT, Qualifier::READ, "vec4", "materials_data[]");

 GPU_SHADER_CREATE_INFO(workbench_color_texture)
    .define("WORKBENCH_COLOR_TEXTURE")
    .define("WORKBENCH_TEXTURE_IMAGE_ARRAY")
+    .define("WORKBENCH_COLOR_MATERIAL")
+    .storage_buf(WB_MATERIAL_SLOT, Qualifier::READ, "vec4", "materials_data[]")
    .sampler(1, ImageType::FLOAT_2D, "imageTexture", Frequency::BATCH)
    .sampler(2, ImageType::FLOAT_2D_ARRAY, "imageTileArray", Frequency::BATCH)
    .sampler(3, ImageType::FLOAT_1D_ARRAY, "imageTileData", Frequency::BATCH)
    .push_constant(Type::BOOL, "isImageTile")
    .push_constant(Type::BOOL, "imagePremult")
-    .push_constant(Type::FLOAT, "imageTransparencyCutoff")
-    .additional_info("workbench_color_material");
+    .push_constant(Type::FLOAT, "imageTransparencyCutoff");

 GPU_SHADER_CREATE_INFO(workbench_color_vertex).define("WORKBENCH_COLOR_VERTEX");

--- a/source/blender/draw/engines/workbench/shaders/workbench_material_lib.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_material_lib.glsl
@@ -17,7 +17,7 @@ void workbench_material_data_get(int handle,
 #else

 #  ifdef WORKBENCH_COLOR_MATERIAL
-  vec4 data = material_data;
+  vec4 data = materials_data[handle];
 #  else
  vec4 data = vec4(0.0);
 #  endif
--- a/source/blender/draw/engines/workbench/workbench_engine.cc
+++ b/source/blender/draw/engines/workbench/workbench_engine.cc
@@ -196,7 +196,12 @@ class Instance {
            if (batches[i] == nullptr) {
              continue;
            }
-            Material mat;
+            /* TODO(fclem): This creates a cull-able instance for each sub-object. This is done
+             * for simplicity, but it increases the overhead per object.
+             * Instead, we should use an indirection buffer to the material buffer. */
+            ResourceHandle _handle = i == 0 ? handle : manager.resource_handle(ob_ref);
+
+            Material &mat = resources.material_buf.get_or_resize(_handle.resource_index());

            if (::Material *_mat = BKE_object_material_get_eval(ob_ref.object, i + 1)) {
              mat = Material(*_mat);
@@ -214,7 +219,7 @@ class Instance {
              get_material_image(ob_ref.object, i + 1, image, iuser, sampler_state);
            }

-            draw_mesh(ob_ref, mat, batches[i], handle, image, sampler_state, iuser);
+            draw_mesh(ob_ref, mat, batches[i], _handle, image, sampler_state, iuser);
          }
        }
      }
@@ -236,7 +241,7 @@ class Instance {
        }

        if (batch) {
-          Material mat;
+          Material &mat = resources.material_buf.get_or_resize(handle.resource_index());

          if (object_state.color_type == V3D_SHADING_OBJECT_COLOR) {
            mat = Material(*ob_ref.object);
@@ -282,7 +287,7 @@ class Instance {
    const bool in_front = (ob_ref.object->dtx & OB_DRAW_IN_FRONT) != 0;

    auto draw = [&](MeshPass &pass) {
-      pass.draw(ob_ref, batch, handle, material, image, sampler_state, iuser);
+      pass.draw(ob_ref, batch, handle, image, sampler_state, iuser);
    };

    if (scene_state.xray_mode || material.is_transparent()) {
--- a/source/blender/draw/engines/workbench/workbench_mesh_passes.cc
+++ b/source/blender/draw/engines/workbench/workbench_mesh_passes.cc
@@ -34,7 +34,6 @@ void MeshPass::init_subpasses(ePipelineType pipeline,
                              bool clip,
                              ShaderCache &shaders)
 {
-  material_subpass_map_.clear();
  texture_subpass_map_.clear();

  static std::string pass_names[geometry_type_len][shader_type_len] = {};
@@ -58,7 +57,6 @@ void MeshPass::init_subpasses(ePipelineType pipeline,
 void MeshPass::draw(ObjectRef &ref,
                    GPUBatch *batch,
                    ResourceHandle handle,
-                    Material material,
                    ::Image *image /* = nullptr */,
                    eGPUSamplerState sampler_state /* = GPU_SAMPLER_DEFAULT */,
                    ImageUser *iuser /* = nullptr */)
@@ -66,7 +64,6 @@ void MeshPass::draw(ObjectRef &ref,
  is_empty_ = false;

  eGeometryType geometry_type = geometry_type_from_object(ref.object);
-
  if (image) {
    GPUTexture *texture = nullptr;
    GPUTexture *tilemap = nullptr;
@@ -78,47 +75,33 @@ void MeshPass::draw(ObjectRef &ref,
      texture = BKE_image_get_gpu_texture(image, iuser, nullptr);
    }
    if (texture) {
-      auto add_texture_cb = [&] {
-        PassMain::Sub &sub_pass =
-            passes_[static_cast<int>(geometry_type)][static_cast<int>(eShaderType::TEXTURE)]->sub(
-                image->id.name);
-
-        sub_pass.push_constant("material_data", *reinterpret_cast<float4 *>(&material));
+      auto add_cb = [&] {
+        PassMain::Sub *sub_pass =
+            passes_[static_cast<int>(geometry_type)][static_cast<int>(eShaderType::TEXTURE)];
+        sub_pass = &sub_pass->sub(image->id.name);
        if (tilemap) {
-          sub_pass.bind_texture(WB_TILE_ARRAY_SLOT, texture, sampler_state);
-          sub_pass.bind_texture(WB_TILE_DATA_SLOT, tilemap);
+          sub_pass->bind_texture(WB_TILE_ARRAY_SLOT, texture, sampler_state);
+          sub_pass->bind_texture(WB_TILE_DATA_SLOT, tilemap);
        }
        else {
-          sub_pass.bind_texture(WB_TEXTURE_SLOT, texture, sampler_state);
+          sub_pass->bind_texture(WB_TEXTURE_SLOT, texture, sampler_state);
        }
-        sub_pass.push_constant("isImageTile", tilemap != nullptr);
-        sub_pass.push_constant("imagePremult", image && image->alpha_mode == IMA_ALPHA_PREMUL);
+        sub_pass->push_constant("isImageTile", tilemap != nullptr);
+        sub_pass->push_constant("imagePremult", image && image->alpha_mode == IMA_ALPHA_PREMUL);
        /* TODO(Miguel Pozo): This setting should be exposed on the user side,
         * either as a global parameter (and set it here)
         * or by reading the Material Clipping Threshold (and set it per material) */
-        sub_pass.push_constant("imageTransparencyCutoff", 0.1f);
-        return &sub_pass;
+        sub_pass->push_constant("imageTransparencyCutoff", 0.1f);
+        return sub_pass;
      };

-      texture_subpass_map_
-          .lookup_or_add_cb(TextureSubPassKey(texture, geometry_type), add_texture_cb)
+      texture_subpass_map_.lookup_or_add_cb(TextureSubPassKey(texture, geometry_type), add_cb)
          ->draw(batch, handle);
      return;
    }
  }
-
-  auto add_material_cb = [&] {
-    PassMain::Sub &sub_pass =
-        passes_[static_cast<int>(geometry_type)][static_cast<int>(eShaderType::MATERIAL)]->sub(
-            "Material");
-
-    sub_pass.push_constant("material_data", *reinterpret_cast<float4 *>(&material));
-    return &sub_pass;
-  };
-
-  material_subpass_map_
-      .lookup_or_add_cb(MaterialSubPassKey(material, geometry_type), add_material_cb)
-      ->draw(batch, handle);
+  passes_[static_cast<int>(geometry_type)][static_cast<int>(eShaderType::MATERIAL)]->draw(batch,
+                                                                                          handle);
 }

 /** \} */
--- a/source/blender/draw/engines/workbench/workbench_private.hh
+++ b/source/blender/draw/engines/workbench/workbench_private.hh
@@ -51,16 +51,6 @@ struct Material {
  static uint32_t pack_data(float metallic, float roughness, float alpha);

  bool is_transparent();
-
-  inline bool operator==(const Material &a) const
-  {
-    return packed_data == a.packed_data && base_color == a.base_color;
-  }
-
-  inline uint64_t hash() const
-  {
-    return get_default_hash_4(base_color.x, base_color.y, base_color.z, packed_data);
-  }
 };

 void get_material_image(Object *ob,
@@ -171,10 +161,8 @@ struct SceneResources {

 class MeshPass : public PassMain {
 private:
-  using MaterialSubPassKey = std::pair<Material, eGeometryType>;
  using TextureSubPassKey = std::pair<GPUTexture *, eGeometryType>;

-  Map<MaterialSubPassKey, PassMain::Sub *> material_subpass_map_ = {};
  Map<TextureSubPassKey, PassMain::Sub *> texture_subpass_map_ = {};

  PassMain::Sub *passes_[geometry_type_len][shader_type_len] = {{nullptr}};
@@ -196,7 +184,6 @@ class MeshPass : public PassMain {
  void draw(ObjectRef &ref,
            GPUBatch *batch,
            ResourceHandle handle,
-            Material material,
            ::Image *image = nullptr,
            eGPUSamplerState sampler_state = eGPUSamplerState::GPU_SAMPLER_DEFAULT,
            ImageUser *iuser = nullptr);
--- a/source/blender/draw/intern/DRW_render.h
+++ b/source/blender/draw/intern/DRW_render.h
@@ -17,6 +17,7 @@
 #include "BKE_context.h"
 #include "BKE_layer.h"
 #include "BKE_material.h"
+#include "BKE_pbvh.h"
 #include "BKE_scene.h"

 #include "BLT_translation.h"
@@ -1007,6 +1008,9 @@ void DRW_mesh_batch_cache_get_attributes(struct Object *object,
                                         struct DRW_Attributes **r_attrs,
                                         struct DRW_MeshCDMask **r_cd_needed);

+void DRW_sculpt_debug_cb(
+    PBVHNode *node, void *user_data, const float bmin[3], const float bmax[3], PBVHNodeFlags flag);
+
 #ifdef __cplusplus
 }
 #endif
--- a/source/blender/draw/intern/draw_manager_data.cc
+++ b/source/blender/draw/intern/draw_manager_data.cc
@@ -1255,7 +1255,7 @@ static void sculpt_draw_cb(DRWSculptCallbackData *scd,
  }
 }

-static void sculpt_debug_cb(
+void DRW_sculpt_debug_cb(
    PBVHNode *node, void *user_data, const float bmin[3], const float bmax[3], PBVHNodeFlags flag)
 {
  int *debug_node_nr = (int *)user_data;
@@ -1270,7 +1270,8 @@ static void sculpt_debug_cb(
    DRW_debug_bbox(&bb, (float[4]){0.5f, 0.5f, 0.5f, 0.6f});
  }
 #else /* Color coded leaf bounds. */
-  if (flag & PBVH_Leaf) {
+  if (flag & (PBVH_Leaf | PBVH_TexLeaf)) {
+    DRW_debug_bbox(&bb, SCULPT_DEBUG_COLOR((*debug_node_nr)++));
    int color = (*debug_node_nr)++;
    color += BKE_pbvh_debug_draw_gen_get(node);

@@ -1370,7 +1371,7 @@ static void drw_sculpt_generate_calls(DRWSculptCallbackData *scd)
    BKE_pbvh_draw_debug_cb(
        pbvh,
        (void (*)(PBVHNode * n, void *d, const float min[3], const float max[3], PBVHNodeFlags f))
-            sculpt_debug_cb,
+            DRW_sculpt_debug_cb,
        &debug_node_nr);
  }
 }
--- a/source/blender/draw/intern/draw_shader_shared.h
+++ b/source/blender/draw/intern/draw_shader_shared.h
@@ -333,7 +333,7 @@ struct DRWDebugVert {
 BLI_STATIC_ASSERT_ALIGN(DRWDebugVert, 16)

 /* Take the header (DrawCommand) into account. */
-#define DRW_DEBUG_DRAW_VERT_MAX (64 * 1024) - 1
+#define DRW_DEBUG_DRAW_VERT_MAX (64 * 8192) - 1

 /* The debug draw buffer is laid-out as the following struct.
 * But we use plain array in shader code instead because of driver issues. */
--- a/source/blender/editors/io/io_usd.c
+++ b/source/blender/editors/io/io_usd.c
@@ -477,15 +477,15 @@ static void wm_usd_import_draw(bContext *UNUSED(C), wmOperator *op)
  uiItemR(col, ptr, "relative_path", 0, NULL, ICON_NONE);
  uiItemR(col, ptr, "create_collection", 0, NULL, ICON_NONE);
  uiItemR(box, ptr, "light_intensity_scale", 0, NULL, ICON_NONE);
-  uiItemR(box, ptr, "mtl_name_collision_mode", 0, NULL, ICON_NONE);

  box = uiLayoutBox(layout);
-  col = uiLayoutColumnWithHeading(box, true, IFACE_("Experimental"));
+  col = uiLayoutColumnWithHeading(box, true, IFACE_("Materials"));
  uiItemR(col, ptr, "import_usd_preview", 0, NULL, ICON_NONE);
  uiLayoutSetEnabled(col, RNA_boolean_get(ptr, "import_materials"));
  uiLayout *row = uiLayoutRow(col, true);
  uiItemR(row, ptr, "set_material_blend", 0, NULL, ICON_NONE);
  uiLayoutSetEnabled(row, RNA_boolean_get(ptr, "import_usd_preview"));
+  uiItemR(col, ptr, "mtl_name_collision_mode", 0, NULL, ICON_NONE);
 }

 void WM_OT_usd_import(struct wmOperatorType *ot)
@@ -581,7 +581,7 @@ void WM_OT_usd_import(struct wmOperatorType *ot)

  RNA_def_boolean(ot->srna,
                  "import_usd_preview",
-                  false,
+                  true,
                  "Import USD Preview",
                  "Convert UsdPreviewSurface shaders to Principled BSDF shader networks");

--- a/source/blender/editors/screen/glutil.c
+++ b/source/blender/editors/screen/glutil.c
@@ -26,6 +26,7 @@
 #include "GPU_texture.h"

 #ifdef __APPLE__
+#  include "GPU_context.h"
 #  include "GPU_state.h"
 #endif

@@ -281,7 +282,9 @@ void immDrawPixelsTexTiled_scaling_clipping(IMMDrawPixelsTexState *state,
       * This doesn't seem to be too slow,
       * but still would be nice to have fast and nice solution. */
 #ifdef __APPLE__
-      GPU_flush();
+      if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
+        GPU_flush();
+      }
 #endif
    }
  }
--- a/source/blender/editors/sculpt_paint/sculpt.cc
+++ b/source/blender/editors/sculpt_paint/sculpt.cc
@@ -2775,15 +2775,21 @@ static PBVHNode **sculpt_pbvh_gather_cursor_update(Object *ob,
  return nodes;
 }

-static PBVHNode **sculpt_pbvh_gather_generic(Object *ob,
-                                             Sculpt *sd,
-                                             const Brush *brush,
-                                             bool use_original,
-                                             float radius_scale,
-                                             int *r_totnode)
+static PBVHNode **sculpt_pbvh_gather_generic_intern(Object *ob,
+                                                    Sculpt *sd,
+                                                    const Brush *brush,
+                                                    bool use_original,
+                                                    float radius_scale,
+                                                    int *r_totnode,
+                                                    PBVHNodeFlags flag)
 {
  SculptSession *ss = ob->sculpt;
  PBVHNode **nodes = nullptr;
+  PBVHNodeFlags leaf_flag = PBVH_Leaf;
+
+  if (flag & PBVH_TexLeaf) {
+    leaf_flag = PBVH_TexLeaf;
+  }

  /* Build a list of all nodes that are potentially within the cursor or brush's area of influence.
   */
@@ -2795,7 +2801,7 @@ static PBVHNode **sculpt_pbvh_gather_generic(Object *ob,
    data.original = use_original;
    data.ignore_fully_ineffective = brush->sculpt_tool != SCULPT_TOOL_MASK;
    data.center = nullptr;
-    BKE_pbvh_search_gather(ss->pbvh, SCULPT_search_sphere_cb, &data, &nodes, r_totnode);
+    BKE_pbvh_search_gather_ex(ss->pbvh, SCULPT_search_sphere_cb, &data, &nodes, r_totnode, leaf_flag);
  }
  else {
    DistRayAABB_Precalc dist_ray_to_aabb_precalc;
@@ -2809,11 +2815,33 @@ static PBVHNode **sculpt_pbvh_gather_generic(Object *ob,
    data.original = use_original;
    data.dist_ray_to_aabb_precalc = &dist_ray_to_aabb_precalc;
    data.ignore_fully_ineffective = brush->sculpt_tool != SCULPT_TOOL_MASK;
-    BKE_pbvh_search_gather(ss->pbvh, SCULPT_search_circle_cb, &data, &nodes, r_totnode);
+    BKE_pbvh_search_gather_ex(ss->pbvh, SCULPT_search_circle_cb, &data, &nodes, r_totnode, leaf_flag);
  }
  return nodes;
 }

+static PBVHNode **sculpt_pbvh_gather_generic(Object *ob,
+                                             Sculpt *sd,
+                                             const Brush *brush,
+                                             bool use_original,
+                                             float radius_scale,
+                                             int *r_totnode)
+{
+  return sculpt_pbvh_gather_generic_intern(
+      ob, sd, brush, use_original, radius_scale, r_totnode, PBVH_Leaf);
+}
+
+static PBVHNode **sculpt_pbvh_gather_texpaint(Object *ob,
+                                              Sculpt *sd,
+                                              const Brush *brush,
+                                              bool use_original,
+                                              float radius_scale,
+                                              int *r_totnode)
+{
+  return sculpt_pbvh_gather_generic_intern(
+      ob, sd, brush, use_original, radius_scale, r_totnode, PBVH_TexLeaf);
+}
+
 /* Calculate primary direction of movement for many brushes. */
 static void calc_sculpt_normal(
    Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode, float r_area_no[3])
@@ -3440,8 +3468,8 @@ static void do_brush_action(Sculpt *sd,
                            PaintModeSettings *paint_mode_settings)
 {
  SculptSession *ss = ob->sculpt;
-  int totnode;
-  PBVHNode **nodes;
+  int totnode, texnodes_num = 0;
+  PBVHNode **nodes, **texnodes = NULL;

  /* Check for unsupported features. */
  PBVHType type = BKE_pbvh_type(ss->pbvh);
@@ -3454,6 +3482,20 @@ static void do_brush_action(Sculpt *sd,
    BKE_pbvh_ensure_node_loops(ss->pbvh);
  }

+  const bool use_original = sculpt_tool_needs_original(brush->sculpt_tool) ? true :
+                                                                             ss->cache->original;
+  const bool use_pixels = sculpt_needs_pbvh_pixels(paint_mode_settings, brush, ob);
+
+  if (sculpt_needs_pbvh_pixels(paint_mode_settings, brush, ob)) {
+    sculpt_pbvh_update_pixels(paint_mode_settings, ss, ob);
+
+    texnodes = sculpt_pbvh_gather_texpaint(ob, sd, brush, use_original, 1.0f, &texnodes_num);
+
+    if (!texnodes_num) {
+      return;
+    }
+  }
+
  /* Build a list of all nodes that are potentially within the brush's area of influence */

  if (SCULPT_tool_needs_all_pbvh_nodes(brush)) {
@@ -3464,8 +3506,6 @@ static void do_brush_action(Sculpt *sd,
    nodes = SCULPT_cloth_brush_affected_nodes_gather(ss, brush, &totnode);
  }
  else {
-    const bool use_original = sculpt_tool_needs_original(brush->sculpt_tool) ? true :
-                                                                               ss->cache->original;
    float radius_scale = 1.0f;

    /* Corners of square brushes can go outside the brush radius. */
@@ -3480,10 +3520,6 @@ static void do_brush_action(Sculpt *sd,
    }
    nodes = sculpt_pbvh_gather_generic(ob, sd, brush, use_original, radius_scale, &totnode);
  }
-  const bool use_pixels = sculpt_needs_pbvh_pixels(paint_mode_settings, brush, ob);
-  if (use_pixels) {
-    sculpt_pbvh_update_pixels(paint_mode_settings, ss, ob);
-  }

  /* Draw Face Sets in draw mode makes a single undo push, in alt-smooth mode deforms the
   * vertices and uses regular coords undo. */
@@ -3524,6 +3560,7 @@ static void do_brush_action(Sculpt *sd,

  /* Only act if some verts are inside the brush area. */
  if (totnode == 0) {
+    MEM_SAFE_FREE(texnodes);
    return;
  }
  float location[3];
@@ -3671,7 +3708,7 @@ static void do_brush_action(Sculpt *sd,
      SCULPT_do_displacement_smear_brush(sd, ob, nodes, totnode);
      break;
    case SCULPT_TOOL_PAINT:
-      SCULPT_do_paint_brush(paint_mode_settings, sd, ob, nodes, totnode);
+      SCULPT_do_paint_brush(paint_mode_settings, sd, ob, nodes, totnode, texnodes, texnodes_num);
      break;
    case SCULPT_TOOL_SMEAR:
      SCULPT_do_smear_brush(sd, ob, nodes, totnode);
@@ -3715,6 +3752,7 @@ static void do_brush_action(Sculpt *sd,
  }

  MEM_SAFE_FREE(nodes);
+  MEM_SAFE_FREE(texnodes);

  /* Update average stroke position. */
  copy_v3_v3(location, ss->cache->true_location);
--- a/source/blender/editors/sculpt_paint/sculpt_intern.h
+++ b/source/blender/editors/sculpt_paint/sculpt_intern.h
@@ -1789,7 +1789,9 @@ void SCULPT_do_paint_brush(struct PaintModeSettings *paint_mode_settings,
                           Sculpt *sd,
                           Object *ob,
                           PBVHNode **nodes,
-                           int totnode) ATTR_NONNULL();
+                           int totnode,
+                           PBVHNode **texnodes,
+                           int texnodes_num) ATTR_NONNULL();

 /**
 * \brief Get the image canvas for painting on the given object.
@@ -1806,7 +1808,9 @@ void SCULPT_do_paint_brush_image(struct PaintModeSettings *paint_mode_settings,
                                 Sculpt *sd,
                                 Object *ob,
                                 PBVHNode **nodes,
-                                 int totnode) ATTR_NONNULL();
+                                 int totnode,
+                                 PBVHNode **texnodes,
+                                 int texnode_num) ATTR_NONNULL();
 bool SCULPT_use_image_paint_brush(struct PaintModeSettings *settings, Object *ob) ATTR_NONNULL();

 /* Smear Brush. */
--- a/source/blender/editors/sculpt_paint/sculpt_paint_color.c
+++ b/source/blender/editors/sculpt_paint/sculpt_paint_color.c
@@ -249,11 +249,17 @@ static void sample_wet_paint_reduce(const void *__restrict UNUSED(userdata),
  add_v4_v4(join->color, swptd->color);
 }

-void SCULPT_do_paint_brush(
-    PaintModeSettings *paint_mode_settings, Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode)
+void SCULPT_do_paint_brush(PaintModeSettings *paint_mode_settings,
+                           Sculpt *sd,
+                           Object *ob,
+                           PBVHNode **nodes,
+                           int totnode,
+                           PBVHNode **texnodes,
+                           int texnodes_num)
 {
  if (SCULPT_use_image_paint_brush(paint_mode_settings, ob)) {
-    SCULPT_do_paint_brush_image(paint_mode_settings, sd, ob, nodes, totnode);
+    SCULPT_do_paint_brush_image(
+        paint_mode_settings, sd, ob, nodes, totnode, texnodes, texnodes_num);
    return;
  }

--- a/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
+++ b/source/blender/editors/sculpt_paint/sculpt_paint_image.cc
@@ -1,6 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later
 * Copyright 2022 Blender Foundation. All rights reserved. */

+/* Paint a color made from hash of node pointer. */
+//#define DEBUG_PIXEL_NODES
+
 #include "DNA_image_types.h"
 #include "DNA_object_types.h"

@@ -9,6 +12,9 @@
 #include "BLI_math.h"
 #include "BLI_math_color_blend.h"
 #include "BLI_task.h"
+#ifdef DEBUG_PIXEL_NODES
+#  include "BLI_hash.h"
+#endif

 #include "IMB_colormanagement.h"
 #include "IMB_imbuf.h"
@@ -187,6 +193,15 @@ template<typename ImageBuffer> class PaintingKernel {
          automask_data);
      float4 paint_color = brush_color * falloff_strength * brush_strength;
      float4 buffer_color;
+
+#ifdef DEBUG_PIXEL_NODES
+      if ((pixel_row.start_image_coordinate.y >> 3) & 1) {
+        paint_color[0] *= 0.5f;
+        paint_color[1] *= 0.5f;
+        paint_color[2] *= 0.5f;
+      }
+#endif
+
      blend_color_mix_float(buffer_color, color, paint_color);
      buffer_color *= brush->alpha;
      IMB_blend_color_float(color, color, buffer_color, static_cast<IMB_BlendMode>(brush->blend));
@@ -199,20 +214,18 @@ template<typename ImageBuffer> class PaintingKernel {
    return pixels_painted;
  }

-  void init_brush_color(ImBuf *image_buffer)
+  void init_brush_color(ImBuf *image_buffer, float in_brush_color[3])
  {
    const char *to_colorspace = image_accessor.get_colorspace_name(image_buffer);
    if (last_used_color_space == to_colorspace) {
      return;
    }
-    copy_v3_v3(brush_color,
-               ss->cache->invert ? BKE_brush_secondary_color_get(ss->scene, brush) :
-                                   BKE_brush_color_get(ss->scene, brush));
+
    /* NOTE: Brush colors are stored in sRGB. We use math color to follow other areas that
     * use brush colors. From there on we use IMB_colormanagement to convert the brush color to the
     * colorspace of the texture. This isn't ideal, but would need more refactoring to make sure
     * that brush colors are stored in scene linear by default. */
-    srgb_to_linearrgb_v3_v3(brush_color, brush_color);
+    srgb_to_linearrgb_v3_v3(brush_color, in_brush_color);
    brush_color[3] = 1.0f;

    const char *from_colorspace = IMB_colormanagement_role_colorspace_name_get(
@@ -336,6 +349,22 @@ static void do_paint_pixels(void *__restrict userdata,
  PaintingKernel<ImageBufferFloat4> kernel_float4(ss, brush, thread_id, positions);
  PaintingKernel<ImageBufferByte4> kernel_byte4(ss, brush, thread_id, positions);

+  float brush_color[4];
+
+#ifdef DEBUG_PIXEL_NODES
+  uint hash = BLI_hash_int(POINTER_AS_UINT(node));
+
+  brush_color[0] = (float)(hash & 255) / 255.0f;
+  brush_color[1] = (float)((hash >> 8) & 255) / 255.0f;
+  brush_color[2] = (float)((hash >> 16) & 255) / 255.0f;
+#else
+  copy_v3_v3(brush_color,
+             ss->cache->invert ? BKE_brush_secondary_color_get(ss->scene, brush) :
+                                 BKE_brush_color_get(ss->scene, brush));
+#endif
+
+  brush_color[3] = 1.0f;
+
  AutomaskingNodeData automask_data;
  SCULPT_automasking_node_begin(ob, ss, ss->cache->automasking, &automask_data, data->nodes[n]);

@@ -353,10 +382,10 @@ static void do_paint_pixels(void *__restrict userdata,
        }

        if (image_buffer->rect_float != nullptr) {
-          kernel_float4.init_brush_color(image_buffer);
+          kernel_float4.init_brush_color(image_buffer, brush_color);
        }
        else {
-          kernel_byte4.init_brush_color(image_buffer);
+          kernel_byte4.init_brush_color(image_buffer, brush_color);
        }

        for (const PackedPixelRow &pixel_row : tile_data.pixel_rows) {
@@ -520,27 +549,33 @@ bool SCULPT_use_image_paint_brush(PaintModeSettings *settings, Object *ob)
  return BKE_paint_canvas_image_get(settings, ob, &image, &image_user);
 }

-void SCULPT_do_paint_brush_image(
-    PaintModeSettings *paint_mode_settings, Sculpt *sd, Object *ob, PBVHNode **nodes, int totnode)
+void SCULPT_do_paint_brush_image(PaintModeSettings *paint_mode_settings,
+                                 Sculpt *sd,
+                                 Object *ob,
+                                 PBVHNode **nodes,
+                                 int totnode,
+                                 PBVHNode **texnodes,
+                                 int texnodes_num)
 {
  Brush *brush = BKE_paint_brush(&sd->paint);

  TexturePaintingUserData data = {nullptr};
  data.ob = ob;
  data.brush = brush;
-  data.nodes = nodes;
+  data.nodes = texnodes;

  if (!ImageData::init_active_image(ob, &data.image_data, paint_mode_settings)) {
    return;
  }

  TaskParallelSettings settings;
-  BKE_pbvh_parallel_range_settings(&settings, true, totnode);
-  BLI_task_parallel_range(0, totnode, &data, do_push_undo_tile, &settings);
-  BLI_task_parallel_range(0, totnode, &data, do_paint_pixels, &settings);
+  BKE_pbvh_parallel_range_settings(&settings, true, texnodes_num);
+  BLI_task_parallel_range(0, texnodes_num, &data, do_push_undo_tile, &settings);
+  BLI_task_parallel_range(0, texnodes_num, &data, do_paint_pixels, &settings);

  TaskParallelSettings settings_flush;
-  BKE_pbvh_parallel_range_settings(&settings_flush, false, totnode);
-  BLI_task_parallel_range(0, totnode, &data, do_mark_dirty_regions, &settings_flush);
+
+  BKE_pbvh_parallel_range_settings(&settings_flush, false, texnodes_num);
+  BLI_task_parallel_range(0, texnodes_num, &data, do_mark_dirty_regions, &settings_flush);
 }
 }
--- a/source/blender/editors/sculpt_paint/sculpt_undo.cc
+++ b/source/blender/editors/sculpt_paint/sculpt_undo.cc
@@ -2090,7 +2090,7 @@ static UndoSculpt *sculpt_undo_get_nodes(void)
 {
  UndoStack *ustack = ED_undo_stack_get();
  UndoStep *us = BKE_undosys_stack_init_or_active_with_type(ustack, BKE_UNDOSYS_TYPE_SCULPT);
-  return sculpt_undosys_step_get_nodes(us);
+  return us ? sculpt_undosys_step_get_nodes(us) : NULL;
 }

 /** \} */
--- a/source/blender/gpu/metal/mtl_command_buffer.mm
+++ b/source/blender/gpu/metal/mtl_command_buffer.mm
@@ -8,6 +8,8 @@
 #include "mtl_debug.hh"
 #include "mtl_framebuffer.hh"

+#include "intern/GHOST_ContextCGL.h"
+
 #include <fstream>

 using namespace blender;
@@ -45,9 +47,15 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
  if (active_command_buffer_ == nil) {

    /* Verify number of active command buffers is below limit.
-     * Exceeding this limit will mean we either have a leak/GPU hang
-     * or we should increase the command buffer limit during MTLQueue creation */
-    BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs < MTL_MAX_COMMAND_BUFFERS);
+     * Exceeding this limit will mean we either have a command buffer leak/GPU hang
+     * or we should increase the command buffer limit during MTLQueue creation.
+     * Excessive command buffers can also be caused by frequent GPUContext switches, which cause
+     * the GPU pipeline to flush. This is common during indirect light baking operations.
+     *
+     * NOTE: We currently stall until completion of GPU work upon ::submit if we have reached the
+     * in-flight command buffer limit. */
+    BLI_assert(MTLCommandBufferManager::num_active_cmd_bufs <
+               GHOST_ContextCGL::max_command_buffer_count);

    if (G.debug & G_DEBUG_GPU) {
      /* Debug: Enable Advanced Errors for GPU work execution. */
@@ -137,6 +145,19 @@ bool MTLCommandBufferManager::submit(bool wait)
  /* Submit command buffer to GPU. */
  [active_command_buffer_ commit];

+  /* If we have too many active command buffers in flight, wait until completed to avoid running
+   * out. The limit can be raised via GHOST_ContextCGL::max_command_buffer_count. */
+  if (MTLCommandBufferManager::num_active_cmd_bufs >=
+      (GHOST_ContextCGL::max_command_buffer_count - 1)) {
+    wait = true;
+    MTL_LOG_WARNING(
+        "Maximum number of command buffers in flight. Host will wait until GPU work has "
+        "completed. Consider increasing GHOST_ContextCGL::max_command_buffer_count or reducing "
+        "work fragmentation to better utilise system hardware. Command buffers are flushed upon "
+        "GPUContext switches, this is the most common cause of excessive command buffer "
+        "generation.\n");
+  }
+
  if (wait || (G.debug & G_DEBUG_GPU)) {
    /* Wait until current GPU work has finished executing. */
    [active_command_buffer_ waitUntilCompleted];
--- a/source/blender/gpu/metal/mtl_common.hh
+++ b/source/blender/gpu/metal/mtl_common.hh
@@ -9,7 +9,6 @@
 #define MTL_MAX_DRAWABLES 3
 #define MTL_MAX_SET_BYTES_SIZE 4096
 #define MTL_FORCE_WAIT_IDLE 0
-#define MTL_MAX_COMMAND_BUFFERS 64

 /* Number of frames for which we retain in-flight resources such as scratch buffers.
 * Set as number of GPU frames in flight, plus an additional value for extra possible CPU frame. */
--- a/source/blender/gpu/metal/mtl_memory.hh
+++ b/source/blender/gpu/metal/mtl_memory.hh
@@ -339,10 +339,10 @@ class MTLSafeFreeList {
 class MTLBufferPool {

 private:
-  /* Memory statistics. */
-  int64_t total_allocation_bytes_ = 0;
-
 #if MTL_DEBUG_MEMORY_STATISTICS == 1
+  /* Memory statistics. */
+  std::atomic<int64_t> total_allocation_bytes_;
+
  /* Debug statistics. */
  std::atomic<int> per_frame_allocation_count_;
  std::atomic<int64_t> allocations_in_pool_;
@@ -368,10 +368,14 @@ class MTLBufferPool {
   * - A size-ordered list (MultiSet) of allocated buffers is kept per MTLResourceOptions
   *   permutation. This allows efficient lookup for buffers of a given requested size.
   * - MTLBufferHandle wraps a gpu::MTLBuffer pointer to achieve easy size-based sorting
-   *   via CompareMTLBuffer. */
+   *   via CompareMTLBuffer.
+   *
+   * NOTE: buffer_pool_lock_ guards against concurrent access to the memory allocator. This
+   * can occur during light baking or rendering operations. */
  using MTLBufferPoolOrderedList = std::multiset<MTLBufferHandle, CompareMTLBuffer>;
  using MTLBufferResourceOptions = uint64_t;

+  std::mutex buffer_pool_lock_;
  blender::Map<MTLBufferResourceOptions, MTLBufferPoolOrderedList *> buffer_pools_;
  blender::Vector<gpu::MTLBuffer *> allocations_;

--- a/source/blender/gpu/metal/mtl_memory.mm
+++ b/source/blender/gpu/metal/mtl_memory.mm
@@ -25,6 +25,7 @@ void MTLBufferPool::init(id<MTLDevice> mtl_device)

 #if MTL_DEBUG_MEMORY_STATISTICS == 1
    /* Debug statistics. */
+    total_allocation_bytes_ = 0;
    per_frame_allocation_count_ = 0;
    allocations_in_pool_ = 0;
    buffers_in_pool_ = 0;
@@ -43,7 +44,7 @@ MTLBufferPool::~MTLBufferPool()

 void MTLBufferPool::free()
 {
-
+  buffer_pool_lock_.lock();
  for (auto buffer : allocations_) {
    BLI_assert(buffer);
    delete buffer;
@@ -55,6 +56,7 @@ void MTLBufferPool::free()
    delete buffer_pool;
  }
  buffer_pools_.clear();
+  buffer_pool_lock_.unlock();
 }

 gpu::MTLBuffer *MTLBufferPool::allocate(uint64_t size, bool cpu_visible)
@@ -96,6 +98,8 @@ gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size,

  /* Check if we have a suitable buffer */
  gpu::MTLBuffer *new_buffer = nullptr;
+  buffer_pool_lock_.lock();
+
  std::multiset<MTLBufferHandle, CompareMTLBuffer> **pool_search = buffer_pools_.lookup_ptr(
      (uint64_t)options);

@@ -142,7 +146,9 @@ gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size,

    /* Track allocation in context. */
    allocations_.append(new_buffer);
+#if MTL_DEBUG_MEMORY_STATISTICS == 1
    total_allocation_bytes_ += aligned_alloc_size;
+#endif
  }
  else {
    /* Re-use suitable buffer. */
@@ -165,6 +171,9 @@ gpu::MTLBuffer *MTLBufferPool::allocate_aligned(uint64_t size,
  per_frame_allocation_count_++;
 #endif

+  /* Release lock. */
+  buffer_pool_lock_.unlock();
+
  return new_buffer;
 }

@@ -209,8 +218,11 @@ void MTLBufferPool::update_memory_pools()
 {
  /* Ensure thread-safe access to `completed_safelist_queue_`, which contains
   * the list of MTLSafeFreeList's whose buffers are ready to be
-   * re-inserted into the Memory Manager pools. */
+   * re-inserted into the Memory Manager pools.
+   * We also need to lock access to general buffer pools, to ensure allocations
+   * are not simultaneously happening on background threads. */
  safelist_lock_.lock();
+  buffer_pool_lock_.lock();

 #if MTL_DEBUG_MEMORY_STATISTICS == 1
  int num_buffers_added = 0;
@@ -302,6 +314,7 @@ void MTLBufferPool::update_memory_pools()

  /* Clear safe pools list */
  completed_safelist_queue_.clear();
+  buffer_pool_lock_.unlock();
  safelist_lock_.unlock();
 }

--- a/source/blender/gpu/shaders/common/gpu_shader_common_color_utils.glsl
+++ b/source/blender/gpu/shaders/common/gpu_shader_common_color_utils.glsl
@@ -146,7 +146,7 @@ void ycca_to_rgba_itu_601(vec4 ycca, out vec4 color)
 {
  ycca.xyz *= 255.0;
  ycca.xyz -= vec3(16.0, 128.0, 128.0);
-  color.rgb = mat3(vec3(1.164), 0.0, -0.392, 2.017, 1.596, -0.813, 0.0) * ycca.xyz;
+  color.rgb = mat3(1.164, 1.164, 1.164, 0.0, -0.392, 2.017, 1.596, -0.813, 0.0) * ycca.xyz;
  color.rgb /= 255.0;
  color.a = ycca.a;
 }
@@ -155,7 +155,7 @@ void ycca_to_rgba_itu_709(vec4 ycca, out vec4 color)
 {
  ycca.xyz *= 255.0;
  ycca.xyz -= vec3(16.0, 128.0, 128.0);
-  color.rgb = mat3(vec3(1.164), 0.0, -0.213, 2.115, 1.793, -0.534, 0.0) * ycca.xyz;
+  color.rgb = mat3(1.164, 1.164, 1.164, 0.0, -0.213, 2.115, 1.793, -0.534, 0.0) * ycca.xyz;
  color.rgb /= 255.0;
  color.a = ycca.a;
 }
@@ -163,7 +163,7 @@ void ycca_to_rgba_itu_709(vec4 ycca, out vec4 color)
 void ycca_to_rgba_jpeg(vec4 ycca, out vec4 color)
 {
  ycca.xyz *= 255.0;
-  color.rgb = mat3(vec3(1.0), 0.0, -0.34414, 1.772, 1.402, -0.71414, 0.0) * ycca.xyz;
+  color.rgb = mat3(1.0, 1.0, 1.0, 0.0, -0.34414, 1.772, 1.402, -0.71414, 0.0) * ycca.xyz;
  color.rgb += vec3(-179.456, 135.45984, -226.816);
  color.rgb /= 255.0;
  color.a = ycca.a;
@@ -203,7 +203,7 @@ void rgba_to_ycca_jpeg(vec4 rgba, out vec4 ycca)

 void yuva_to_rgba_itu_709(vec4 yuva, out vec4 color)
 {
-  color.rgb = mat3(vec3(1.0), 0.0, -0.21482, 2.12798, 1.28033, -0.38059, 0.0) * yuva.xyz;
+  color.rgb = mat3(1.0, 1.0, 1.0, 0.0, -0.21482, 2.12798, 1.28033, -0.38059, 0.0) * yuva.xyz;
  color.a = yuva.a;
 }

--- a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
+++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
@@ -210,7 +210,7 @@ template<typename S, typename T, access A>
 inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                      T texel,
                                      uint lod,
-                                      T offset = 0)
+                                      T offset)
 {
  float w = tex.texture->get_width();
  if ((texel + offset) >= 0 && (texel + offset) < w) {
@@ -222,38 +222,6 @@ inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A
  }
 }

-template<typename S, typename T, access A>
-inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
-                                      vec<T, 1> texel,
-                                      uint lod,
-                                      vec<T, 1> offset = 0)
-{
-  float w = tex.texture->get_width();
-  if ((texel + offset) >= 0 && (texel + offset) < w) {
-    /* LODs not supported for 1d textures. This must be zero. */
-    return tex.texture->read(uint(texel + offset), 0);
-  }
-  else {
-    return vec<S, 4>(0);
-  }
-}
-
-template<typename S, typename T, int n, access A>
-inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
-                                      vec<T, n> texel,
-                                      uint lod,
-                                      vec<T, n> offset = vec<T, n>(0))
-{
-  float w = tex.texture->get_width();
-  if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w) {
-    /* LODs not supported for 1d textures. This must be zero. */
-    return tex.texture->read(uint(texel.x + offset.x), 0);
-  }
-  else {
-    return vec<S, 4>(0);
-  }
-}
-
 template<typename S, typename T, access A>
 inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                      vec<T, 2> texel,
@@ -1236,8 +1204,7 @@ mat3 MAT3x3(
 {
  return mat3(vec3(a1, a2, a3), vec3(b1, b2, b3), vec3(c1, c2, c3));
 }
-mat3 MAT3x3(
-    vec3 a, float b1, float b2, float b3, float c1, float c2, float c3)
+mat3 MAT3x3(vec3 a, float b1, float b2, float b3, float c1, float c2, float c3)
 {
  return mat3(a, vec3(b1, b2, b3), vec3(c1, c2, c3));
 }