2023-03-17 15:52:21 +01:00
9 changed files with 186 additions and 92 deletions
--- a/source/blender/editors/screen/glutil.c
+++ b/source/blender/editors/screen/glutil.c
@ -147,6 +147,18 @@ void immDrawPixelsTexTiled_scaling_clipping(IMMDrawPixelsTexState *state,
                                            const float color[4])
 {
  int subpart_x, subpart_y, tex_w = 256, tex_h = 256;
+#ifdef __APPLE__
+  if (GPU_backend_get_type() == GPU_BACKEND_METAL) {
+    /* NOTE(Metal): The Metal backend will keep all temporary texture memory within a command
+     * submission in-flight, so using a partial tile size does not provide any tangible memory
+     * reduction, but does incur additional API overhead and significant cache inefficiency on AMD
+     * platforms.
+     * The Metal API also provides smart resource paging such that the application can
+     * still efficiently swap memory, even if the system is low on physical memory. */
+    tex_w = img_w;
+    tex_h = img_h;
+  }
+#endif
  int seamless, offset_x, offset_y, nsubparts_x, nsubparts_y;
  int components;
  const bool use_clipping = ((clip_min_x < clip_max_x) && (clip_min_y < clip_max_y));
--- a/source/blender/gpu/intern/gpu_codegen.cc
+++ b/source/blender/gpu/intern/gpu_codegen.cc
@ -99,6 +99,8 @@ struct GPUPass {
  /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
   * during pass code generation. */
  bool should_optimize;
+  /** Whether pass is in the GPUPass cache. */
+  bool cached;
 };

 /* -------------------------------------------------------------------- */
@ -132,6 +134,7 @@ static GPUPass *gpu_pass_cache_lookup(uint32_t hash)
 static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass)
 {
  BLI_spin_lock(&pass_cache_spin);
+  pass->cached = true;
  if (node != nullptr) {
    /* Add after the first pass having the same hash. */
    pass->next = node->next;
@ -775,6 +778,7 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
    pass->create_info = codegen.create_info;
    pass->hash = codegen.hash_get();
    pass->compiled = false;
+    pass->cached = false;
    /* Only flag pass optimization hint if this is the first generated pass for a material.
     * Optimized passes cannot be optimized further, even if the heuristic is still not
     * favorable. */
@ -881,14 +885,6 @@ GPUShader *GPU_pass_shader_get(GPUPass *pass)
  return pass->shader;
 }

-void GPU_pass_release(GPUPass *pass)
-{
-  BLI_spin_lock(&pass_cache_spin);
-  BLI_assert(pass->refcount > 0);
-  pass->refcount--;
-  BLI_spin_unlock(&pass_cache_spin);
-}
-
 static void gpu_pass_free(GPUPass *pass)
 {
  BLI_assert(pass->refcount == 0);
@ -899,6 +895,18 @@ static void gpu_pass_free(GPUPass *pass)
  MEM_freeN(pass);
 }

+void GPU_pass_release(GPUPass *pass)
+{
+  BLI_spin_lock(&pass_cache_spin);
+  BLI_assert(pass->refcount > 0);
+  /* Drop one reference. Garbage collection does not filter un-cached passes, so such a
+   * pass is freed right here once its last reference is gone. */
+  if (--pass->refcount == 0 && !pass->cached) {
+    gpu_pass_free(pass);
+  }
+  BLI_spin_unlock(&pass_cache_spin);
+}
+
 void GPU_pass_cache_garbage_collect(void)
 {
  static int lasttime = 0;
--- a/source/blender/gpu/metal/mtl_memory.hh
+++ b/source/blender/gpu/metal/mtl_memory.hh
@ -288,17 +288,17 @@ class MTLSafeFreeList {
  std::atomic<bool> in_free_queue_;
  std::atomic<bool> referenced_by_workload_;
  std::recursive_mutex lock_;
-
  /* Linked list of next MTLSafeFreeList chunk if current chunk is full. */
-  std::atomic<int> has_next_pool_;
  std::atomic<MTLSafeFreeList *> next_;

  /* Lockless list. MAX_NUM_BUFFERS_ within a chunk based on considerations
-   * for performance and memory.
+   * for performance and memory. Higher chunk counts are preferable for efficiently
+   * performing block operations such as copying several objects simultaneously.
+   *
   * MIN_BUFFER_FLUSH_COUNT refers to the minimum count of buffers in the MTLSafeFreeList
   * before buffers are returned to global memory pool. This is set at a point to reduce
   * overhead of small pool flushes, while ensuring floating memory overhead is not excessive. */
-  static const int MAX_NUM_BUFFERS_ = 1024;
+  static const int MAX_NUM_BUFFERS_ = 8192;
  static const int MIN_BUFFER_FLUSH_COUNT = 120;
  std::atomic<int> current_list_index_;
  gpu::MTLBuffer *safe_free_pool_[MAX_NUM_BUFFERS_];
@ -306,8 +306,8 @@ class MTLSafeFreeList {
 public:
  MTLSafeFreeList();

-  /* Add buffer to Safe Free List, can be called from secondary threads.
-   * Performs a lockless list insert. */
+  /* Can be used from multiple threads. Performs insertion into Safe Free List with the least
+   * amount of threading synchronization. */
  void insert_buffer(gpu::MTLBuffer *buffer);

  /* Whether we need to start a new safe free list, or can carry on using the existing one. */
@ -322,12 +322,13 @@ class MTLSafeFreeList {
  void flag_in_queue()
  {
    in_free_queue_ = true;
-    if (has_next_pool_) {
+    if (current_list_index_ >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {
      MTLSafeFreeList *next_pool = next_.load();
-      BLI_assert(next_pool != nullptr);
+      if (next_pool) {
        next_pool->flag_in_queue();
      }
    }
+  }
 };

 /* MTLBuffer pools. */
--- a/source/blender/gpu/metal/mtl_memory.mm
+++ b/source/blender/gpu/metal/mtl_memory.mm
@ -257,10 +257,7 @@ void MTLBufferPool::update_memory_pools()
      }

      /* Fetch next MTLSafeFreeList chunk, if any. */
-      MTLSafeFreeList *next_list = nullptr;
-      if (current_pool->has_next_pool_ > 0) {
-        next_list = current_pool->next_.load();
-      }
+      MTLSafeFreeList *next_list = current_pool->next_.load();

      /* Delete current MTLSafeFreeList */
      current_pool->lock_.unlock();
@ -396,7 +393,6 @@ MTLSafeFreeList::MTLSafeFreeList()
  in_free_queue_ = false;
  current_list_index_ = 0;
  next_ = nullptr;
-  has_next_pool_ = 0;
 }

 void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
@ -410,12 +406,19 @@ void MTLSafeFreeList::insert_buffer(gpu::MTLBuffer *buffer)
   * insert the buffer into the next available chunk. */
  if (insert_index >= MTLSafeFreeList::MAX_NUM_BUFFERS_) {

-    /* Check if first caller to generate next pool. */
-    int has_next = has_next_pool_++;
-    if (has_next == 0) {
-      next_ = new MTLSafeFreeList();
-    }
+    /* Create the next pool in the chain if it does not exist yet. Only the first caller to
+     * take the lock allocates it; concurrent callers wait and then reuse the same pool. */
    MTLSafeFreeList *next_list = next_.load();
+
+    if (!next_list) {
+      std::unique_lock lock(lock_);
+
+      next_list = next_.load();
+      if (!next_list) {
+        next_list = new MTLSafeFreeList();
+        next_.store(next_list);
+      }
+    }
    BLI_assert(next_list);
    next_list->insert_buffer(buffer);

--- a/source/blender/gpu/metal/mtl_shader_generator.hh
+++ b/source/blender/gpu/metal/mtl_shader_generator.hh
@ -490,8 +490,12 @@ class MSLGeneratorInterface {
  std::string generate_msl_uniform_undefs(ShaderStage stage);
  std::string generate_ubo_block_undef_chain(ShaderStage stage);
  std::string generate_msl_texture_vars(ShaderStage shader_stage);
-  void generate_msl_textures_input_string(std::stringstream &out, ShaderStage stage);
-  void generate_msl_uniforms_input_string(std::stringstream &out, ShaderStage stage);
+  void generate_msl_textures_input_string(std::stringstream &out,
+                                          ShaderStage stage,
+                                          bool &is_first_parameter);
+  void generate_msl_uniforms_input_string(std::stringstream &out,
+                                          ShaderStage stage,
+                                          bool &is_first_parameter);

  /* Location is not always specified, so this will resolve outstanding locations. */
  void resolve_input_attribute_locations();
--- a/source/blender/gpu/metal/mtl_shader_generator.mm
+++ b/source/blender/gpu/metal/mtl_shader_generator.mm
@ -2145,8 +2145,20 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()
  return out.str();
 }

+/* Separator to emit in front of the next function-signature parameter: a blank for the
+ * very first parameter, a comma for every later one. Consuming the blank clears the
+ * caller's `is_first_parameter` flag so that subsequent invocations yield commas. */
+static char parameter_delimiter(bool &is_first_parameter)
+{
+  const char delimiter = is_first_parameter ? ' ' : ',';
+  /* Unconditional clear: a no-op when the flag is already false. */
+  is_first_parameter = false;
+  return delimiter;
+}
+
 void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream &out,
-                                                               ShaderStage stage)
+                                                               ShaderStage stage,
+                                                               bool &is_first_parameter)
 {
  /* Note: Shader stage must be specified as the singular stage index for which the input
   * is generating. Compound stages are not valid inputs. */
@ -2156,7 +2168,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
  BLI_assert(this->texture_samplers.size() <= GPU_max_textures_vert());
  for (const MSLTextureSampler &tex : this->texture_samplers) {
    if (bool(tex.stage & stage)) {
-      out << ",\n\t" << tex.get_msl_typestring(false) << " [[texture(" << tex.location << ")]]";
+      out << parameter_delimiter(is_first_parameter) << "\n\t" << tex.get_msl_typestring(false)
+          << " [[texture(" << tex.location << ")]]";
    }
  }

@ -2166,7 +2179,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
   * If we exceed the hardware-supported limit, then follow a bind-less model using argument
   * buffers. */
  if (this->use_argument_buffer_for_samplers()) {
-    out << ",\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+"
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconstant SStruct& samplers [[buffer(MTL_uniform_buffer_base_index+"
        << (this->get_sampler_argument_buffer_bind_index(stage)) << ")]]";
  }
  else {
@ -2175,7 +2189,8 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
    BLI_assert(this->texture_samplers.size() <= MTL_MAX_DEFAULT_SAMPLERS);
    for (const MSLTextureSampler &tex : this->texture_samplers) {
      if (bool(tex.stage & stage)) {
-        out << ",\n\tsampler " << tex.name << "_sampler [[sampler(" << tex.location << ")]]";
+        out << parameter_delimiter(is_first_parameter) << "\n\tsampler " << tex.name
+            << "_sampler [[sampler(" << tex.location << ")]]";
      }
    }

@ -2189,12 +2204,13 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
 }

 void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream &out,
-                                                               ShaderStage stage)
+                                                               ShaderStage stage,
+                                                               bool &is_first_parameter)
 {
  for (const MSLUniformBlock &ubo : this->uniform_blocks) {
    if (bool(ubo.stage & stage)) {
      /* For literal/existing global types, we do not need the class name-space accessor. */
-      out << ",\n\tconstant ";
+      out << parameter_delimiter(is_first_parameter) << "\n\tconstant ";
      if (!is_builtin_type(ubo.type_name)) {
        out << get_stage_class_name(stage) << "::";
      }
@ -2211,104 +2227,135 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
 std::string MSLGeneratorInterface::generate_msl_vertex_inputs_string()
 {
  std::stringstream out;
+  bool is_first_parameter = true;

  if (this->uses_ssbo_vertex_fetch_mode) {
    /* Vertex Buffers bound as raw buffers. */
    for (int i = 0; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
-      out << "\tconstant uchar* MTL_VERTEX_DATA_" << i << " [[buffer(" << i << ")]],\n";
+      out << parameter_delimiter(is_first_parameter) << "\tconstant uchar* MTL_VERTEX_DATA_" << i
+          << " [[buffer(" << i << ")]]\n";
    }
-    out << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]],";
+    out << parameter_delimiter(is_first_parameter)
+        << "\tconstant ushort* MTL_INDEX_DATA[[buffer(MTL_SSBO_VERTEX_FETCH_IBO_INDEX)]]";
  }
  else {
    if (this->vertex_input_attributes.size() > 0) {
      /* Vertex Buffers use input assembly. */
-      out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]],";
+      out << get_stage_class_name(ShaderStage::VERTEX) << "::VertexIn v_in [[stage_in]]";
+      is_first_parameter = false;
    }
  }
-  out << "\n\tconstant " << get_stage_class_name(ShaderStage::VERTEX)
-      << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";

-  this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX);
+  if (this->uniforms.size() > 0) {
+    out << parameter_delimiter(is_first_parameter) << "\n\tconstant "
+        << get_stage_class_name(ShaderStage::VERTEX)
+        << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+    is_first_parameter = false;
+  }
+
+  this->generate_msl_uniforms_input_string(out, ShaderStage::VERTEX, is_first_parameter);

  /* Transform feedback buffer binding. */
  if (this->uses_transform_feedback) {
-    out << ",\n\tdevice " << get_stage_class_name(ShaderStage::VERTEX)
+    out << parameter_delimiter(is_first_parameter) << "\n\tdevice "
+        << get_stage_class_name(ShaderStage::VERTEX)
        << "::VertexOut_TF* "
           "transform_feedback_results[[buffer(MTL_transform_feedback_buffer_index)]]";
  }

  /* Generate texture signatures. */
-  this->generate_msl_textures_input_string(out, ShaderStage::VERTEX);
+  this->generate_msl_textures_input_string(out, ShaderStage::VERTEX, is_first_parameter);

  /* Entry point parameters for gl Globals. */
  if (this->uses_gl_VertexID) {
-    out << ",\n\tconst uint32_t gl_VertexID [[vertex_id]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint32_t gl_VertexID [[vertex_id]]";
  }
  if (this->uses_gl_InstanceID) {
-    out << ",\n\tconst uint32_t gl_InstanceID [[instance_id]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint32_t gl_InstanceID [[instance_id]]";
  }
  if (this->uses_gl_BaseInstanceARB) {
-    out << ",\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint32_t gl_BaseInstanceARB [[base_instance]]";
  }
  return out.str();
 }

 std::string MSLGeneratorInterface::generate_msl_fragment_inputs_string()
 {
+  bool is_first_parameter = true;
  std::stringstream out;
-  out << get_stage_class_name(ShaderStage::FRAGMENT)
-      << "::VertexOut v_in [[stage_in]],\n\tconstant "
+  out << parameter_delimiter(is_first_parameter) << get_stage_class_name(ShaderStage::FRAGMENT)
+      << "::VertexOut v_in [[stage_in]]";
+
+  if (this->uniforms.size() > 0) {
+    out << parameter_delimiter(is_first_parameter) << "\n\tconstant "
        << get_stage_class_name(ShaderStage::FRAGMENT)
        << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  }

-  this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT);
+  this->generate_msl_uniforms_input_string(out, ShaderStage::FRAGMENT, is_first_parameter);

  /* Generate texture signatures. */
-  this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT);
+  this->generate_msl_textures_input_string(out, ShaderStage::FRAGMENT, is_first_parameter);

  if (this->uses_gl_PointCoord) {
-    out << ",\n\tconst float2 gl_PointCoord [[point_coord]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst float2 gl_PointCoord [[point_coord]]";
  }
  if (this->uses_gl_FrontFacing) {
-    out << ",\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst MTLBOOL gl_FrontFacing [[front_facing]]";
  }
  if (this->uses_gl_PrimitiveID) {
-    out << ",\n\tconst uint gl_PrimitiveID [[primitive_id]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint gl_PrimitiveID [[primitive_id]]";
  }

  /* Barycentrics. */
  if (this->uses_barycentrics) {
-    out << ",\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst float3 mtl_barycentric_coord [[barycentric_coord]]";
  }
  return out.str();
 }

 std::string MSLGeneratorInterface::generate_msl_compute_inputs_string()
 {
+  bool is_first_parameter = true;
  std::stringstream out;
-  out << "constant " << get_stage_class_name(ShaderStage::COMPUTE)
+  if (this->uniforms.size() > 0) {
+    out << parameter_delimiter(is_first_parameter) << "constant "
+        << get_stage_class_name(ShaderStage::COMPUTE)
        << "::PushConstantBlock* uniforms[[buffer(MTL_uniform_buffer_base_index)]]";
+  }

-  this->generate_msl_uniforms_input_string(out, ShaderStage::COMPUTE);
+  this->generate_msl_uniforms_input_string(out, ShaderStage::COMPUTE, is_first_parameter);

  /* Generate texture signatures. */
-  this->generate_msl_textures_input_string(out, ShaderStage::COMPUTE);
+  this->generate_msl_textures_input_string(out, ShaderStage::COMPUTE, is_first_parameter);

  /* Entry point parameters for gl Globals. */
  if (this->uses_gl_GlobalInvocationID) {
-    out << ",\n\tconst uint3 gl_GlobalInvocationID [[thread_position_in_grid]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_GlobalInvocationID [[thread_position_in_grid]]";
  }
  if (this->uses_gl_WorkGroupID) {
-    out << ",\n\tconst uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]";
  }
  if (this->uses_gl_NumWorkGroups) {
-    out << ",\n\tconst uint3 gl_NumWorkGroups [[threadgroups_per_grid]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_NumWorkGroups [[threadgroups_per_grid]]";
  }
  if (this->uses_gl_LocalInvocationIndex) {
-    out << ",\n\tconst uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]";
  }
  if (this->uses_gl_LocalInvocationID) {
-    out << ",\n\tconst uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]";
+    out << parameter_delimiter(is_first_parameter)
+        << "\n\tconst uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]";
  }

  return out.str();
@ -2316,6 +2363,10 @@ std::string MSLGeneratorInterface::generate_msl_compute_inputs_string()

 std::string MSLGeneratorInterface::generate_msl_uniform_structs(ShaderStage shader_stage)
 {
+  /* Only generate PushConstantBlock if we have uniforms. */
+  if (this->uniforms.size() == 0) {
+    return "";
+  }
  BLI_assert(shader_stage == ShaderStage::VERTEX || shader_stage == ShaderStage::FRAGMENT);
  std::stringstream out;

@ -2624,6 +2675,9 @@ std::string MSLGeneratorInterface::generate_msl_fragment_out_struct()

 std::string MSLGeneratorInterface::generate_msl_global_uniform_population(ShaderStage stage)
 {
+  if (this->uniforms.size() == 0) {
+    return "";
+  }
  /* Populate Global Uniforms. */
  std::stringstream out;

--- a/source/blender/gpu/metal/mtl_texture.mm
+++ b/source/blender/gpu/metal/mtl_texture.mm
@ -594,17 +594,6 @@ void gpu::MTLTexture::update_sub(
      }
    }

-    /* Prepare staging buffer for data. */
-    id<MTLBuffer> staging_buffer = nil;
-    uint64_t staging_buffer_offset = 0;
-
-    /* Fetch allocation from scratch buffer. */
-    MTLTemporaryBuffer allocation =
-        ctx->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(totalsize, 256);
-    memcpy(allocation.data, data, totalsize);
-    staging_buffer = allocation.metal_buffer;
-    staging_buffer_offset = allocation.buffer_offset;
-
    /* Common Properties. */
    MTLPixelFormat compatible_write_format = mtl_format_get_writeable_view_format(
        destination_format);
@ -616,6 +605,12 @@ void gpu::MTLTexture::update_sub(
      return;
    }

+    /* Fetch allocation from memory pool. */
+    MTLBuffer *temp_allocation = MTLContext::get_global_memory_manager()->allocate_with_data(
+        totalsize, true, data);
+    id<MTLBuffer> staging_buffer = temp_allocation->get_metal_buffer();
+    BLI_assert(staging_buffer != nil);
+
    /* Prepare command encoders. */
    id<MTLBlitCommandEncoder> blit_encoder = nil;
    id<MTLComputeCommandEncoder> compute_encoder = nil;
@ -697,7 +692,7 @@ void gpu::MTLTexture::update_sub(
          int max_array_index = ((type_ == GPU_TEXTURE_1D_ARRAY) ? extent[1] : 1);
          for (int array_index = 0; array_index < max_array_index; array_index++) {

-            int buffer_array_offset = staging_buffer_offset + (bytes_per_image * array_index);
+            int buffer_array_offset = (bytes_per_image * array_index);
            [blit_encoder
                     copyFromBuffer:staging_buffer
                       sourceOffset:buffer_array_offset
@ -727,7 +722,7 @@ void gpu::MTLTexture::update_sub(
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
-            cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(extent[0], 1, 1) /* Width, Height, Layer */
@ -747,7 +742,7 @@ void gpu::MTLTexture::update_sub(
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
-            cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(extent[0], extent[1], 1) /* Width, layers, nil */
@ -779,7 +774,7 @@ void gpu::MTLTexture::update_sub(
            }

            [blit_encoder copyFromBuffer:staging_buffer
-                            sourceOffset:staging_buffer_offset + texture_array_relative_offset
+                            sourceOffset:texture_array_relative_offset
                       sourceBytesPerRow:bytes_per_row
                     sourceBytesPerImage:bytes_per_image
                              sourceSize:MTLSizeMake(extent[0], extent[1], 1)
@ -807,7 +802,7 @@ void gpu::MTLTexture::update_sub(
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
-            cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder
                      dispatchThreads:MTLSizeMake(
@ -828,7 +823,7 @@ void gpu::MTLTexture::update_sub(
            MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
            cs.bind_pso(pso);
            cs.bind_compute_bytes(&params, sizeof(params), 0);
-            cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+            cs.bind_compute_buffer(staging_buffer, 0, 1);
            cs.bind_compute_texture(texture_handle, 0);
            [compute_encoder dispatchThreads:MTLSizeMake(extent[0],
                                                         extent[1],
@ -848,7 +843,7 @@ void gpu::MTLTexture::update_sub(
                                   ctx->pipeline_state.unpack_row_length);
          int bytes_per_image = bytes_per_row * extent[1];
          [blit_encoder copyFromBuffer:staging_buffer
-                          sourceOffset:staging_buffer_offset
+                          sourceOffset:0
                     sourceBytesPerRow:bytes_per_row
                   sourceBytesPerImage:bytes_per_image
                            sourceSize:MTLSizeMake(extent[0], extent[1], extent[2])
@ -871,7 +866,7 @@ void gpu::MTLTexture::update_sub(
          MTLComputeState &cs = ctx->main_command_buffer.get_compute_state();
          cs.bind_pso(pso);
          cs.bind_compute_bytes(&params, sizeof(params), 0);
-          cs.bind_compute_buffer(staging_buffer, staging_buffer_offset, 1);
+          cs.bind_compute_buffer(staging_buffer, 0, 1);
          cs.bind_compute_texture(texture_handle, 0);
          [compute_encoder
                    dispatchThreads:MTLSizeMake(
@ -896,7 +891,7 @@ void gpu::MTLTexture::update_sub(
            int face_index = offset[2] + i;

            [blit_encoder copyFromBuffer:staging_buffer
-                            sourceOffset:staging_buffer_offset + texture_array_relative_offset
+                            sourceOffset:texture_array_relative_offset
                       sourceBytesPerRow:bytes_per_row
                     sourceBytesPerImage:bytes_per_image
                              sourceSize:MTLSizeMake(extent[0], extent[1], 1)
@ -930,7 +925,7 @@ void gpu::MTLTexture::update_sub(
          for (int i = 0; i < extent[2]; i++) {
            int face_index = offset[2] + i;
            [blit_encoder copyFromBuffer:staging_buffer
-                            sourceOffset:staging_buffer_offset + texture_array_relative_offset
+                            sourceOffset:texture_array_relative_offset
                       sourceBytesPerRow:bytes_per_row
                     sourceBytesPerImage:bytes_per_image
                              sourceSize:MTLSizeMake(extent[0], extent[1], 1)
@ -1058,6 +1053,11 @@ void gpu::MTLTexture::update_sub(

    /* Decrement texture reference counts. This ensures temporary texture views are released. */
    [texture_handle release];
+
+    /* Release temporary staging buffer allocation.
+     * NOTE: Allocation will be tracked with command submission and released once no longer in use.
+     */
+    temp_allocation->free();
  }
 }

--- a/source/blender/gpu/metal/mtl_texture_util.mm
+++ b/source/blender/gpu/metal/mtl_texture_util.mm
@ -402,9 +402,13 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_update_impl(
                                                         options:options
                                                           error:&error] autorelease];
    if (error) {
+      /* Only exit out if genuine error and not warning. */
+      if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
+          NSNotFound) {
        NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
        BLI_assert(false);
-      return nullptr;
+        return nil;
+      }
    }

    /* Fetch compute function. */
@ -718,10 +722,14 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
                                                         options:options
                                                           error:&error] autorelease];
    if (error) {
+      /* Only exit out if genuine error and not warning. */
+      if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
+          NSNotFound) {
        NSLog(@"Compile Error - Metal Shader Library error %@ ", error);
        BLI_assert(false);
        return nil;
      }
+    }

    /* Fetch compute function. */
    BLI_assert(temp_lib != nil);
--- a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
+++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
@ -7,6 +7,10 @@
 * and texture2d types in metal).
 */

+/* Suppress unhelpful shader compiler warnings. */
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wcomment"
+
 /* Base instance with offsets. */
 #define gpu_BaseInstance gl_BaseInstanceARB
 #define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)