Metal: Support for Storage Buffers. #104870

Closed
Thomas Dinges wants to merge 1 commit from (deleted):metal-ssbo into main

32 changed files with 1108 additions and 101 deletions

View File

@ -590,7 +590,7 @@ void dof_gather_accumulator(sampler2D color_tx,
* The full pixel neighborhood is gathered.
* \{ */
void dof_slight_focus_gather(sampler2D depth_tx,
void dof_slight_focus_gather(depth2D depth_tx,
sampler2D color_tx,
sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */
float radius,

View File

@ -62,7 +62,7 @@ void main()
int mask_shift = 1;
#define downsample_level(out_mip__, lod_) \
active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \
active_thread = all(lessThan(uvec2(local_px), gl_WorkGroupSize.xy >> uint(mask_shift))); \
barrier(); /* Wait for previous writes to finish. */ \
if (active_thread) { \
max_depth = max_v4(load_local_depths(local_px)); \
@ -89,12 +89,12 @@ void main()
}
finished_tile_counter = 0u;
ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u));
ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize.xy * 2u));
ivec2 image_border = imageSize(out_mip_5) - 1;
for (int y = 0; y < iter.y; y++) {
for (int x = 0; x < iter.x; x++) {
/* Load result of the other work groups. */
kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y);
kernel_origin = ivec2(gl_WorkGroupSize.xy) * ivec2(x, y);
src_px = ivec2(kernel_origin + local_px) * 2;
vec4 samp;
samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x;

View File

@ -168,13 +168,14 @@ void main()
}
/* Fallthrough to the hemispheric case. */
case LIGHT_RECT:
case LIGHT_ELLIPSE:
case LIGHT_ELLIPSE: {
vec3 v000 = vP - v_right * radius - v_up * radius;
vec3 v100 = v000 + v_right * (radius * 2.0);
vec3 v010 = v000 + v_up * (radius * 2.0);
vec3 v001 = v000 - v_back * radius;
Box bbox = shape_box(v000, v100, v010, v001);
intersect_tile = intersect_tile && intersect(tile, bbox);
} break;
default:
break;
}

View File

@ -74,8 +74,10 @@ void main()
vec4 max_motion = imageLoad(in_tiles_img, src_tile);
MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile);
MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy,
uvec2(src_tile));
MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw,
uvec2(src_tile));
if (true) {
/* Rectangular area (in tiles) where the motion vector spreads. */
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy);
@ -85,17 +87,20 @@ void main()
for (int y = 0; y < motion_rect.extent.y; y++) {
ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
if (is_inside_motion_line(tile, motion_line)) {
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
/* FIXME: This is a bit weird, but for some reason, we need to store the same vector in
* the motion next so that weighting in gather pass is better. */
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
}
}
}
}
if (true) {
MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw,
uvec2(src_tile));
/* Rectangular area (in tiles) where the motion vector spreads. */
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw);
MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw);
@ -104,10 +109,12 @@ void main()
for (int y = 0; y < motion_rect.extent.y; y++) {
ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
if (is_inside_motion_line(tile, motion_line)) {
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
/* FIXME: This is a bit weird, but for some reason, we need to store the same vector in
* the motion next so that weighting in gather pass is better. */
motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
motion_blur_tile_indirection_store(
tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
}
}
}

View File

@ -178,10 +178,10 @@ void main()
vec4 max_motion;
/* Load dilation result from the indirection table. */
ivec2 tile_prev;
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev);
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, uvec2(tile), tile_prev);
max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy;
ivec2 tile_next;
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next);
motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, uvec2(tile), tile_next);
max_motion.zw = imageLoad(in_tiles_img, tile_next).zw;
Accumulator accum;

View File

@ -240,13 +240,13 @@ void output_aov(vec4 color, float value, uint hash)
#if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER)
for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) {
if (aov_buf.hash_color[i] == hash) {
imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color);
imageStore(aov_color_img, ivec3(ivec2(gl_FragCoord.xy), i), color);
return;
}
}
for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) {
if (aov_buf.hash_value[i] == hash) {
imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value));
imageStore(aov_value_img, ivec3(ivec2(gl_FragCoord.xy), i), vec4(value));
return;
}
}

View File

@ -33,7 +33,12 @@
BLI_INLINE eParticleRefineShaderType drw_curves_shader_type_get()
{
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
/* NOTE: Curve refine is faster using transform feedback via vertex processing pipeline with
* Metal and Apple Silicon GPUs. This is also because vertex work can more easily be executed in
* parallel with fragment work, whereas compute inserts an explicit dependency,
* due to switching of command encoder types. */
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support() &&
(GPU_backend_get_type() != GPU_BACKEND_METAL)) {
return PART_REFINE_SHADER_COMPUTE;
}
if (GPU_transform_feedback_support()) {

View File

@ -36,7 +36,12 @@
BLI_INLINE eParticleRefineShaderType drw_hair_shader_type_get()
{
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support()) {
/* NOTE: Hair refine is faster using transform feedback via vertex processing pipeline with Metal
* and Apple Silicon GPUs. This is also because vertex work can more easily be executed in
* parallel with fragment work, whereas compute inserts an explicit dependency,
* due to switching of command encoder types. */

I just created #105241 to keep track of this.
if (GPU_compute_shader_support() && GPU_shader_storage_buffer_objects_support() &&
(GPU_backend_get_type() != GPU_BACKEND_METAL)) {
return PART_REFINE_SHADER_COMPUTE;
}
if (GPU_transform_feedback_support()) {

View File

@ -329,6 +329,14 @@ struct DRWDebugVert {
uint pos2;
/* Named vert_color to avoid global namespace collision with uniform color. */
uint vert_color;
#ifdef GPU_METAL
inline DRWDebugVert() = default;
inline DRWDebugVert(uint in_pos0, uint in_pos1, uint in_pos2, uint in_vert_color)
: pos0(in_pos0), pos1(in_pos1), pos2(in_pos2), vert_color(in_vert_color)
{
}
#endif
};
BLI_STATIC_ASSERT_ALIGN(DRWDebugVert, 16)

View File

@ -14,6 +14,13 @@
struct Circle {
vec2 center;
float radius;
#ifdef GPU_METAL
inline Circle() = default;
inline Circle(vec2 in_center, float in_radius) : center(in_center), radius(in_radius)
{
}
#endif
};
Circle shape_circle(vec2 center, float radius)
@ -30,6 +37,13 @@ Circle shape_circle(vec2 center, float radius)
struct Sphere {
vec3 center;
float radius;
#ifdef GPU_METAL
inline Sphere() = default;
inline Sphere(vec3 in_center, float in_radius) : center(in_center), radius(in_radius)
{
}
#endif
};
Sphere shape_sphere(vec3 center, float radius)
@ -192,6 +206,14 @@ Frustum shape_frustum(vec3 corners[8])
struct Cone {
vec3 direction;
float angle_cos;
#ifdef GPU_METAL
inline Cone() = default;
inline Cone(vec3 in_direction, float in_angle_cos)
: direction(in_direction), angle_cos(in_angle_cos)
{
}
#endif
};
Cone shape_cone(vec3 direction, float angle_cosine)
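
For context on the `GPU_METAL` constructor shims added above (and the `DRWDebugVert` one earlier): GLSL allows `Type(args)` construction of plain structs implicitly, but when the same source is compiled as MSL these calls need an explicit constructor. A minimal sketch of the call sites they keep valid, with illustrative values:

```
/* Hypothetical GLSL-style construction that still compiles under MSL thanks to the shims: */
Circle c = Circle(vec2(0.5, 0.5), 0.25);
Sphere s = Sphere(vec3(0.0, 0.0, 0.0), 1.0);
Cone cone = Cone(vec3(0.0, 0.0, -1.0), 0.5);
```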

View File

@ -33,7 +33,7 @@ void projmat_dimensions(mat4 winmat,
}
}
void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8])
void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, DEVICE_OUT_ARRAY(vec4, corners, 8))
{
float left, right, bottom, top, near, far;
bool is_persp = winmat[3][3] == 0.0;
@ -68,12 +68,12 @@ void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8])
}
void planes_from_projmat(mat4 mat,
out vec4 left,
out vec4 right,
out vec4 bottom,
out vec4 top,
out vec4 near,
out vec4 far)
DEVICE_OUT(vec4, left),

I would prefer to have a return struct instead of this (maybe `ProjectionPlanes`). This is more of a refactor, but I would very much like to avoid these types of defines.
DEVICE_OUT(vec4, right),
DEVICE_OUT(vec4, bottom),
DEVICE_OUT(vec4, top),
DEVICE_OUT(vec4, near),
DEVICE_OUT(vec4, far))
{
/* References:
*
@ -89,7 +89,7 @@ void planes_from_projmat(mat4 mat,
far = mat[3] - mat[2];
}
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6])
void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, DEVICE_OUT_ARRAY(vec4, planes, 6))

Same here
{
mat4 persmat = winmat * viewmat;
planes_from_projmat(persmat, planes[0], planes[5], planes[1], planes[3], planes[4], planes[2]);
@ -100,7 +100,7 @@ void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6])
}
}
vec4 frustum_culling_sphere_calc(vec4 corners[8])
vec4 frustum_culling_sphere_calc(device vec4 corners[8])

Same here. You can use `struct Box` for that matter.
{
/* Extract Bounding Sphere */
/* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. */

View File

@ -240,6 +240,7 @@ set(METAL_SRC
metal/mtl_shader_generator.mm
metal/mtl_shader_interface.mm
metal/mtl_state.mm
metal/mtl_storage_buffer.mm
metal/mtl_texture.mm
metal/mtl_texture_util.mm
metal/mtl_uniform_buffer.mm
@ -265,6 +266,7 @@ set(METAL_SRC
metal/mtl_shader_interface_type.hh
metal/mtl_shader_shared.h
metal/mtl_state.hh
metal/mtl_storage_buffer.hh
metal/mtl_texture.hh
metal/mtl_uniform_buffer.hh
metal/mtl_vertex_buffer.hh

View File

@ -16,6 +16,7 @@
#include "mtl_index_buffer.hh"
#include "mtl_query.hh"
#include "mtl_shader.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
@ -100,8 +101,7 @@ UniformBuf *MTLBackend::uniformbuf_alloc(int size, const char *name)
StorageBuf *MTLBackend::storagebuf_alloc(int size, GPUUsageType usage, const char *name)
{
/* TODO(Metal): Implement MTLStorageBuf. */
return nullptr;
return new MTLStorageBuf(size, usage, name);
}
VertBuf *MTLBackend::vertbuf_alloc()
@ -398,16 +398,16 @@ void MTLBackend::capabilities_init(MTLContext *ctx)
GCaps.shader_image_load_store_support = ([device supportsFamily:MTLGPUFamilyApple3] ||
MTLBackend::capabilities.supports_family_mac1 ||
MTLBackend::capabilities.supports_family_mac2);
GCaps.compute_shader_support = true;
GCaps.shader_storage_buffer_objects_support = false;
/* TODO(Metal): Add support? */
GCaps.shader_draw_parameters_support = false;
GCaps.compute_shader_support = true;
GCaps.geometry_shader_support = false;
GCaps.shader_storage_buffer_objects_support =
false; /* TODO(Metal): implement Storage Buffer support. */
/* Maximum buffer bindings: 31. Consider required slot for uniforms/UBOs/Vertex attributes.
* Can use argument buffers if a higher limit is required. */
GCaps.max_shader_storage_buffer_bindings = 24;
GCaps.max_shader_storage_buffer_bindings = 14;
if (GCaps.compute_shader_support) {
GCaps.max_work_group_count[0] = 65535;

View File

@ -18,7 +18,12 @@ namespace gpu {
#define MTL_MAX_DEFAULT_SAMPLERS 16
/* Total maximum buffers which can be bound to an encoder, for use within a shader.
* MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS must be <=
* than MTL_MAX_BUFFER_BINDINGS. */
* than MTL_MAX_BUFFER_BINDINGS.
* We also require an additional 3 core buffers for:
* - Argument buffer for bindless resources (e.g. samplers)
* - Transform feedback buffer
* - Default push constant block
* Along with up to 6+1 buffers for vertex data, and index data. */

> We also require an additional 3 core buffers for:

Does this mean they are also counting towards the limit of `MTL_MAX_BUFFER_BINDINGS`?
#define MTL_MAX_BUFFER_BINDINGS 31
#define MTL_MAX_UNIFORM_BUFFER_BINDINGS 16
#define MTL_MAX_STORAGE_BUFFER_BINDINGS 12
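
As a quick check of the budget described above (a sketch, not part of the patch): 16 UBO slots plus 12 SSBO slots is 28, which fits inside the 31-binding limit and leaves 3 slots for the core buffers listed in the comment; a compile-time assert could express the stated constraint:

```
/* Hypothetical compile-time check of the constraint stated in the comment above. */
static_assert(MTL_MAX_UNIFORM_BUFFER_BINDINGS + MTL_MAX_STORAGE_BUFFER_BINDINGS <=
                  MTL_MAX_BUFFER_BINDINGS,
              "UBO and SSBO binding ranges must fit within MTL_MAX_BUFFER_BINDINGS");
```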

View File

@ -46,6 +46,7 @@ namespace blender::gpu {
class MTLContext;
class MTLCommandBufferManager;
class MTLUniformBuf;
class MTLStorageBuf;
/* Structs containing information on current binding state for textures and samplers. */
struct MTLTextureBinding {
@ -436,6 +437,11 @@ struct MTLUniformBufferBinding {
MTLUniformBuf *ubo;
};
struct MTLStorageBufferBinding {
bool bound;
MTLStorageBuf *ssbo;
};
struct MTLContextGlobalShaderPipelineState {
bool initialised;
@ -457,6 +463,9 @@ struct MTLContextGlobalShaderPipelineState {
/* Global Uniform Buffers. */
MTLUniformBufferBinding ubo_bindings[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
/* Storage buffer. */
MTLStorageBufferBinding ssbo_bindings[MTL_MAX_STORAGE_BUFFER_BINDINGS];
/* Context Texture bindings. */
MTLTextureBinding texture_bindings[MTL_MAX_TEXTURE_SLOTS];
MTLSamplerBinding sampler_bindings[MTL_MAX_SAMPLER_SLOTS];

View File

@ -12,6 +12,7 @@
#include "mtl_shader.hh"
#include "mtl_shader_interface.hh"
#include "mtl_state.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
@ -20,6 +21,7 @@
#include "GPU_capabilities.h"
#include "GPU_matrix.h"
#include "GPU_shader.h"
#include "GPU_storage_buffer.h"
#include "GPU_texture.h"
#include "GPU_uniform_buffer.h"
#include "GPU_vertex_buffer.h"
@ -272,6 +274,16 @@ MTLContext::~MTLContext()
}
}
/* Unbind SSBOs. */
for (int i = 0; i < MTL_MAX_STORAGE_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ssbo_bindings[i].bound &&
this->pipeline_state.ssbo_bindings[i].ssbo != nullptr) {
GPUStorageBuf *ssbo = wrap(
static_cast<StorageBuf *>(this->pipeline_state.ssbo_bindings[i].ssbo));
GPU_storagebuf_unbind(ssbo);
}
}
/* Release Dummy resources */
this->free_dummy_resources();
@ -360,6 +372,15 @@ void MTLContext::activate()
}
}
/* Reset SSBO bind state. */
for (int i = 0; i < MTL_MAX_STORAGE_BUFFER_BINDINGS; i++) {
if (this->pipeline_state.ssbo_bindings[i].bound &&
this->pipeline_state.ssbo_bindings[i].ssbo != nullptr) {
this->pipeline_state.ssbo_bindings[i].bound = false;
this->pipeline_state.ssbo_bindings[i].ssbo = nullptr;
}
}
/* Ensure imm active. */
immActivate();
}
@ -658,6 +679,10 @@ void MTLContext::pipeline_state_init()
this->pipeline_state.ubo_bindings[u].bound = false;
this->pipeline_state.ubo_bindings[u].ubo = nullptr;
}
for (int u = 0; u < MTL_MAX_STORAGE_BUFFER_BINDINGS; u++) {
this->pipeline_state.ssbo_bindings[u].bound = false;
this->pipeline_state.ssbo_bindings[u].ssbo = nullptr;
}
}
/*** State defaults -- restored by GPU_state_init. ***/
@ -1026,7 +1051,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
rps.last_bound_shader_state.pso_index_ !=
pipeline_state_instance->shader_pso_index);
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - There buffer index
@ -1061,7 +1086,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
@ -1177,6 +1202,58 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
}
/* Bind Global GPUStorageBuf's */
/* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) {
const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index);
if (ssbo.buffer_index >= 0) {
id<MTLBuffer> ssbo_buffer = nil;
int ssbo_size = 0;
UNUSED_VARS_NDEBUG(ssbo_size);
if (this->pipeline_state.ssbo_bindings[ssbo_index].bound) {
/* Fetch UBO global-binding properties from slot. */
ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_index].ssbo->get_metal_buffer();
ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_index].ssbo->get_size();
/* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */
BLI_assert(ssbo_buffer != nil);
BLI_assert(ssbo_size > 0);
}
else {
MTL_LOG_INFO(

Should be `MTL_LOG_ERROR`. This should be mandatory. Same for UBOs.
"[Warning][SSBO] Shader '%s' expected SSBO '%s' to be bound at buffer index: %d -- "
"but "
"nothing was bound.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo.buffer_index);
}
if (ssbo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance->base_storage_buffer_index +
ssbo.buffer_index;
/* Bind Vertex UBO. */
if (bool(ssbo.stage_mask & ShaderStage::VERTEX)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_vertex_buffer(ssbo_buffer, 0, buffer_bind_index);
}
/* Bind Fragment UBOs. */
if (bool(ssbo.stage_mask & ShaderStage::FRAGMENT)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
rps.bind_fragment_buffer(ssbo_buffer, 0, buffer_bind_index);
}
}
}
}
return true;
}
@ -1191,7 +1268,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
/* Fetch push constant block and bind. */
const MTLShaderUniformBlock &push_constant_block = shader_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = shader_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
/* Fetch uniform buffer base binding index from pipeline_state_instance - There buffer index
@ -1218,7 +1295,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
* match. This is used to support the gpu_uniformbuffer module, where the uniform data is global,
* and not owned by the shader instance. */
for (const uint ubo_index : IndexRange(shader_interface->get_total_uniform_blocks())) {
const MTLShaderUniformBlock &ubo = shader_interface->get_uniform_block(ubo_index);
const MTLShaderBufferBlock &ubo = shader_interface->get_uniform_block(ubo_index);
if (ubo.buffer_index >= 0) {
@ -1270,7 +1347,7 @@ bool MTLContext::ensure_uniform_buffer_bindings(
uint32_t buffer_bind_index = pipeline_state_instance.base_uniform_buffer_index +
buffer_index;
/* Bind Vertex UBO. */
/* Bind Compute UBO. */
if (bool(ubo.stage_mask & ShaderStage::COMPUTE)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
cs.bind_compute_buffer(ubo_buffer, ubo_offset, buffer_bind_index);
@ -1286,6 +1363,52 @@ bool MTLContext::ensure_uniform_buffer_bindings(
}
}
}
/* Bind Global GPUStorageBuffers */
/* Iterate through expected SSBOs in the shader interface, and check if the globally bound ones
* match. */
for (const uint ssbo_index : IndexRange(shader_interface->get_total_storage_blocks())) {
const MTLShaderBufferBlock &ssbo = shader_interface->get_storage_block(ssbo_index);
if (ssbo.buffer_index >= 0) {
id<MTLBuffer> ssbo_buffer = nil;
int ssbo_size = 0;
if (this->pipeline_state.ssbo_bindings[ssbo_index].bound) {
/* Fetch UBO global-binding properties from slot. */
ssbo_buffer = this->pipeline_state.ssbo_bindings[ssbo_index].ssbo->get_metal_buffer();
ssbo_size = this->pipeline_state.ssbo_bindings[ssbo_index].ssbo->get_size();
UNUSED_VARS_NDEBUG(ssbo_size);
/* For SSBOs, we always need to ensure the buffer exists, as it may be written to. */
BLI_assert(ssbo_buffer != nil);
BLI_assert(ssbo_size > 0);
}
else {
MTL_LOG_ERROR(
"[Error][SSBO] Shader '%s' expected SSBO '%s' to be bound at SSBO index: %d (buffer "
"%d) -- but "
"nothing was bound.\n",
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo.buffer_index,
pipeline_state_instance.base_storage_buffer_index + ssbo.buffer_index);
}
if (ssbo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance.base_storage_buffer_index +
ssbo.buffer_index;
/* Bind Vertex UBO. */
if (bool(ssbo.stage_mask & ShaderStage::COMPUTE)) {
BLI_assert(buffer_bind_index >= 0 && buffer_bind_index < MTL_MAX_BUFFER_BINDINGS);
cs.bind_compute_buffer(ssbo_buffer, 0, buffer_bind_index);
}
}
}
}
return true;
}
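
For reference, a minimal sketch of the caller-side flow these binding loops service, using the public GPU API (buffer size, slot and shader are hypothetical; the compute dispatch is assumed to bind the shader internally):

```
GPUStorageBuf *ssbo = GPU_storagebuf_create_ex(1024, nullptr, GPU_USAGE_DEVICE_ONLY, "test_ssbo");
GPU_storagebuf_bind(ssbo, 0); /* Recorded into pipeline_state.ssbo_bindings[0]. */
GPU_compute_dispatch(shader, 64, 1, 1); /* ensure_uniform_buffer_bindings() resolves slot 0 to
                                         * buffer(base_storage_buffer_index + buffer_index). */
GPU_storagebuf_free(ssbo);
```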

View File

@ -18,12 +18,16 @@ namespace blender::gpu {
class MTLIndexBuf : public IndexBuf {
friend class MTLBatch;
friend class MTLDrawList;
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/* Metal buffer resource. */
gpu::MTLBuffer *ibo_ = nullptr;
uint64_t alloc_size_ = 0;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
#ifndef NDEBUG
/* Flags whether point index buffer has been compacted
* to remove false restart indices. */

View File

@ -7,6 +7,7 @@
#include "mtl_index_buffer.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "BLI_span.hh"
@ -22,6 +23,11 @@ MTLIndexBuf::~MTLIndexBuf()
ibo_->free();
}
this->free_optimized_buffer();
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLIndexBuf::free_optimized_buffer()
@ -42,8 +48,14 @@ void MTLIndexBuf::bind_as_ssbo(uint32_t binding)
/* Ensure we have a valid IBO. */
BLI_assert(this->ibo_);
/* TODO(Metal): Support index buffer SSBO's. Dependent on compute implementation. */
MTL_LOG_WARNING("MTLIndexBuf::bind_as_ssbo not yet implemented!\n");
/* Ensure resource is initialized. */
this->upload_data();
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_);
}
ssbo_wrapper_->bind(binding);
}
void MTLIndexBuf::read(uint32_t *data) const

View File

@ -71,7 +71,7 @@ struct MTLRenderPipelineStateInstance {
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index;
/* Base bind index for binding storage buffers. */
int base_ssbo_buffer_index;
int base_storage_buffer_index;
/* buffer bind slot used for null attributes (-1 if not needed). */
int null_attribute_buffer_index;
/* buffer bind used for transform feedback output buffer. */
@ -101,7 +101,7 @@ struct MTLComputePipelineStateInstance {
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index = -1;
/* Base bind index for binding storage buffers. */
int base_ssbo_buffer_index = -1;
int base_storage_buffer_index = -1;
int threadgroup_x_len = 1;
int threadgroup_y_len = 1;

View File

@ -386,7 +386,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
valid_ = true;
/* Prepare backing data storage for local uniforms. */
const MTLShaderUniformBlock &push_constant_block = mtl_interface->get_push_constant_block();
const MTLShaderBufferBlock &push_constant_block = mtl_interface->get_push_constant_block();
if (push_constant_block.size > 0) {
push_constant_data_ = MEM_callocN(push_constant_block.size, __func__);
this->push_constant_bindstate_mark_dirty(true);
@ -987,12 +987,26 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
type:MTLDataTypeInt
withName:@"MTL_uniform_buffer_base_index"];
/* Storage buffer bind index.
* This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
* and an additional space for the push constant block.
* If the shader does not have any uniform blocks, then we can place directly after the push
* constant block. As we do not need an extra spot for the UBO at index '0'. */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index +
((mtl_interface->get_total_uniform_blocks() > 0) ?
(mtl_interface->get_max_ubo_index() + 2) :
(MTL_uniform_buffer_base_index + 1));
[values setConstantValue:&MTL_storage_buffer_base_index
type:MTLDataTypeInt
withName:@"MTL_storage_buffer_base_index"];
/* Transform feedback constant.
* Ensure buffer is placed after existing buffers, including default buffers. */
* Ensure buffer is placed after existing buffers, including default buffers, UBOs and SSBOs.
*/
int MTL_transform_feedback_buffer_index = (this->transform_feedback_type_ !=
GPU_SHADER_TFB_NONE) ?
MTL_uniform_buffer_base_index +
mtl_interface->get_max_ubo_index() + 2 :
MTL_storage_buffer_base_index +
mtl_interface->get_max_ssbo_index() + 2 :
-1;
if (this->transform_feedback_type_ != GPU_SHADER_TFB_NONE) {
@ -1150,6 +1164,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
pso_inst->frag = desc.fragmentFunction;
pso_inst->pso = pso;
pso_inst->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
pso_inst->base_storage_buffer_index = MTL_storage_buffer_base_index;
pso_inst->null_attribute_buffer_index = (using_null_buffer) ? null_buffer_index : -1;
pso_inst->transform_feedback_buffer_index = MTL_transform_feedback_buffer_index;
pso_inst->prim_type = prim_type;
@ -1254,6 +1269,8 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
{
/* NOTE(Metal): Bakes and caches a PSO for compute. */
BLI_assert(this);
MTLShaderInterface *mtl_interface = this->get_interface();
BLI_assert(mtl_interface);
BLI_assert(this->is_valid());
BLI_assert(shader_library_compute_ != nil);
@ -1275,7 +1292,19 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
type:MTLDataTypeInt
withName:@"MTL_uniform_buffer_base_index"];
/* TODO: SSBO binding base index. */
/* Storage buffer bind index.
* This is always relative to MTL_uniform_buffer_base_index, plus the number of active buffers,
* and an additional space for the push constant block.
* If the shader does not have any uniform blocks, then we can place directly after the push
* constant block. As we do not need an extra spot for the UBO at index '0'. */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index +
((mtl_interface->get_total_uniform_blocks() > 0) ?
(mtl_interface->get_max_ubo_index() + 2) :
(MTL_uniform_buffer_base_index + 1));
[values setConstantValue:&MTL_storage_buffer_base_index
type:MTLDataTypeInt
withName:@"MTL_storage_buffer_base_index"];
/* Compile compute function. */
NSError *error = nullptr;
@ -1321,8 +1350,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
compute_pso_instance_.compute = [compute_function retain];
compute_pso_instance_.pso = [pso retain];
compute_pso_instance_.base_uniform_buffer_index = MTL_uniform_buffer_base_index;
/* TODO: Add SSBO base buffer index support. */
compute_pso_instance_.base_ssbo_buffer_index = -1;
compute_pso_instance_.base_storage_buffer_index = MTL_storage_buffer_base_index;
}
return true;
}
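
A worked example of the base-index formula above, with hypothetical counts: two vertex buffers give `MTL_uniform_buffer_base_index = 2`, the push constant block sits at `buffer(2)`, three UBOs (highest index 2) occupy `buffer(3..5)`, and the storage buffer base then resolves to `2 + (2 + 2) = 6`:

```
/* Hypothetical values, following the formula used for both render and compute PSOs above. */
int MTL_uniform_buffer_base_index = 2; /* Two vertex buffers occupy buffer(0..1). */
int max_ubo_index = 2;                 /* Three UBOs -> buffer(3..5); push constants at buffer(2). */
int MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index + (max_ubo_index + 2); /* == 6 */
/* An SSBO with buffer_index N then binds at buffer(MTL_storage_buffer_base_index + N). */
```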

View File

@ -105,6 +105,22 @@
* }
* \endcode
*
* -- Metal buffer bindings structure --
*
* Metal shaders contain several different binding types. All buffers are bound using the buffer(N)
* binding attribute tag. However, different ranges serve different purposes. The structure of the
* bindings is always as follows:
*
* Vertex Buffers (N) <-- 0
* Index buffer
* Default Push constant block for uniforms <-- MTL_uniform_buffer_base_index
* Uniform buffers <-- MTL_uniform_buffer_base_index+1
* Storage buffers <-- MTL_storage_buffer_base_index
* Samplers/argument buffer table <-- last buffer + 1

Why not put all reserved ones first? This would simplify this mapping.
* Transform feedback buffer <-- last_buffer + 2
*
* Up to a maximum of 31 bindings.
*
* -- SSBO-vertex-fetchmode --
*
* SSBO-vertex-fetchmode is a special option wherein vertex buffers are bound directly
@ -200,13 +216,14 @@ struct MSLUniform {
}
};
struct MSLUniformBlock {
struct MSLBufferBlock {
std::string type_name;
std::string name;
ShaderStage stage;
bool is_array;
shader::Qualifier qualifiers;
bool operator==(const MSLUniformBlock &right) const
bool operator==(const MSLBufferBlock &right) const
{
return (type_name == right.type_name && name == right.name);
}
@ -369,7 +386,8 @@ class MSLGeneratorInterface {
public:
/** Shader stage input/output binding information.
* Derived from shader source reflection or GPUShaderCreateInfo. */
blender::Vector<MSLUniformBlock> uniform_blocks;
blender::Vector<MSLBufferBlock> uniform_blocks;
blender::Vector<MSLBufferBlock> storage_blocks;
blender::Vector<MSLUniform> uniforms;
blender::Vector<MSLTextureSampler> texture_samplers;
blender::Vector<MSLVertexInputAttribute> vertex_input_attributes;
@ -385,7 +403,8 @@ class MSLGeneratorInterface {
blender::Vector<char> clip_distances;
/* Shared Memory Blocks. */
blender::Vector<MSLSharedMemoryBlock> shared_memory_blocks;
/* Max bind IDs. */
int max_tex_bind_index = 0;
/** GL Global usage. */
/* Whether GL position is used, or an alternative vertex output should be the default. */
bool uses_gl_Position;
@ -459,8 +478,10 @@ class MSLGeneratorInterface {
/* Samplers. */
bool use_argument_buffer_for_samplers() const;
uint32_t num_samplers_for_stage(ShaderStage stage) const;
uint32_t max_sampler_index_for_stage(ShaderStage stage) const;
/* Returns the bind index, relative to MTL_uniform_buffer_base_index. */
/* Returns the bind index, relative to
* MTL_uniform_buffer_base_index+MTL_storage_buffer_base_index. */
uint32_t get_sampler_argument_buffer_bind_index(ShaderStage stage);
/* Code generation utility functions. */
@ -476,7 +497,7 @@ class MSLGeneratorInterface {
std::string generate_msl_fragment_entry_stub();
std::string generate_msl_compute_entry_stub();
std::string generate_msl_global_uniform_population(ShaderStage stage);
std::string generate_ubo_block_macro_chain(MSLUniformBlock block);
std::string generate_ubo_block_macro_chain(MSLBufferBlock block);
std::string generate_msl_uniform_block_population(ShaderStage stage);
std::string generate_msl_vertex_attribute_input_population();
std::string generate_msl_vertex_output_population();
@ -538,7 +559,9 @@ inline bool is_builtin_type(std::string type)
{
/* Add Types as needed. */
/* TODO(Metal): Consider replacing this with a switch and `constexpr` hash and switch.
* Though most efficient and maintainable approach to be determined. */
* Though most efficient and maintainable approach to be determined.
* NOTE: Some duplicate types exist for Metal and GLSL representations, as generated typenames
* from createinfo may use GLSL signature. */
static std::map<std::string, eMTLDataType> glsl_builtin_types = {
{"float", MTL_DATATYPE_FLOAT},
{"vec2", MTL_DATATYPE_FLOAT2},
@ -548,10 +571,17 @@ inline bool is_builtin_type(std::string type)
{"ivec2", MTL_DATATYPE_INT2},
{"ivec3", MTL_DATATYPE_INT3},
{"ivec4", MTL_DATATYPE_INT4},
{"int2", MTL_DATATYPE_INT2},
{"int3", MTL_DATATYPE_INT3},
{"int4", MTL_DATATYPE_INT4},
{"uint32_t", MTL_DATATYPE_UINT},
{"uvec2", MTL_DATATYPE_UINT2},
{"uvec3", MTL_DATATYPE_UINT3},
{"uvec4", MTL_DATATYPE_UINT4},
{"uint", MTL_DATATYPE_UINT},
{"uint2", MTL_DATATYPE_UINT2},
{"uint3", MTL_DATATYPE_UINT3},
{"uint4", MTL_DATATYPE_UINT4},
{"mat3", MTL_DATATYPE_FLOAT3x3},
{"mat4", MTL_DATATYPE_FLOAT4x4},
{"bool", MTL_DATATYPE_INT},

View File

@ -709,8 +709,30 @@ static void print_resource(std::ostream &os, const ShaderCreateInfo::Resource &r
}
break;
}
case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER:
case ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: {
int64_t array_offset = res.storagebuf.name.find_first_of("[");
bool writeable = (res.storagebuf.qualifiers & shader::Qualifier::WRITE) ==
shader::Qualifier::WRITE;
const char *memory_scope = ((writeable) ? "device " : "constant ");
if (array_offset == -1) {
/* Create local class member as device pointer reference to bound SSBO.
* Given usage within a shader follows ssbo_name.ubo_element syntax, we can
* dereference the pointer as the compiler will optimize this data fetch.
* To do this, we also give the UBO name a post-fix of `_local` to avoid
* macro accessor collisions. */
os << memory_scope << res.storagebuf.type_name << " *" << res.storagebuf.name
<< "_local;\n";
os << "#define " << res.storagebuf.name << " (*" << res.storagebuf.name << "_local)\n";
}
else {
/* For arrays, we can directly provide the constant access pointer, as the array
* syntax will de-reference this at the correct fetch index. */
StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset);
os << memory_scope << res.storagebuf.type_name << " *" << name_no_array << ";\n";
}
break;
}
}
}
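
To illustrate what this case emits (resource names and types are hypothetical): a non-array storage buffer declared as `.storage_buf(0, Qualifier::READ_WRITE, "DrawData", "drw_data")` and an array one declared as `.storage_buf(1, Qualifier::READ, "ObjectBounds", "bounds_buf[]")` would produce roughly:

```
/* Non-array case: pointer member plus accessor macro. */
device DrawData *drw_data_local;
#define drw_data (*drw_data_local)

/* Array case: direct pointer, indexed at the call site. */
constant ObjectBounds *bounds_buf;
```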
@ -999,7 +1021,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_vertex << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_vertex << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::VERTEX) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::VERTEX) + 1 << std::endl;
}
if (msl_iface.uses_ssbo_vertex_fetch_mode) {
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
@ -1190,7 +1212,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_fragment << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_fragment << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::FRAGMENT) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::FRAGMENT) + 1 << std::endl;
}
/* Inject common Metal header. */
@ -1437,7 +1459,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
if (msl_iface.use_argument_buffer_for_samplers()) {
ss_compute << "#define USE_ARGUMENT_BUFFER_FOR_SAMPLERS 1" << std::endl;
ss_compute << "#define ARGUMENT_BUFFER_NUM_SAMPLERS "
<< msl_iface.num_samplers_for_stage(ShaderStage::COMPUTE) << std::endl;
<< msl_iface.max_sampler_index_for_stage(ShaderStage::COMPUTE) + 1 << std::endl;
}
/* Inject static workgroup sizes. */
@ -1555,6 +1577,31 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
this->set_compute_function_name(@"compute_function_entry");
#endif
/* DEBUG: Export source to file for manual verification. */
#if MTL_SHADER_DEBUG_EXPORT_SOURCE
NSFileManager *sharedFM = [NSFileManager defaultManager];
NSURL *app_bundle_url = [[NSBundle mainBundle] bundleURL];
NSURL *shader_dir = [[app_bundle_url URLByDeletingLastPathComponent]
URLByAppendingPathComponent:@"Shaders/"
isDirectory:YES];
[sharedFM createDirectoryAtURL:shader_dir
withIntermediateDirectories:YES
attributes:nil
error:nil];
const char *path_cstr = [shader_dir fileSystemRepresentation];
std::ofstream compute_fs;
compute_fs.open(
(std::string(path_cstr) + "/" + std::string(this->name) + "_GeneratedComputeShader.msl")
.c_str());
compute_fs << ss_compute.str();
compute_fs.close();
shader_debug_printf(
"Compute Shader Saved to: %s\n",
(std::string(path_cstr) + std::string(this->name) + "_GeneratedComputeShader.msl").c_str());
#endif
NSString *msl_final_compute = [NSString stringWithUTF8String:ss_compute.str().c_str()];
this->shader_compute_source_from_msl(msl_final_compute);
@ -1738,6 +1785,7 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
MSLTextureSampler msl_tex(
ShaderStage::ANY, res.sampler.type, res.sampler.name, access, used_slot);
texture_samplers.append(msl_tex);
max_tex_bind_index = max_ii(used_slot, max_tex_bind_index);
} break;
case shader::ShaderCreateInfo::Resource::BindType::IMAGE: {
@ -1771,14 +1819,16 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
access,
used_slot);
texture_samplers.append(msl_tex);
max_tex_bind_index = max_ii(used_slot, max_tex_bind_index);
} break;
case shader::ShaderCreateInfo::Resource::BindType::UNIFORM_BUFFER: {
MSLUniformBlock ubo;
MSLBufferBlock ubo;
BLI_assert(res.uniformbuf.type_name.size() > 0);
BLI_assert(res.uniformbuf.name.size() > 0);
int64_t array_offset = res.uniformbuf.name.find_first_of("[");
ubo.qualifiers = shader::Qualifier::READ;
ubo.type_name = res.uniformbuf.type_name;
ubo.is_array = (array_offset > -1);
if (ubo.is_array) {
@ -1794,8 +1844,24 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
} break;
case shader::ShaderCreateInfo::Resource::BindType::STORAGE_BUFFER: {
/* TODO(Metal): Support shader storage buffer in Metal.
* Pending compute support. */
MSLBufferBlock ssbo;
BLI_assert(res.storagebuf.type_name.size() > 0);
BLI_assert(res.storagebuf.name.size() > 0);
int64_t array_offset = res.storagebuf.name.find_first_of("[");
ssbo.qualifiers = res.storagebuf.qualifiers;
ssbo.type_name = res.storagebuf.type_name;
ssbo.is_array = (array_offset > -1);
if (ssbo.is_array) {
/* If is array UBO, strip out array tag from name. */
StringRef name_no_array = StringRef(res.storagebuf.name.c_str(), array_offset);
ssbo.name = name_no_array;
}
else {
ssbo.name = res.storagebuf.name;
}
ssbo.stage = ShaderStage::FRAGMENT | ShaderStage::COMPUTE;
storage_blocks.append(ssbo);
} break;
}
}
@ -1850,10 +1916,28 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
bool MSLGeneratorInterface::use_argument_buffer_for_samplers() const
{
/* We can only use argument buffers IF sampler count exceeds static limit of 16,
* AND we can support more samplers with an argument buffer.
* NOTE: We reserve one constant sampler within the shader for fast read via point-sampling. */
return texture_samplers.size() >= 15 && GPU_max_samplers() > 16;
/* We can only use argument buffers IF highest sampler index exceeds static limit of 16,
* AND we can support more samplers with an argument buffer. */
bool use_argument_buffer = (texture_samplers.size() >= 15 || max_tex_bind_index >= 14) &&
GPU_max_samplers() > 15;
#ifndef NDEBUG
/* Due to explicit bind location support, we may be below the sampler limit, but forced to offset
* bindings due to the range being high. Introduce debug check here to issue warning. In these
* cases, if explicit bind location support is not required, best to use auto_resource_location
* to optimize bind point packing. */
if (use_argument_buffer && texture_samplers.size() < 15) {
MTL_LOG_WARNING(
"Compiled Shader '%s' is falling back to bindless via argument buffers due to having a "
"texture sampler of Index: %u Which exceeds the limit of 15+1. However shader only uses "
"%d textures. Consider optimising bind points with .auto_resource_location(true).\n",
parent_shader_.name_get(),
max_tex_bind_index,
(int)texture_samplers.size());
}
#endif
return use_argument_buffer;
}
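
As a concrete case of the new warning (shader and sampler names are hypothetical): a create-info with only two samplers, one pinned to slot 14, now takes the argument-buffer path on hardware reporting more than 15 samplers, even though the sampler count itself is small:

```
GPU_SHADER_CREATE_INFO(example_shader)
    .sampler(0, ImageType::FLOAT_2D, "tex_a")
    .sampler(14, ImageType::FLOAT_2D, "tex_b"); /* max_tex_bind_index = 14 -> argument buffer path;
                                                 * the warning suggests .auto_resource_location(true)
                                                 * so slots get repacked instead. */
```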
uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const
@ -1863,6 +1947,13 @@ uint32_t MSLGeneratorInterface::num_samplers_for_stage(ShaderStage stage) const
return texture_samplers.size();
}
uint32_t MSLGeneratorInterface::max_sampler_index_for_stage(ShaderStage stage) const
{
/* NOTE: Sampler bindings and argument buffer shared across stages,
* in case stages share texture/sampler bindings. */
return max_tex_bind_index;
}
uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderStage stage)
{
/* Note: Shader stage must be a singular index. Compound shader masks are not valid for this
@ -1873,7 +1964,7 @@ uint32_t MSLGeneratorInterface::get_sampler_argument_buffer_bind_index(ShaderSta
return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)];
}
sampler_argument_buffer_bind_index[get_shader_stage_index(stage)] =
(this->uniform_blocks.size() + 1);
(this->uniform_blocks.size() + this->storage_blocks.size() + 1);
return sampler_argument_buffer_bind_index[get_shader_stage_index(stage)];
}
@ -2148,7 +2239,6 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()
out << this->generate_msl_texture_vars(ShaderStage::COMPUTE);
out << this->generate_msl_global_uniform_population(ShaderStage::COMPUTE);
out << this->generate_msl_uniform_block_population(ShaderStage::COMPUTE);
/* TODO(Metal): SSBO Population. */
/* Execute original 'main' function within class scope. */
out << "\t/* Execute Compute main function */\t" << std::endl
@ -2205,8 +2295,9 @@ void MSLGeneratorInterface::generate_msl_textures_input_string(std::stringstream
void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream &out,
ShaderStage stage)
{
/* Uniform buffers. */
int ubo_index = 0;
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
if (bool(ubo.stage & stage)) {
/* For literal/existing global types, we do not need the class name-space accessor. */
out << ",\n\tconstant ";
@ -2222,6 +2313,28 @@ void MSLGeneratorInterface::generate_msl_uniforms_input_string(std::stringstream
}
ubo_index++;
}
/* Storage buffers. */
int ssbo_index = 0;
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
if (bool(ssbo.stage & stage)) {
/* For literal/existing global types, we do not need the class name-space accessor. */
bool writeable = (ssbo.qualifiers & shader::Qualifier::WRITE) == shader::Qualifier::WRITE;
const char *memory_scope = ((writeable) ? "device " : "constant ");
out << ",\n\t" << memory_scope;
if (!is_builtin_type(ssbo.type_name)) {
out << get_stage_class_name(stage) << "::";
}
/* #StorageBuffer bind indices start at `MTL_storage_buffer_base_index`.
* MTL_storage_buffer_base_index follows immediately after all uniform blocks.
* such that MTL_storage_buffer_base_index = MTL_uniform_buffer_base_index +
* uniform_blocks.size() + 1. Where the additional buffer is reserved for the
* #PushConstantBlock (push constants). */
out << ssbo.type_name << "* " << ssbo.name << "[[buffer(MTL_storage_buffer_base_index+"
<< (ssbo_index) << ")]]";
}
ssbo_index++;
}
}
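
The per-SSBO string appended here then shows up in the generated entry-point signature roughly as follows (type and name hypothetical; the stage class prefix is added for non-builtin types, and read-only blocks use `constant` instead of `device`):

```
, device DrawData* drw_data [[buffer(MTL_storage_buffer_base_index+0)]]
```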
std::string MSLGeneratorInterface::generate_msl_vertex_inputs_string()
@ -2372,9 +2485,13 @@ std::string MSLGeneratorInterface::generate_msl_uniform_undefs(ShaderStage shade
out << "#undef " << uniform.name << std::endl;
}
/* UBO block undef. */
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
out << "#undef " << ubo.name << std::endl;
}
/* SSBO block undef. */
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
out << "#undef " << ssbo.name << std::endl;
}
return out.str();
}
@ -2656,7 +2773,7 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS
/* Populate Global Uniforms. */
std::stringstream out;
out << "\t/* Copy UBO block references into local class variables */" << std::endl;
for (const MSLUniformBlock &ubo : this->uniform_blocks) {
for (const MSLBufferBlock &ubo : this->uniform_blocks) {
/* Only include blocks which are used within this stage. */
if (bool(ubo.stage & stage)) {
@ -2672,6 +2789,26 @@ std::string MSLGeneratorInterface::generate_msl_uniform_block_population(ShaderS
out << " = " << ubo.name << ";" << std::endl;
}
}
/* Populate storage buffer references. */
out << "\t/* Copy SSBO block references into local class variables */" << std::endl;
for (const MSLBufferBlock &ssbo : this->storage_blocks) {
/* Only include blocks which are used within this stage. */
if (bool(ssbo.stage & stage)) {
/* Generate UBO reference assignment.
* NOTE(Metal): We append `_local` post-fix onto the class member name
* for the ubo to avoid name collision with the UBO accessor macro.
* We only need to add this post-fix for the non-array access variant,
* as the array is indexed directly, rather than requiring a dereference. */
out << "\t" << get_shader_stage_instance_name(stage) << "." << ssbo.name;
if (!ssbo.is_array) {
out << "_local";
}
out << " = " << ssbo.name << ";" << std::endl;
}
}
out << std::endl;
return out.str();
}
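
The population code emitted per storage block is then a single pointer assignment into the stage instance; a sketch with hypothetical names (the instance name depends on `get_shader_stage_instance_name()`):

```
/* Non-array SSBO: assign into the `_local` member that the accessor macro dereferences. */
compute_shader_instance.drw_data_local = drw_data;
/* Array SSBO: assigned directly, without the `_local` suffix. */
compute_shader_instance.bounds_buf = bounds_buf;
```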
@ -3261,6 +3398,18 @@ MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *nam
this->uniform_blocks[uniform_block].stage);
}
/* Prepare Interface Storage Blocks. */
for (int storage_block = 0; storage_block < this->storage_blocks.size(); storage_block++) {
interface->add_storage_block(
name_buffer_copystr(&interface->name_buffer_,
this->storage_blocks[storage_block].name.c_str(),
name_buffer_size,
name_buffer_offset),
storage_block,
0,
this->storage_blocks[storage_block].stage);
}
/* Texture/sampler bindings to interface. */
for (const MSLTextureSampler &texture_sampler : this->texture_samplers) {
interface->add_texture(name_buffer_copystr(&interface->name_buffer_,

View File

@ -107,7 +107,7 @@ struct MTLShaderInputAttribute {
uint32_t matrix_element_count;
};
struct MTLShaderUniformBlock {
struct MTLShaderBufferBlock {
uint32_t name_offset;
uint32_t size = 0;
/* Buffer resource bind index in shader `[[buffer(index)]]`. */
@ -120,7 +120,7 @@ struct MTLShaderUniformBlock {
struct MTLShaderUniform {
uint32_t name_offset;
/* Index of `MTLShaderUniformBlock` this uniform belongs to. */
/* Index of `MTLShaderBufferBlock` this uniform belongs to. */
uint32_t size_in_bytes;
uint32_t byte_offset;
eMTLDataType type;
@ -173,8 +173,13 @@ class MTLShaderInterface : public ShaderInterface {
/* Uniform Blocks. */
uint32_t total_uniform_blocks_;
uint32_t max_uniformbuf_index_;
MTLShaderUniformBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
MTLShaderUniformBlock push_constant_block_;
MTLShaderBufferBlock ubos_[MTL_MAX_UNIFORM_BUFFER_BINDINGS];
MTLShaderBufferBlock push_constant_block_;
/* Storage blocks. */
uint32_t total_storage_blocks_;
uint32_t max_storagebuf_index_;
MTLShaderBufferBlock ssbos_[MTL_MAX_STORAGE_BUFFER_BINDINGS];
/* Textures. */
/* Textures support explicit binding indices, so some texture slots
@ -209,6 +214,10 @@ class MTLShaderInterface : public ShaderInterface {
uint32_t buffer_index,
uint32_t size,
ShaderStage stage_mask = ShaderStage::ANY);
uint32_t add_storage_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t size,
ShaderStage stage_mask = ShaderStage::ANY);
void add_uniform(uint32_t name_offset, eMTLDataType type, int array_len = 1);
void add_texture(uint32_t name_offset,
uint32_t texture_slot,
@ -232,14 +241,21 @@ class MTLShaderInterface : public ShaderInterface {
uint32_t get_total_uniforms() const;
/* Fetch Uniform Blocks. */
const MTLShaderUniformBlock &get_uniform_block(uint index) const;
const MTLShaderBufferBlock &get_uniform_block(uint index) const;
uint32_t get_total_uniform_blocks() const;
uint32_t get_max_ubo_index() const;
bool has_uniform_block(uint32_t block_index) const;
uint32_t get_uniform_block_size(uint32_t block_index) const;
/* Fetch Storage Blocks. */
const MTLShaderBufferBlock &get_storage_block(uint index) const;
uint32_t get_total_storage_blocks() const;
uint32_t get_max_ssbo_index() const;
bool has_storage_block(uint32_t block_index) const;
uint32_t get_storage_block_size(uint32_t block_index) const;
/* Push constant uniform data block should always be available. */
const MTLShaderUniformBlock &get_push_constant_block() const;
const MTLShaderBufferBlock &get_push_constant_block() const;
/* Fetch textures. */
const MTLShaderTexture &get_texture(uint index) const;

View File

@ -56,6 +56,8 @@ void MTLShaderInterface::init()
total_attributes_ = 0;
total_uniform_blocks_ = 0;
max_uniformbuf_index_ = 0;
total_storage_blocks_ = 0;
max_storagebuf_index_ = 0;
total_uniforms_ = 0;
total_textures_ = 0;
max_texture_index_ = -1;
@ -73,6 +75,9 @@ void MTLShaderInterface::init()
for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) {
builtin_blocks_[ubo] = -1;
}
for (const int ssbo : IndexRange(GPU_NUM_STORAGE_BUFFERS)) {
builtin_buffers_[ssbo] = -1;
}
for (const int tex : IndexRange(MTL_MAX_TEXTURE_SLOTS)) {
textures_[tex].used = false;
textures_[tex].slot_index = -1;
@ -117,7 +122,10 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
size += 16 - (size % 16);
}
MTLShaderUniformBlock &uni_block = ubos_[total_uniform_blocks_];
BLI_assert(total_uniform_blocks_ < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
BLI_assert(buffer_index < MTL_MAX_STORAGE_BUFFER_BINDINGS);
MTLShaderBufferBlock &uni_block = ubos_[total_uniform_blocks_];
uni_block.name_offset = name_offset;
uni_block.buffer_index = buffer_index;
uni_block.size = size;
@ -127,6 +135,29 @@ uint32_t MTLShaderInterface::add_uniform_block(uint32_t name_offset,
return (total_uniform_blocks_++);
}
uint32_t MTLShaderInterface::add_storage_block(uint32_t name_offset,
uint32_t buffer_index,
uint32_t size,
ShaderStage stage_mask)
{
/* Ensure size is 16-byte aligned to guarantee alignment rules are satisfied. */
if ((size % 16) != 0) {
size += 16 - (size % 16);
}
BLI_assert(total_storage_blocks_ < MTL_MAX_STORAGE_BUFFER_BINDINGS);
BLI_assert(buffer_index < MTL_MAX_STORAGE_BUFFER_BINDINGS);
MTLShaderBufferBlock &ssbo_block = ssbos_[total_storage_blocks_];
ssbo_block.name_offset = name_offset;
ssbo_block.buffer_index = buffer_index;
ssbo_block.size = size;
ssbo_block.current_offset = 0;
ssbo_block.stage_mask = ShaderStage::ANY;
max_storagebuf_index_ = max_ii(max_storagebuf_index_, buffer_index);
return (total_storage_blocks_++);
}
void MTLShaderInterface::add_push_constant_block(uint32_t name_offset)
{
push_constant_block_.name_offset = name_offset;
@ -227,6 +258,9 @@ void MTLShaderInterface::map_builtins()
for (const int ubo : IndexRange(GPU_NUM_UNIFORM_BLOCKS)) {
builtin_blocks_[ubo] = -1;
}
for (const int ssbo : IndexRange(GPU_NUM_STORAGE_BUFFERS)) {
builtin_buffers_[ssbo] = -1;
}
/* Resolve and cache uniform locations for builtin uniforms. */
for (const int u : IndexRange(GPU_NUM_UNIFORMS)) {
@ -257,6 +291,22 @@ void MTLShaderInterface::map_builtins()
}
}
}
/* Resolve and cache uniform locations for builtin storage buffers. */
for (const int u : IndexRange(GPU_NUM_STORAGE_BUFFERS)) {
const ShaderInput *uni = this->ssbo_get(
builtin_storage_block_name((GPUStorageBufferBuiltin)u));
if (uni != nullptr) {
BLI_assert(uni->location >= 0);
if (uni->location >= 0) {
builtin_buffers_[u] = uni->binding;
MTL_LOG_INFO("Mapped builtin storage buffer '%s' to location %d\n",
builtin_storage_block_name((GPUStorageBufferBuiltin)u),
uni->location);
}
}
}
}
/* Populate #ShaderInput struct based on interface. */
@ -272,9 +322,7 @@ void MTLShaderInterface::prepare_common_shader_inputs()
attr_len_ = this->get_total_attributes();
ubo_len_ = this->get_total_uniform_blocks();
uniform_len_ = this->get_total_uniforms() + this->get_total_textures();
/* TODO(Metal): Support storage buffer bindings. Pending compute shader support. */
ssbo_len_ = 0;
ssbo_len_ = this->get_total_storage_blocks();
/* Calculate total inputs and allocate #ShaderInput array. */
/* NOTE: We use the existing `name_buffer_` allocated for internal input structs. */
@ -300,7 +348,7 @@ void MTLShaderInterface::prepare_common_shader_inputs()
BLI_assert(&inputs_[attr_len_] >= current_input);
current_input = &inputs_[attr_len_];
for (const int ubo_index : IndexRange(total_uniform_blocks_)) {
MTLShaderUniformBlock &shd_ubo = ubos_[ubo_index];
MTLShaderBufferBlock &shd_ubo = ubos_[ubo_index];
current_input->name_offset = shd_ubo.name_offset;
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ubo.name_offset));
/* Location refers to the index in the ubos_ array. */
@ -308,7 +356,8 @@ void MTLShaderInterface::prepare_common_shader_inputs()
/* Binding location refers to the UBO bind slot in
* #MTLContextGlobalShaderPipelineState::ubo_bindings. The buffer bind index [[buffer(N)]]
* within the shader will apply an offset for bound vertex buffers and the default uniform
* PushConstantBlock. */
* PushConstantBlock.
* see `mtl_shader_generator.hh` for buffer binding table breakdown. */
current_input->binding = shd_ubo.buffer_index;
current_input++;
}
@ -357,10 +406,24 @@ void MTLShaderInterface::prepare_common_shader_inputs()
}
}
/* SSBO bindings.
* TODO(Metal): Support SSBOs. Pending compute support. */
/* SSBO bindings. */
BLI_assert(&inputs_[attr_len_ + ubo_len_ + uniform_len_] >= current_input);
current_input = &inputs_[attr_len_ + ubo_len_ + uniform_len_];
BLI_assert(ssbo_len_ >= total_storage_blocks_);
for (const int ssbo_index : IndexRange(total_storage_blocks_)) {
MTLShaderBufferBlock &shd_ssbo = ssbos_[ssbo_index];
current_input->name_offset = shd_ssbo.name_offset;
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_ssbo.name_offset));
/* Location refers to the index in the ssbos_ array. */
current_input->location = ssbo_index;
/* Binding location refers to the SSBO bind slot in
* #MTLContextGlobalShaderPipelineState::ssbo_bindings. The buffer bind index [[buffer(N)]]
* within the shader will apply an offset for bound vertex buffers and the default uniform
* PushConstantBlock after other uniform blocks
* see `mtl_shader_generator.hh` for buffer binding table breakdown. */
current_input->binding = shd_ssbo.buffer_index;
current_input++;
}
/* Map builtin uniform indices to uniform binding locations. */
this->map_builtins();
@ -417,14 +480,14 @@ uint32_t MTLShaderInterface::get_total_uniforms() const
}
/* Uniform Blocks. */
const MTLShaderUniformBlock &MTLShaderInterface::get_uniform_block(uint index) const
const MTLShaderBufferBlock &MTLShaderInterface::get_uniform_block(uint index) const
{
BLI_assert(index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
BLI_assert(index < get_total_uniform_blocks());
return ubos_[index];
}
const MTLShaderUniformBlock &MTLShaderInterface::get_push_constant_block() const
const MTLShaderBufferBlock &MTLShaderInterface::get_push_constant_block() const
{
return push_constant_block_;
}
@ -449,6 +512,33 @@ uint32_t MTLShaderInterface::get_uniform_block_size(uint32_t block_index) const
return (block_index < total_uniform_blocks_) ? ubos_[block_index].size : 0;
}
/* Storage Blocks. */
const MTLShaderBufferBlock &MTLShaderInterface::get_storage_block(uint index) const
{
BLI_assert(index < MTL_MAX_STORAGE_BUFFER_BINDINGS);
BLI_assert(index < get_total_storage_blocks());
return ssbos_[index];
}
uint32_t MTLShaderInterface::get_total_storage_blocks() const
{
return total_storage_blocks_;
}
uint32_t MTLShaderInterface::get_max_ssbo_index() const
{
return max_storagebuf_index_;
}
bool MTLShaderInterface::has_storage_block(uint32_t block_index) const
{
return (block_index < total_storage_blocks_);
}
uint32_t MTLShaderInterface::get_storage_block_size(uint32_t block_index) const
{
return (block_index < total_storage_blocks_) ? ssbos_[block_index].size : 0;
}
/* Textures. */
const MTLShaderTexture &MTLShaderInterface::get_texture(uint index) const
{

View File

@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#pragma once
#include "MEM_guardedalloc.h"
#include "gpu_storage_buffer_private.hh"
#include "mtl_context.hh"
namespace blender {
namespace gpu {
class MTLUniformBuf;
class MTLVertBuf;
class MTLIndexBuf;
/**
* Implementation of Storage Buffers using Metal.
*/
class MTLStorageBuf : public StorageBuf {
private:
/** Allocation Handle or indirect wrapped instance.
* MTLStorageBuf can wrap a MTLVertBuf, MTLIndexBuf or MTLUniformBuf for binding as a writeable
* resource. */
enum {
MTL_STORAGE_BUF_TYPE_DEFAULT = 0,
MTL_STORAGE_BUF_TYPE_UNIFORMBUF = 1,
MTL_STORAGE_BUF_TYPE_VERTBUF = 2,
MTL_STORAGE_BUF_TYPE_INDEXBUF = 3,
} storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT;
union {
/* Own allocation. */
gpu::MTLBuffer *metal_buffer_;
/* Wrapped type. */
MTLUniformBuf *uniform_buffer_;
MTLVertBuf *vertex_buffer_;
MTLIndexBuf *index_buffer_;
};
/* Whether buffer has contents, if false, no GPU buffer will
* have yet been allocated. */
bool has_data_ = false;
/** Bind-state tracking. */
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
/** Usage type. */
GPUUsageType usage_;
public:
MTLStorageBuf(size_t size, GPUUsageType usage, const char *name);
~MTLStorageBuf();
MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size);
MTLStorageBuf(MTLVertBuf *vert_buf, size_t size);
MTLStorageBuf(MTLIndexBuf *index_buf, size_t size);
void update(const void *data) override;
void bind(int slot) override;
void unbind() override;
void clear(eGPUTextureFormat internal_format, eGPUDataFormat data_format, void *data) override;
void copy_sub(VertBuf *src, uint dst_offset, uint src_offset, uint copy_size) override;
void read(void *data) override;
void init();
id<MTLBuffer> get_metal_buffer();
int get_size();
const char *get_name()
{
return name_;
}
private:
MEM_CXX_CLASS_ALLOC_FUNCS("MTLStorageBuf");
};
} // namespace gpu
} // namespace blender
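For orientation, a minimal usage sketch from the generic GPU API side, assuming the existing GPU_storagebuf_* entry points route to this class on the Metal backend; the name and size are illustrative.
#include "GPU_storage_buffer.h"
/* Create a 1 KiB buffer; the Metal allocation itself is deferred until first use. */
GPUStorageBuf *ssbo = GPU_storagebuf_create_ex(1024, nullptr, GPU_USAGE_DYNAMIC, "my_ssbo");
GPU_storagebuf_bind(ssbo, 0); /* Slot must stay below MTL_MAX_STORAGE_BUFFER_BINDINGS. */
/* ... dispatch compute work that reads/writes the buffer ... */
GPU_storagebuf_free(ssbo);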

View File

@ -0,0 +1,313 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#include "BLI_string.h"
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_index_buffer.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
#include "mtl_vertex_buffer.hh"
namespace blender::gpu {
/* -------------------------------------------------------------------- */
/** \name Creation & Deletion
* \{ */
MTLStorageBuf::MTLStorageBuf(size_t size, GPUUsageType usage, const char *name)
: StorageBuf(size, name)
{
usage_ = usage;
/* Do not create SSBO MTL buffer here to allow allocation from any thread. */
storage_source_ = MTL_STORAGE_BUF_TYPE_DEFAULT;
metal_buffer_ = nullptr;
}
MTLStorageBuf::MTLStorageBuf(MTLUniformBuf *uniform_buf, size_t size)
: StorageBuf(size, "UniformBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_UNIFORMBUF;
uniform_buffer_ = uniform_buf;
BLI_assert(uniform_buffer_ != nullptr);
}
MTLStorageBuf::MTLStorageBuf(MTLVertBuf *vert_buf, size_t size)
: StorageBuf(size, "VertexBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_VERTBUF;
vertex_buffer_ = vert_buf;
BLI_assert(vertex_buffer_ != nullptr);
}
MTLStorageBuf::MTLStorageBuf(MTLIndexBuf *index_buf, size_t size)
: StorageBuf(size, "IndexBuffer_as_SSBO")
{
usage_ = GPU_USAGE_DYNAMIC;
storage_source_ = MTL_STORAGE_BUF_TYPE_INDEXBUF;
index_buffer_ = index_buf;
BLI_assert(index_buffer_ != nullptr);
}
MTLStorageBuf::~MTLStorageBuf()
{
if (storage_source_ == MTL_STORAGE_BUF_TYPE_DEFAULT) {
if (metal_buffer_ != nullptr) {
metal_buffer_->free();
metal_buffer_ = nullptr;
}
has_data_ = false;
}
/* Ensure SSBO is not bound to active CTX.
* SSBO bindings are reset upon Context-switch so we do not need
* to check deactivated contexts. */
MTLContext *ctx = MTLContext::get();
if (ctx) {
for (int i = 0; i < MTL_MAX_STORAGE_BUFFER_BINDINGS; i++) {
MTLStorageBufferBinding &slot = ctx->pipeline_state.ssbo_bindings[i];
if (slot.bound && slot.ssbo == this) {
slot.bound = false;
slot.ssbo = nullptr;
}
}
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Data upload / update
* \{ */
void MTLStorageBuf::init()
{
/* We only need to initialize the storage buffer for default buffer types. */
if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) {
return;
}
BLI_assert(this);
BLI_assert(size_in_bytes_ > 0);
/* Allocate MTL buffer */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
BLI_assert(ctx->device);
UNUSED_VARS_NDEBUG(ctx);
metal_buffer_ = MTLContext::get_global_memory_manager()->allocate(size_in_bytes_, true);
#ifndef NDEBUG
metal_buffer_->set_label([NSString stringWithFormat:@"Storage Buffer %s", name_]);
#endif
BLI_assert(metal_buffer_ != nullptr);
BLI_assert(metal_buffer_->get_metal_buffer() != nil);
has_data_ = false;
}
void MTLStorageBuf::update(const void *data)
{
/* Only default storage buffer types own their allocation; wrapped resources need no update here. */
if (storage_source_ != MTL_STORAGE_BUF_TYPE_DEFAULT) {
return;
}
/* Ensure buffer has been allocated. */
if (metal_buffer_ == nullptr) {
init();
}
BLI_assert(data != nullptr);
if (data != nullptr) {
/* Upload data. */
BLI_assert(!(metal_buffer_->get_resource_options() & MTLResourceStorageModePrivate));
BLI_assert(size_in_bytes_ <= metal_buffer_->get_size());
BLI_assert(size_in_bytes_ <= [metal_buffer_->get_metal_buffer() length]);
memcpy(metal_buffer_->get_host_ptr(), data, size_in_bytes_);
metal_buffer_->flush_range(0, size_in_bytes_);
has_data_ = true;
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Usage
* \{ */
void MTLStorageBuf::bind(int slot)
{
if (slot >= MTL_MAX_STORAGE_BUFFER_BINDINGS) {
fprintf(
stderr,
"Error: Trying to bind \"%s\" ssbo to slot %d which is above the reported limit of %d.\n",
name_,
slot,
MTL_MAX_STORAGE_BUFFER_BINDINGS);
BLI_assert(false);
return;
}
if (metal_buffer_ == nullptr) {
this->init();
}
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
/* Bind current SSBO to active context. */
MTLContext *ctx = MTLContext::get();
BLI_assert(ctx);
MTLStorageBufferBinding &ctx_ssbo_bind_slot = ctx->pipeline_state.ssbo_bindings[slot];
ctx_ssbo_bind_slot.ssbo = this;
ctx_ssbo_bind_slot.bound = true;
bind_slot_ = slot;
bound_ctx_ = ctx;
}
void MTLStorageBuf::unbind()
{
/* Unbind in debug mode to validate missing binds.
* Otherwise, only perform a full unbind upon destruction
* to ensure no lingering references. */
#ifndef NDEBUG
if (true) {
#else
if (G.debug & G_DEBUG_GPU) {
#endif
if (bound_ctx_ != nullptr && bind_slot_ > -1) {
MTLStorageBufferBinding &ctx_ssbo_bind_slot =
bound_ctx_->pipeline_state.ssbo_bindings[bind_slot_];
if (ctx_ssbo_bind_slot.bound && ctx_ssbo_bind_slot.ssbo == this) {
ctx_ssbo_bind_slot.bound = false;
ctx_ssbo_bind_slot.ssbo = nullptr;
}
}
}
/* Reset bind index. */
bind_slot_ = -1;
bound_ctx_ = nullptr;
}
void MTLStorageBuf::clear(eGPUTextureFormat internal_format,
eGPUDataFormat data_format,
void *data)
{
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
if (metal_buffer_ == nullptr) {
this->init();
}
if (ctx) {
/* Fast clear. */
id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
[blit_encoder fillBuffer:metal_buffer_->get_metal_buffer()
range:NSMakeRange(0, size_in_bytes_)
value:0];
}
else {
/* Fallback inefficient clear if outside of render context. */
void *clear_data = calloc(1, size_in_bytes_);
this->update(clear_data);
free(clear_data);
}
}
void MTLStorageBuf::copy_sub(VertBuf *src_, uint dst_offset, uint src_offset, uint copy_size)
{
/* TODO(Metal): Support Copy sub operation. */
MTL_LOG_WARNING("TLStorageBuf::copy_sub not yet supported.\n");
}
void MTLStorageBuf::read(void *data)
{
if (data == nullptr) {
return;
}
if (metal_buffer_ == nullptr) {
this->init();
}
/* Managed buffers need to be explicitly flushed back to host. */
if (metal_buffer_->get_resource_options() & MTLResourceStorageModeManaged) {
/* Fetch active context. */
MTLContext *ctx = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
BLI_assert(ctx);
/* Ensure GPU updates are flushed back to CPU. */
id<MTLBlitCommandEncoder> blit_encoder = ctx->main_command_buffer.ensure_begin_blit_encoder();
[blit_encoder synchronizeResource:metal_buffer_->get_metal_buffer()];
/* Ensure sync has occurred. */
GPU_finish();
}
/* Read data. NOTE: Unless explicitly synchronized with GPU work, results may not be ready. */
memcpy(data, metal_buffer_->get_host_ptr(), size_in_bytes_);
}
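As the note above says, a read is only meaningful once the producing GPU work has finished; a hedged caller-side sketch, assuming the usual compute entry points, with `shader`, `groups_x`, `ssbo` and `host_data` as placeholders.
GPU_compute_dispatch(shader, groups_x, 1, 1);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
/* Managed buffers are synchronized back to host inside MTLStorageBuf::read(). */
GPU_storagebuf_read(ssbo, host_data);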
id<MTLBuffer> MTLStorageBuf::get_metal_buffer()
{
gpu::MTLBuffer *source_buffer = nullptr;
switch (storage_source_) {
/* Default SSBO buffer comes from own allocation. */
case MTL_STORAGE_BUF_TYPE_DEFAULT: {
if (metal_buffer_ == nullptr) {
this->init();
}
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
source_buffer = metal_buffer_;
} break;
/* SSBO buffer comes from Uniform Buffer. */
case MTL_STORAGE_BUF_TYPE_UNIFORMBUF: {
source_buffer = uniform_buffer_->metal_buffer_;
} break;
/* SSBO buffer comes from Vertex Buffer. */
case MTL_STORAGE_BUF_TYPE_VERTBUF: {
source_buffer = vertex_buffer_->vbo_;
} break;
/* SSBO buffer comes from Index Buffer. */
case MTL_STORAGE_BUF_TYPE_INDEXBUF: {
source_buffer = index_buffer_->ibo_;
} break;
}
/* Return Metal allocation handle and flag as used. */
BLI_assert(source_buffer != nullptr);
source_buffer->debug_ensure_used();
return source_buffer->get_metal_buffer();
}
int MTLStorageBuf::get_size()
{
BLI_assert(this);
return size_in_bytes_;
}
} // namespace blender::gpu

View File

@ -13,10 +13,14 @@
namespace blender::gpu {
class MTLStorageBuf;
/**
* Implementation of Uniform Buffers using Metal.
**/
class MTLUniformBuf : public UniformBuf {
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/* Allocation Handle. */
gpu::MTLBuffer *metal_buffer_ = nullptr;
@ -29,6 +33,9 @@ class MTLUniformBuf : public UniformBuf {
int bind_slot_ = -1;
MTLContext *bound_ctx_ = nullptr;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
public:
MTLUniformBuf(size_t size, const char *name);
~MTLUniformBuf();

View File

@ -14,6 +14,7 @@
#include "mtl_backend.hh"
#include "mtl_context.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
#include "mtl_uniform_buffer.hh"
namespace blender::gpu {
@ -43,6 +44,11 @@ MTLUniformBuf::~MTLUniformBuf()
}
}
}
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLUniformBuf::update(const void *data)
@ -128,7 +134,25 @@ void MTLUniformBuf::bind_as_ssbo(int slot)
return;
}
BLI_assert_msg(0, "Not implemented yet");
/* Ensure data is actually allocated when used as an SSBO, as the resource may be written to. */
if (metal_buffer_ == nullptr) {
/* Check if we have any deferred data to upload. */
if (data_ != nullptr) {
this->update(data_);
MEM_SAFE_FREE(data_);
}
else {
this->clear_to_zero();
}
}
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, size_in_bytes_);
}
ssbo_wrapper_->bind(slot);
}
void MTLUniformBuf::unbind()

View File

@ -22,7 +22,8 @@ class MTLVertBuf : public VertBuf {
friend class gpu::MTLTexture; /* For buffer texture. */
friend class MTLShader; /* For transform feedback. */
friend class MTLBatch;
friend class MTLContext; /* For transform feedback. */
friend class MTLContext; /* For transform feedback. */
friend class MTLStorageBuf; /* For bind as SSBO resource access. */
private:
/** Metal buffer allocation. **/
@ -37,6 +38,8 @@ class MTLVertBuf : public VertBuf {
uint64_t alloc_size_ = 0;
/** Whether existing allocation has been submitted for use by the GPU. */
bool contents_in_flight_ = false;
/* SSBO wrapper for bind_as_ssbo support. */
MTLStorageBuf *ssbo_wrapper_ = nullptr;
/* Fetch Metal buffer and offset into allocation if necessary.
* Access limited to friend classes. */

View File

@ -5,6 +5,7 @@
*/
#include "mtl_vertex_buffer.hh"
#include "mtl_debug.hh"
#include "mtl_storage_buffer.hh"
namespace blender::gpu {
@ -50,6 +51,11 @@ void MTLVertBuf::release_data()
GPU_TEXTURE_FREE_SAFE(buffer_texture_);
MEM_SAFE_FREE(data);
if (ssbo_wrapper_) {
delete ssbo_wrapper_;
ssbo_wrapper_ = nullptr;
}
}
void MTLVertBuf::duplicate_data(VertBuf *dst_)
@ -294,10 +300,16 @@ void MTLVertBuf::update_sub(uint start, uint len, const void *data)
void MTLVertBuf::bind_as_ssbo(uint binding)
{
/* TODO(Metal): Support binding of buffers as SSBOs.
* Pending overall compute support for Metal backend. */
MTL_LOG_WARNING("MTLVertBuf::bind_as_ssbo not yet implemented!\n");
this->flag_used();
/* Ensure resource is initialized. */
this->bind();
/* Create MTLStorageBuffer to wrap this resource and use conventional binding. */
if (ssbo_wrapper_ == nullptr) {
ssbo_wrapper_ = new MTLStorageBuf(this, alloc_size_);
}
ssbo_wrapper_->bind(binding);
}
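For illustration, the caller path this enables, assuming the existing GPU_vertbuf/compute API; `verts`, `compute_shader` and `groups` are placeholders.
GPU_vertbuf_bind_as_ssbo(verts, 1); /* Wraps the VBO in an MTLStorageBuf on first use. */
GPU_compute_dispatch(compute_shader, groups, 1, 1);
GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);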
void MTLVertBuf::bind_as_texture(uint binding)

View File

@ -97,10 +97,18 @@ struct constexp_uvec3 {
return 0;
}
}
inline operator uint3() const
constexpr inline operator uint3() const
{
return xyz;
}
constexpr inline operator uint2() const
{
return xy;
}
constexpr inline operator uint() const
{
return x;
}
};
constexpr constexp_uvec3 __internal_workgroupsize_get()
@ -136,6 +144,10 @@ template<typename T> T atomicSub(threadgroup T &mem, T data)
{
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAnd(threadgroup T &mem, T data)
{
return atomic_fetch_and_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(threadgroup T &mem, T data)
{
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
@ -148,33 +160,40 @@ template<typename T> T atomicXor(threadgroup T &mem, T data)
/* Device memory. */
template<typename T> T atomicMax(device T &mem, T data)
{
return atomic_fetch_max_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_max_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicMin(device T &mem, T data)
{
return atomic_fetch_min_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_min_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAdd(device T &mem, T data)
{
return atomic_fetch_add_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_add_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicSub(device T &mem, T data)
{
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_sub_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAnd(device T &mem, T data)
{
return atomic_fetch_and_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(device T &mem, T data)
{
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_or_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicXor(device T &mem, T data)
{
return atomic_fetch_xor_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
return atomic_fetch_xor_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
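With the casts now targeting the `device` address space, shader code can, for example, compact results into an SSBO through an atomic counter; `keep_element`, `out_count` and `out_indices` are illustrative names, not from this patch.
if (keep_element) {
  uint write_index = atomicAdd(out_count, 1u);
  out_indices[write_index] = gl_GlobalInvocationID.x;
}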
/* Used to replace 'out' in function parameters with a thread-local reference.
* Macro names are kept short to avoid expanding the GLSL source string. */
#define THD thread
#define OUT(type, name, array) thread type(&name)[array]
#define THREADGROUP_OUT_ARRAY(type, name, array) threadgroup type(&name)[array]
#define DEVICE_OUT_ARRAY(type, name, array) device type(&name)[array]
#define DEVICE_OUT(type, name) device type &name
/* Generate wrapper structs for combined texture and sampler type. */
#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
@ -1122,6 +1141,27 @@ inline float4 uintBitsToFloat(uint4 f)
return as_type<float4>(f);
}
#define bitfieldReverse reverse_bits
#define bitfieldExtract extract_bits
#define bitfieldInsert insert_bits
#define bitCount popcount
template<typename T> T findLSB(T x)
{
/* ctz returns the number of trailing zeroes, which is also the index of the LSB. However, the
* zero-input case needs to be filtered out to match GLSL functionality. */
return (x == T(0)) ? T(-1) : T(ctz(x));
}
template<typename T> T findMSB(T x)
{
/* clz returns the number of leading zeroes. To fetch the index of the LSB, we can also use this

The comment needs to be reworked. It mentions LSB.
* value as index when offset by 1. however need to filter out the case where the input value is
* zero to match GLSL functionality. 000000010*/

Nice `000000010` you got here.
return (x == T(0)) ? T(-1) : (clz(T(0)) - clz(x) - T(1));

I think the 0 case here is already handled by the logic: `clz(0) - clz(0) - 1 = -1`. Also, can we make sure that `clz(T(0)) - T(1)` is compile-time constant folded? Or it could be replaced by `sizeof(T) * -8 - 1`.
}
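A small worked example of both helpers, assuming a 32-bit unsigned input.
uint x = 18u;          /* 0b10010 */
uint msb = findMSB(x); /* clz(0u) - clz(18u) - 1u = 32 - 27 - 1 = 4 */
uint lsb = findLSB(x); /* ctz(18u) = 1 */
/* A zero input is special-cased to T(-1) to match GLSL. */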
/* Texture size functions. Add texture types as needed. */
#define imageSize(image) textureSize(image, 0)

View File

@ -15,6 +15,14 @@
#define depthCubeArray samplerCubeArray
#define depth2DArrayShadow sampler2DArrayShadow
/* Memory scope and pass-by-reference types.
* NOTE: These are required by Metal, but are not required in all cases by GLSL. */

Not sure why this is here. Any GLSL code targeted at Metal should be using `#ifdef GPU_METAL`. As mentioned in the GLSL files, I would really like to avoid these.
#define device
#define threadgroup
#define OUT(type, name, array_len) out type name[array_len]
#define DEVICE_OUT_ARRAY(type, name, array_len) out type name[array_len]
#define DEVICE_OUT(type, name) out type name
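To illustrate the intent, assuming `DEVICE_OUT` expands to a named parameter on both backends, a shared helper can be written once and compile as a `device` reference under Metal and an `out` parameter under GLSL; the function name is illustrative.
void write_doubled(float value, DEVICE_OUT(float, result))
{
  result = value * 2.0f;
}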
/* Backend Functions. */
#define select(A, B, mask) mix(A, B, mask)