2023-04-20 08:03:42 +02:00 · 2023-04-03 08:07:42 +02:00 · 2023-03-28 15:42:57 +02:00 · 2023-03-28 15:40:54 +02:00 · 2023-04-17 14:55:22 +02:00 · 2023-04-19 12:18:19 +02:00
18 changed files with 191 additions and 85 deletions
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl
@ -590,7 +590,7 @@ void dof_gather_accumulator(sampler2D color_tx,
 * The full pixel neighborhood is gathered.
 * \{ */

-void dof_slight_focus_gather(sampler2D depth_tx,
+void dof_slight_focus_gather(depth2D depth_tx,
                             sampler2D color_tx,
                             sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */
                             float radius,
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl
@ -62,7 +62,7 @@ void main()
  int mask_shift = 1;

 #define downsample_level(out_mip__, lod_) \
-  active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \
+  active_thread = all(lessThan(uvec2(local_px), gl_WorkGroupSize.xy >> uint(mask_shift))); \
  barrier(); /* Wait for previous writes to finish. */ \
  if (active_thread) { \
    max_depth = max_v4(load_local_depths(local_px)); \
@ -89,12 +89,12 @@ void main()
  }
  finished_tile_counter = 0u;

-  ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u));
+  ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize.xy * 2u));
  ivec2 image_border = imageSize(out_mip_5) - 1;
  for (int y = 0; y < iter.y; y++) {
    for (int x = 0; x < iter.x; x++) {
      /* Load result of the other work groups. */
-      kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y);
+      kernel_origin = ivec2(gl_WorkGroupSize.xy) * ivec2(x, y);
      src_px = ivec2(kernel_origin + local_px) * 2;
      vec4 samp;
      samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x;
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl
@ -168,13 +168,15 @@ void main()
        }
        /* Fallthrough to the hemispheric case. */
      case LIGHT_RECT:
-      case LIGHT_ELLIPSE:
+      case LIGHT_ELLIPSE: {
        vec3 v000 = vP - v_right * radius - v_up * radius;
        vec3 v100 = v000 + v_right * (radius * 2.0);
        vec3 v010 = v000 + v_up * (radius * 2.0);
        vec3 v001 = v000 - v_back * radius;
        Box bbox = shape_box(v000, v100, v010, v001);
        intersect_tile = intersect_tile && intersect(tile, bbox);
+        break;
+      }
      default:
        break;
    }
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl
@ -74,8 +74,10 @@ void main()

  vec4 max_motion = imageLoad(in_tiles_img, src_tile);

-  MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile);
-  MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
+  MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy,
+                                                                        uvec2(src_tile));
+  MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw,
+                                                                        uvec2(src_tile));
  if (true) {
    /* Rectangular area (in tiles) where the motion vector spreads. */
    MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy);
@ -85,17 +87,20 @@ void main()
      for (int y = 0; y < motion_rect.extent.y; y++) {
        ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
        if (is_inside_motion_line(tile, motion_line)) {
-          motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
+          motion_blur_tile_indirection_store(
+              tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
          /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in
           * the motion next so that weighting in gather pass is better. */
-          motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
+          motion_blur_tile_indirection_store(
+              tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
        }
      }
    }
  }

  if (true) {
-    MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile);
+    MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw,
+                                                                      uvec2(src_tile));
    /* Rectangular area (in tiles) where the motion vector spreads. */
    MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw);
    MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw);
@ -104,10 +109,12 @@ void main()
      for (int y = 0; y < motion_rect.extent.y; y++) {
        ivec2 tile = motion_rect.bottom_left + ivec2(x, y);
        if (is_inside_motion_line(tile, motion_line)) {
-          motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt);
+          motion_blur_tile_indirection_store(
+              tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt);
          /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in
           * the motion next so that weighting in gather pass is better. */
-          motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv);
+          motion_blur_tile_indirection_store(
+              tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv);
        }
      }
    }
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl
@ -178,10 +178,10 @@ void main()
  vec4 max_motion;
  /* Load dilation result from the indirection table. */
  ivec2 tile_prev;
-  motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev);
+  motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, uvec2(tile), tile_prev);
  max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy;
  ivec2 tile_next;
-  motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next);
+  motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, uvec2(tile), tile_next);
  max_motion.zw = imageLoad(in_tiles_img, tile_next).zw;

  Accumulator accum;
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl
@ -242,13 +242,13 @@ void output_aov(vec4 color, float value, uint hash)
 #if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER)
  for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) {
    if (aov_buf.hash_color[i] == hash) {
-      imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color);
+      imageStore(aov_color_img, ivec3(ivec2(gl_FragCoord.xy), i), color);
      return;
    }
  }
  for (int i = 0; i < AOV_MAX && i < aov_buf.value_len; i++) {
    if (aov_buf.hash_value[i] == hash) {
-      imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value));
+      imageStore(aov_value_img, ivec3(ivec2(gl_FragCoord.xy), i), vec4(value));
      return;
    }
  }
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl
@ -65,7 +65,7 @@ void main()
  }

  AABB aabb_tag;
-  AABB aabb_map = AABB(vec3(-0.99999), vec3(0.99999));
+  AABB aabb_map = shape_aabb(vec3(-0.99999), vec3(0.99999));

  /* Directionnal winmat have no correct near/far in the Z dimension at this point.
   * Do not clip in this dimension. */
@ -87,7 +87,7 @@ void main()
    for (int y = box_min.y; y <= box_max.y; y++) {
      for (int x = box_min.x; x <= box_max.x; x++) {
        int tile_index = shadow_tile_offset(ivec2(x, y), tilemap.tiles_index, lod);
-        atomicOr(tiles_buf[tile_index], SHADOW_DO_UPDATE);
+        atomicOr(tiles_buf[tile_index], uint(SHADOW_DO_UPDATE));
      }
    }
  }
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl
@ -21,7 +21,7 @@ void shadow_tag_usage_tile(LightData light, ivec2 tile_co, int lod, int tilemap_

  tile_co >>= lod;
  int tile_index = shadow_tile_offset(tile_co, tilemaps_buf[tilemap_index].tiles_index, lod);
-  atomicOr(tiles_buf[tile_index], SHADOW_IS_USED);
+  atomicOr(tiles_buf[tile_index], uint(SHADOW_IS_USED));
 }

 void shadow_tag_usage_tilemap_directional(uint l_idx, vec3 P, vec3 V, float radius)
--- a/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl
+++ b/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl
@ -44,7 +44,7 @@ bool is_visible(IsectBox box)

 bool intersects_near_plane(IsectBox box)
 {
-  vec4 near_plane = drw_view_culling.planes[4];
+  vec4 near_plane = drw_view_culling.frustum_planes.planes[4];
  bool on_positive_side = false;
  bool on_negative_side = false;

--- a/source/blender/draw/intern/draw_shader_shared.h
+++ b/source/blender/draw/intern/draw_shader_shared.h
@ -21,6 +21,8 @@ typedef struct DispatchCommand DispatchCommand;
 typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer;
 typedef struct DRWDebugVert DRWDebugVert;
 typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer;
+typedef struct FrustumCorners FrustumCorners;
+typedef struct FrustumPlanes FrustumPlanes;

 /* __cplusplus is true when compiling with MSL. */
 #  if defined(__cplusplus) && !defined(GPU_SHADER)
@ -94,11 +96,27 @@ uint drw_view_id = 0;
 #  define DRW_VIEW_FROM_RESOURCE_ID drw_view_id = (drw_ResourceID & DRW_VIEW_MASK)
 #endif

+/**
+ * The eight corner points of a view frustum, wrapped in a struct so the whole array can be
+ * copied or passed around as a single value.
+ */
+struct FrustumCorners {
+  /** Stored as float4; the shader code in this patch only touches the xyz components. */
+  float4 corners[8];
+};
+BLI_STATIC_ASSERT_ALIGN(FrustumCorners, 16)
+
+/**
+ * The six clipping planes of a view frustum, one float4 per plane.
+ *
+ * NOTE(review): the index order listed below is the one written by the GLSL
+ * `planes_from_projmat()` in `draw_view_finalize_comp.glsl`. The CPU path,
+ * `View::frustum_culling_planes_calc()` in `draw_view.cc`, hands the planes to its own
+ * `planes_from_projmat()` in the order [0], [5], [1], [3], [4], [2]. Confirm both paths
+ * produce the same layout before relying on a specific index.
+ */
+struct FrustumPlanes {
+  /* [0] left
+   * [1] right
+   * [2] bottom
+   * [3] top
+   * [4] near
+   * [5] far */
+  float4 planes[6];
+};
+BLI_STATIC_ASSERT_ALIGN(FrustumPlanes, 16)
+
 struct ViewCullingData {
  /** \note vec3 array padded to vec4. */
  /** Frustum corners. */
-  float4 corners[8];
-  float4 planes[6];
+  FrustumCorners frustum_corners;
+  FrustumPlanes frustum_planes;
  float4 bound_sphere;
 };
 BLI_STATIC_ASSERT_ALIGN(ViewCullingData, 16)
--- a/source/blender/draw/intern/draw_view.cc
+++ b/source/blender/draw/intern/draw_view.cc
@ -50,7 +50,8 @@ void View::frustum_boundbox_calc(int view_id)
  }
 #endif

-  MutableSpan<float4> corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)};
+  MutableSpan<float4> corners = {culling_[view_id].frustum_corners.corners,
+                                 ARRAY_SIZE(culling_[view_id].frustum_corners.corners)};

  float left, right, bottom, top, near, far;
  bool is_persp = data_[view_id].winmat[3][3] == 0.0f;
@ -89,15 +90,15 @@ void View::frustum_culling_planes_calc(int view_id)
 {
  float4x4 persmat = data_[view_id].winmat * data_[view_id].viewmat;
  planes_from_projmat(persmat.ptr(),
-                      culling_[view_id].planes[0],
-                      culling_[view_id].planes[5],
-                      culling_[view_id].planes[1],
-                      culling_[view_id].planes[3],
-                      culling_[view_id].planes[4],
-                      culling_[view_id].planes[2]);
+                      culling_[view_id].frustum_planes.planes[0],
+                      culling_[view_id].frustum_planes.planes[5],
+                      culling_[view_id].frustum_planes.planes[1],
+                      culling_[view_id].frustum_planes.planes[3],
+                      culling_[view_id].frustum_planes.planes[4],
+                      culling_[view_id].frustum_planes.planes[2]);

  /* Normalize. */
-  for (float4 &plane : culling_[view_id].planes) {
+  for (float4 &plane : culling_[view_id].frustum_planes.planes) {
    plane.w /= normalize_v3(plane);
  }
 }
@ -105,7 +106,8 @@ void View::frustum_culling_planes_calc(int view_id)
 void View::frustum_culling_sphere_calc(int view_id)
 {
  BoundSphere &bsphere = *reinterpret_cast<BoundSphere *>(&culling_[view_id].bound_sphere);
-  Span<float4> corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)};
+  Span<float4> corners = {culling_[view_id].frustum_corners.corners,
+                          ARRAY_SIZE(culling_[view_id].frustum_corners.corners)};

  /* Extract Bounding Sphere */
  if (data_[view_id].winmat[3][3] != 0.0f) {
--- a/source/blender/draw/intern/shaders/common_aabb_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_aabb_lib.glsl
@ -9,6 +9,14 @@ struct AABB {
  vec3 min, max;
 };

+/**
+ * Build an AABB from explicit `min` and `max` corners using member-wise assignment.
+ * NOTE(review): presumably added so callers can avoid the `AABB(min, max)` constructor syntax
+ * (this patch replaces such a call in `eevee_shadow_tag_update_comp.glsl`) -- confirm the reason.
+ */
+AABB shape_aabb(vec3 min, vec3 max)
+{
+  AABB aabb;
+  aabb.min = min;
+  aabb.max = max;
+  return aabb;
+}
+
 AABB aabb_init_min_max()
 {
  AABB aabb;
--- a/source/blender/draw/intern/shaders/common_intersect_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_intersect_lib.glsl
@ -136,7 +136,7 @@ bool intersect_view(Pyramid pyramid)
  for (int p = 0; p < 6; ++p) {
    bool is_any_vertex_on_positive_side = false;
    for (int v = 0; v < 5; ++v) {
-      float test = dot(drw_view_culling.planes[p], vec4(pyramid.corners[v], 1.0));
+      float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(pyramid.corners[v], 1.0));
      if (test > 0.0) {
        is_any_vertex_on_positive_side = true;
        break;
@ -158,7 +158,8 @@ bool intersect_view(Pyramid pyramid)
  for (int p = 0; p < 5; ++p) {
    bool is_any_vertex_on_positive_side = false;
    for (int v = 0; v < 8; ++v) {
-      float test = dot(i_pyramid.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0));
+      float test = dot(i_pyramid.planes[p],
+                       vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
      if (test > 0.0) {
        is_any_vertex_on_positive_side = true;
        break;
@ -181,7 +182,7 @@ bool intersect_view(Box box)
  for (int p = 0; p < 6; ++p) {
    bool is_any_vertex_on_positive_side = false;
    for (int v = 0; v < 8; ++v) {
-      float test = dot(drw_view_culling.planes[p], vec4(box.corners[v], 1.0));
+      float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(box.corners[v], 1.0));
      if (test > 0.0) {
        is_any_vertex_on_positive_side = true;
        break;
@ -203,7 +204,8 @@ bool intersect_view(Box box)
  for (int p = 0; p < 6; ++p) {
    bool is_any_vertex_on_positive_side = false;
    for (int v = 0; v < 8; ++v) {
-      float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0));
+      float test = dot(i_box.planes[p],
+                       vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
      if (test > 0.0) {
        is_any_vertex_on_positive_side = true;
        break;
@ -227,7 +229,7 @@ bool intersect_view(IsectBox i_box)
  for (int p = 0; p < 6; ++p) {
    bool is_any_vertex_on_positive_side = false;
    for (int v = 0; v < 8; ++v) {
-      float test = dot(drw_view_culling.planes[p], vec4(i_box.corners[v], 1.0));
+      float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(i_box.corners[v], 1.0));
      if (test > 0.0) {
        is_any_vertex_on_positive_side = true;
        break;
@ -247,7 +249,8 @@ bool intersect_view(IsectBox i_box)
  for (int p = 0; p < 6; ++p) {
    bool is_any_vertex_on_positive_side = false;
    for (int v = 0; v < 8; ++v) {
-      float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0));
+      float test = dot(i_box.planes[p],
+                       vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0));
      if (test > 0.0) {
        is_any_vertex_on_positive_side = true;
        break;
@ -268,7 +271,7 @@ bool intersect_view(Sphere sphere)
  bool intersects = true;

  for (int p = 0; p < 6 && intersects; ++p) {
-    float dist_to_plane = dot(drw_view_culling.planes[p], vec4(sphere.center, 1.0));
+    float dist_to_plane = dot(drw_view_culling.frustum_planes.planes[p], vec4(sphere.center, 1.0));
    if (dist_to_plane < -sphere.radius) {
      intersects = false;
    }
--- a/source/blender/draw/intern/shaders/common_shape_lib.glsl
+++ b/source/blender/draw/intern/shaders/common_shape_lib.glsl
@ -18,7 +18,10 @@ struct Circle {

 Circle shape_circle(vec2 center, float radius)
 {
-  return Circle(center, radius);
+  Circle circle;
+  circle.center = center;
+  circle.radius = radius;
+  return circle;
 }

 /** \} */
@ -34,7 +37,10 @@ struct Sphere {

 Sphere shape_sphere(vec3 center, float radius)
 {
-  return Sphere(center, radius);
+  Sphere sphere;
+  sphere.center = center;
+  sphere.radius = radius;
+  return sphere;
 }

 /** \} */
@ -192,6 +198,14 @@ Frustum shape_frustum(vec3 corners[8])
 struct Cone {
  vec3 direction;
  float angle_cos;
+
+#ifdef GPU_METAL
+  inline Cone() = default;
+  inline Cone(vec3 in_direction, float in_angle_cos)
+      : direction(in_direction), angle_cos(in_angle_cos)
+  {
+  }
+#endif
 };

 Cone shape_cone(vec3 direction, float angle_cosine)
--- a/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl
+++ b/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl
@ -33,18 +33,19 @@ void projmat_dimensions(mat4 winmat,
  }
 }

-void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8])
+void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out FrustumCorners frustum_corners)
 {
  float left, right, bottom, top, near, far;
  bool is_persp = winmat[3][3] == 0.0;

  projmat_dimensions(winmat, left, right, bottom, top, near, far);

-  corners[0][2] = corners[3][2] = corners[7][2] = corners[4][2] = -near;
-  corners[0][0] = corners[3][0] = left;
-  corners[4][0] = corners[7][0] = right;
-  corners[0][1] = corners[4][1] = bottom;
-  corners[7][1] = corners[3][1] = top;
+  frustum_corners.corners[0][2] = frustum_corners.corners[3][2] = frustum_corners.corners[7][2] =
+      frustum_corners.corners[4][2] = -near;
+  frustum_corners.corners[0][0] = frustum_corners.corners[3][0] = left;
+  frustum_corners.corners[4][0] = frustum_corners.corners[7][0] = right;
+  frustum_corners.corners[0][1] = frustum_corners.corners[4][1] = bottom;
+  frustum_corners.corners[7][1] = frustum_corners.corners[3][1] = top;

  /* Get the coordinates of the far plane. */
  if (is_persp) {
@ -55,25 +56,20 @@ void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8])
    top *= sca_far;
  }

-  corners[1][2] = corners[2][2] = corners[6][2] = corners[5][2] = -far;
-  corners[1][0] = corners[2][0] = left;
-  corners[6][0] = corners[5][0] = right;
-  corners[1][1] = corners[5][1] = bottom;
-  corners[2][1] = corners[6][1] = top;
+  frustum_corners.corners[1][2] = frustum_corners.corners[2][2] = frustum_corners.corners[6][2] =
+      frustum_corners.corners[5][2] = -far;
+  frustum_corners.corners[1][0] = frustum_corners.corners[2][0] = left;
+  frustum_corners.corners[6][0] = frustum_corners.corners[5][0] = right;
+  frustum_corners.corners[1][1] = frustum_corners.corners[5][1] = bottom;
+  frustum_corners.corners[2][1] = frustum_corners.corners[6][1] = top;

  /* Transform into world space. */
  for (int i = 0; i < 8; i++) {
-    corners[i].xyz = transform_point(viewinv, corners[i].xyz);
+    frustum_corners.corners[i].xyz = transform_point(viewinv, frustum_corners.corners[i].xyz);
  }
 }

-void planes_from_projmat(mat4 mat,
-                         out vec4 left,
-                         out vec4 right,
-                         out vec4 bottom,
-                         out vec4 top,
-                         out vec4 near,
-                         out vec4 far)
+void planes_from_projmat(mat4 mat, out FrustumPlanes frustum_planes)
 {
  /* References:
   *
@ -81,35 +77,35 @@ void planes_from_projmat(mat4 mat,
   * http://www8.cs.umu.se/kurser/5DV051/HT12/lab/plane_extraction.pdf
   */
  mat = transpose(mat);
-  left = mat[3] + mat[0];
-  right = mat[3] - mat[0];
-  bottom = mat[3] + mat[1];
-  top = mat[3] - mat[1];
-  near = mat[3] + mat[2];
-  far = mat[3] - mat[2];
+  frustum_planes.planes[0] = mat[3] + mat[0];
+  frustum_planes.planes[1] = mat[3] - mat[0];
+  frustum_planes.planes[2] = mat[3] + mat[1];
+  frustum_planes.planes[3] = mat[3] - mat[1];
+  frustum_planes.planes[4] = mat[3] + mat[2];
+  frustum_planes.planes[5] = mat[3] - mat[2];
 }

-void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6])
+void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out FrustumPlanes frustum_planes)
 {
  mat4 persmat = winmat * viewmat;
-  planes_from_projmat(persmat, planes[0], planes[5], planes[1], planes[3], planes[4], planes[2]);
+  planes_from_projmat(persmat, frustum_planes);

  /* Normalize. */
  for (int p = 0; p < 6; p++) {
-    planes[p] /= length(planes[p].xyz);
+    frustum_planes.planes[p] /= length(frustum_planes.planes[p].xyz);
  }
 }

-vec4 frustum_culling_sphere_calc(vec4 corners[8])
+vec4 frustum_culling_sphere_calc(FrustumCorners frustum_corners)
 {
  /* Extract Bounding Sphere */
  /* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. */

  vec4 bsphere;
-  bsphere.xyz = (corners[0].xyz + corners[6].xyz) * 0.5;
+  bsphere.xyz = (frustum_corners.corners[0].xyz + frustum_corners.corners[6].xyz) * 0.5;
  bsphere.w = 0.0;
  for (int i = 0; i < 8; i++) {
-    bsphere.w = max(bsphere.w, distance(bsphere.xyz, corners[i].xyz));
+    bsphere.w = max(bsphere.w, distance(bsphere.xyz, frustum_corners.corners[i].xyz));
  }
  return bsphere;
 }
@ -125,11 +121,15 @@ void main()
    return;
  }

-  frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, view_culling_buf[drw_view_id].corners);
+  /* Read frustum_corners from device memory, update, and write back. */
+  FrustumCorners frustum_corners = view_culling_buf[drw_view_id].frustum_corners;
+  frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, frustum_corners);
+  view_culling_buf[drw_view_id].frustum_corners = frustum_corners;

-  frustum_culling_planes_calc(
-      drw_view.winmat, drw_view.viewmat, view_culling_buf[drw_view_id].planes);
+  /* Read frustum_planes from device memory, update, and write back. */
+  FrustumPlanes frustum_planes = view_culling_buf[drw_view_id].frustum_planes;
+  frustum_culling_planes_calc(drw_view.winmat, drw_view.viewmat, frustum_planes);

-  view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc(
-      view_culling_buf[drw_view_id].corners);
+  view_culling_buf[drw_view_id].frustum_planes = frustum_planes;
+  view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc(frustum_corners);
 }
--- a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl
+++ b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl
@ -34,8 +34,9 @@ void main()
                                    bounds.bounding_corners[1].xyz,
                                    bounds.bounding_corners[2].xyz,
                                    bounds.bounding_corners[3].xyz);
-    Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w);
-    Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius);
+    Sphere bounding_sphere = shape_sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w);
+    Sphere inscribed_sphere = shape_sphere(bounds.bounding_sphere.xyz,
+                                           bounds._inner_sphere_radius);

    for (drw_view_id = 0; drw_view_id < view_len; drw_view_id++) {
      if (drw_view_culling.bound_sphere.w == -1.0) {
--- a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
+++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
@ -101,10 +101,18 @@ struct constexp_uvec3 {
        return 0;
    }
  }
-  inline operator uint3() const
+  constexpr inline operator uint3() const
  {
    return xyz;
  }
+  constexpr inline operator uint2() const
+  {
+    return xy;
+  }
+  constexpr inline operator uint() const
+  {
+    return x;
+  }
 };

 constexpr constexp_uvec3 __internal_workgroupsize_get()
@ -140,6 +148,10 @@ template<typename T> T atomicSub(threadgroup T &mem, T data)
 {
  return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
 }
+/* Bitwise AND of `data` into the threadgroup variable `mem`, performed through
+ * `atomic_fetch_and_explicit` with relaxed memory ordering. The return value is whatever
+ * `atomic_fetch_and_explicit` returns. */
+template<typename T> T atomicAnd(threadgroup T &mem, T data)
+{
+  return atomic_fetch_and_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
+}
 template<typename T> T atomicOr(threadgroup T &mem, T data)
 {
  return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
@ -152,29 +164,41 @@ template<typename T> T atomicXor(threadgroup T &mem, T data)
 /* Device memory. */
 template<typename T> T atomicMax(device T &mem, T data)
 {
-  return atomic_fetch_max_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
+  return atomic_fetch_max_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
 }
 template<typename T> T atomicMin(device T &mem, T data)
 {
-  return atomic_fetch_min_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
+  return atomic_fetch_min_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
 }
 template<typename T> T atomicAdd(device T &mem, T data)
 {
-  return atomic_fetch_add_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
+  return atomic_fetch_add_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
 }
 template<typename T> T atomicSub(device T &mem, T data)
 {
-  return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
+  return atomic_fetch_sub_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
+}
+template<typename T> T atomicAnd(device T &mem, T data)
+{
+  return atomic_fetch_and_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
 }
 template<typename T> T atomicOr(device T &mem, T data)
 {
-  return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
+  return atomic_fetch_or_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
 }
 template<typename T> T atomicXor(device T &mem, T data)
 {
-  return atomic_fetch_xor_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
+  return atomic_fetch_xor_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
 }

+/* Unblock texture atomic compilation.
+ * TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single
+ * thread. We need to re-visit the solution for this use-case and use a 2D texture buffer instead.
+ *
+ * The body is wrapped in `do { ... } while (0)` so the macro expands to exactly one statement
+ * (safe inside a brace-less `if`/`else`) and its temporary neither leaks into nor clashes with
+ * names in the calling scope. Unlike the GLSL built-in, this emulation yields no value: use it
+ * as a statement terminated by `;`. `coord` and `data` are expanded more than once, so pass
+ * side-effect free expressions. */
+#define imageAtomicMin(tex, coord, data) \
+  do { \
+    uint _atomic_min_prev = _texelFetch_internal(tex, coord, 0).r; \
+    _texture_write_internal( \
+        tex, coord, uint4((_atomic_min_prev < (data)) ? _atomic_min_prev : (data))); \
+    tex.texture->fence(); \
+  } while (0)
+
 /* Used to replace 'out' in function parameters with threadlocal reference
 * shortened to avoid expanding the glsl source string. */
 #define THD thread
@ -1126,6 +1150,27 @@ inline float4 uintBitsToFloat(uint4 f)
  return as_type<float4>(f);
 }

+#define bitfieldReverse reverse_bits
+#define bitfieldExtract extract_bits
+#define bitfieldInsert insert_bits
+#define bitCount popcount
+
+template<typename T> T findLSB(T x)
+{
+  /* `ctz` returns the number of trailing zeroes, which is also the bit index of the least
+   * significant set bit. A zero input has no set bit, so it is filtered out and mapped to -1 to
+   * match GLSL functionality. */
+  return (x == T(0)) ? T(-1) : T(ctz(x));
+}
+
+/* Emulation of GLSL `findMSB`: bit index of the most significant set bit of `x`.
+ * Returns -1 (converted to `T`) when there is no such bit, i.e. for an input of 0 and, for
+ * signed types, for an input of -1. */
+template<typename T> T findMSB(T x)
+{
+  /* GLSL defines the result for negative values as the index of the most significant *zero* bit.
+   * Inverting negative inputs turns that into the regular "highest set bit" search. The
+   * comparison is never true for unsigned types, which are therefore left untouched. */
+  const T v = (x < T(0)) ? T(~x) : x;
+  /* `clz(T(0))` is the bit width of `T`, so `width - clz(v) - 1` is the index of the most
+   * significant set bit. Zero has no set bit and is filtered out to match GLSL functionality. */
+  return (v == T(0)) ? T(-1) : (clz(T(0)) - clz(v) - T(1));
+}
+
 /* Texture size functions. Add texture types as needed. */
 #define imageSize(image) textureSize(image, 0)

--- a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl
+++ b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl
@ -15,6 +15,12 @@
 #define depthCubeArray samplerCubeArray
 #define depth2DArrayShadow sampler2DArrayShadow

+/* Memory scope and pass by reference types.
+ * NOTE: These are required by Metal, but are not required in all cases by GLSL. */
+#define device
+#define threadgroup
+#define OUT(type, name, array_len) out type name[array_len]
+
 /* Backend Functions. */
 #define select(A, B, mask) mix(A, B, mask)