Node: Gabor Noise Texture #110802

Open
Charlie Jolly wants to merge 68 commits from CharlieJolly/blender:gabor into main

40 changed files with 746 additions and 190 deletions
Showing only changes of commit a5ef4e832f

View File

@ -1891,6 +1891,7 @@ elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
C_WARN_NO_STRICT_PROTOTYPES -Wno-strict-prototypes
C_WARN_NO_BITWISE_INSTEAD_OF_LOGICAL -Wno-bitwise-instead-of-logical
C_WARN_NO_IMPLICIT_CONST_INT_FLOAT_CONVERSION -Wno-implicit-const-int-float-conversion
C_WARN_NO_SINGLE_BIT_BITFIELD_CONSTANT_CONVERSION -Wno-single-bit-bitfield-constant-conversion
)
add_check_cxx_compiler_flags(

View File

@ -4,6 +4,8 @@
#ifdef WITH_ONEAPI
/* <algorithm> is needed until it is included upstream in sycl/detail/property_list_base.hpp */
# include <algorithm>
# include <sycl/sycl.hpp>
# include "device/device.h"

View File

@ -30,6 +30,10 @@ ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg,
sd->P = P;
sd->dP = differential_make_compact(dP);
/* Save the full differential; the compact form isn't enough for svm_node_set_bump. */
stack_store_float3(stack, offset + 4, dP.dx);
stack_store_float3(stack, offset + 7, dP.dy);
}
}
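For orientation, the stores above imply the following bump-state stack layout. A minimal sketch, assuming the first four floats hold the state that was already saved before this change (the old SVM_BUMP_EVAL_STATE_SIZE was 4; see the constant change below); offsets are in floats relative to the state's base offset:

/* Hypothetical layout constants; a float3 occupies 3 stack slots. */
enum BumpStateLayout {
  BUMP_STATE_SAVED = 0, /* Assumed: previously saved state (4 floats, the old state size). */
  BUMP_STATE_DPDX = 4,  /* Full differential dP.dx (float3), stored above. */
  BUMP_STATE_DPDY = 7,  /* Full differential dP.dy (float3), stored above. */
  BUMP_STATE_SIZE = 10, /* Matches the new SVM_BUMP_EVAL_STATE_SIZE. */
};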

View File

@ -15,6 +15,9 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
ccl_private float *stack,
uint4 node)
{
uint out_offset, bump_state_offset, dummy;
svm_unpack_node_uchar4(node.w, &out_offset, &bump_state_offset, &dummy, &dummy);
#ifdef __RAY_DIFFERENTIALS__
IF_KERNEL_NODES_FEATURE(BUMP)
{
@ -25,7 +28,16 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
float3 normal_in = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) :
sd->N;
differential3 dP = differential_from_compact(sd->Ng, sd->dP);
/* If we have saved bump state, read the full differential from there.
* Just using the compact form in those cases leads to incorrect normals (see #111588). */
differential3 dP;
if (bump_state_offset == SVM_STACK_INVALID) {
dP = differential_from_compact(sd->Ng, sd->dP);
}
else {
dP.dx = stack_load_float3(stack, bump_state_offset + 4);
dP.dy = stack_load_float3(stack, bump_state_offset + 7);
}
if (use_object_space) {
object_inverse_normal_transform(kg, sd, &normal_in);
@ -72,10 +84,10 @@ ccl_device_noinline void svm_node_set_bump(KernelGlobals kg,
object_normal_transform(kg, sd, &normal_out);
}
stack_store_float3(stack, node.w, normal_out);
stack_store_float3(stack, out_offset, normal_out);
}
else {
stack_store_float3(stack, node.w, zero_float3());
stack_store_float3(stack, out_offset, zero_float3());
}
#endif
}

View File

@ -13,7 +13,7 @@ CCL_NAMESPACE_BEGIN
/* SVM stack offsets with this value indicate that it's not on the stack */
#define SVM_STACK_INVALID 255
#define SVM_BUMP_EVAL_STATE_SIZE 4
#define SVM_BUMP_EVAL_STATE_SIZE 10
/* Nodes */

View File

@ -6899,16 +6899,17 @@ void BumpNode::compile(SVMCompiler &compiler)
ShaderOutput *normal_out = output("Normal");
/* pack all parameters in the node */
compiler.add_node(NODE_SET_BUMP,
compiler.encode_uchar4(compiler.stack_assign_if_linked(normal_in),
compiler.stack_assign(distance_in),
invert,
use_object_space),
compiler.encode_uchar4(compiler.stack_assign(center_in),
compiler.stack_assign(dx_in),
compiler.stack_assign(dy_in),
compiler.stack_assign(strength_in)),
compiler.stack_assign(normal_out));
compiler.add_node(
NODE_SET_BUMP,
compiler.encode_uchar4(compiler.stack_assign_if_linked(normal_in),
compiler.stack_assign(distance_in),
invert,
use_object_space),
compiler.encode_uchar4(compiler.stack_assign(center_in),
compiler.stack_assign(dx_in),
compiler.stack_assign(dy_in),
compiler.stack_assign(strength_in)),
compiler.encode_uchar4(compiler.stack_assign(normal_out), compiler.get_bump_state_offset()));
}
void BumpNode::compile(OSLCompiler &compiler)
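Both the output offset and the bump state offset travel in the node's fourth uint: `encode_uchar4()` packs four 8-bit values into one uint, and `svm_unpack_node_uchar4()` reverses it on the kernel side (see svm_node_set_bump above). A minimal standalone sketch of that packing, assuming the usual byte layout (the real helpers live in Cycles' utility headers):

#include <cstdint>

/* Sketch: pack/unpack four 8-bit values in one 32-bit word. */
static uint32_t encode_uchar4_sketch(uint32_t x, uint32_t y, uint32_t z, uint32_t w)
{
  return (x & 0xFFu) | ((y & 0xFFu) << 8) | ((z & 0xFFu) << 16) | ((w & 0xFFu) << 24);
}

static void unpack_uchar4_sketch(
    uint32_t data, uint32_t *x, uint32_t *y, uint32_t *z, uint32_t *w)
{
  *x = data & 0xFFu;
  *y = (data >> 8) & 0xFFu;
  *z = (data >> 16) & 0xFFu;
  *w = (data >> 24) & 0xFFu;
}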

View File

@ -163,6 +163,7 @@ SVMCompiler::SVMCompiler(Scene *scene) : scene(scene)
current_graph = NULL;
background = false;
mix_weight_offset = SVM_STACK_INVALID;
bump_state_offset = SVM_STACK_INVALID;
compile_failed = false;
/* This struct has one entry for every node, in order of ShaderNodeType definition. */
@ -784,9 +785,8 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
}
/* for the bump shader we need to add a node to store the shader state */
bool need_bump_state = (type == SHADER_TYPE_BUMP) &&
(shader->get_displacement_method() == DISPLACE_BOTH);
int bump_state_offset = SVM_STACK_INVALID;
const bool need_bump_state = (type == SHADER_TYPE_BUMP) &&
(shader->get_displacement_method() == DISPLACE_BOTH);
if (need_bump_state) {
bump_state_offset = stack_find_offset(SVM_BUMP_EVAL_STATE_SIZE);
add_node(NODE_ENTER_BUMP_EVAL, bump_state_offset);
@ -846,6 +846,7 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty
/* add node to restore state after bump shader has finished */
if (need_bump_state) {
add_node(NODE_LEAVE_BUMP_EVAL, bump_state_offset);
bump_state_offset = SVM_STACK_INVALID;
}
/* if compile failed, generate empty shader */

View File

@ -106,6 +106,10 @@ class SVMCompiler {
{
return mix_weight_offset;
}
uint get_bump_state_offset()
{
return bump_state_offset;
}
ShaderType output_type()
{
@ -222,6 +226,7 @@ class SVMCompiler {
Stack active_stack;
int max_stack_use;
uint mix_weight_offset;
uint bump_state_offset;
bool compile_failed;
};

View File

@ -276,10 +276,10 @@ class NLA_OT_bake(Operator):
do_constraint_clear=self.clear_constraints,
do_parents_clear=self.clear_parents,
do_clean=self.clean_curves,
do_location = 'LOCATION' in self.channel_types,
do_rotation = 'ROTATION' in self.channel_types,
do_scale = 'SCALE' in self.channel_types,
do_bbone = 'BBONE' in self.channel_types,
do_location='LOCATION' in self.channel_types,
do_rotation='ROTATION' in self.channel_types,
do_scale='SCALE' in self.channel_types,
do_bbone='BBONE' in self.channel_types,
)
if bake_options.do_pose and self.only_selected:

View File

@ -37,6 +37,26 @@ void FramebufferBind::execute() const
GPU_framebuffer_bind(*framebuffer);
}
void SubPassTransition::execute() const
{
/* TODO(fclem): Require framebuffer bind to always be part of the pass so that we can track it
* inside RecordingState. */
GPUFrameBuffer *framebuffer = GPU_framebuffer_active_get();
/* Unpack to the real enum type. */
const GPUAttachmentState states[9] = {
GPUAttachmentState(depth_state),
GPUAttachmentState(color_states[0]),
GPUAttachmentState(color_states[1]),
GPUAttachmentState(color_states[2]),
GPUAttachmentState(color_states[3]),
GPUAttachmentState(color_states[4]),
GPUAttachmentState(color_states[5]),
GPUAttachmentState(color_states[6]),
GPUAttachmentState(color_states[7]),
};
GPU_framebuffer_subpass_transition_array(framebuffer, states, ARRAY_SIZE(states));
}
void ResourceBind::execute() const
{
if (slot == -1) {
@ -258,6 +278,26 @@ std::string FramebufferBind::serialize() const
(*framebuffer == nullptr ? "nullptr" : GPU_framebuffer_get_name(*framebuffer)) + ")";
}
std::string SubPassTransition::serialize() const
{
auto to_str = [](GPUAttachmentState state) {
return (state != GPU_ATTACHEMENT_IGNORE) ?
((state == GPU_ATTACHEMENT_WRITE) ? "write" : "read") :
"ignore";
};
return std::string(".subpass_transition(\n") +
"depth=" + to_str(GPUAttachmentState(depth_state)) + ",\n" +
"color0=" + to_str(GPUAttachmentState(color_states[0])) + ",\n" +
"color1=" + to_str(GPUAttachmentState(color_states[1])) + ",\n" +
"color2=" + to_str(GPUAttachmentState(color_states[2])) + ",\n" +
"color3=" + to_str(GPUAttachmentState(color_states[3])) + ",\n" +
"color4=" + to_str(GPUAttachmentState(color_states[4])) + ",\n" +
"color5=" + to_str(GPUAttachmentState(color_states[5])) + ",\n" +
"color6=" + to_str(GPUAttachmentState(color_states[6])) + ",\n" +
"color7=" + to_str(GPUAttachmentState(color_states[7])) + "\n)";
}
std::string ResourceBind::serialize() const
{
switch (type) {

View File

@ -100,6 +100,7 @@ enum class Type : uint8_t {
PushConstant,
ResourceBind,
ShaderBind,
SubPassTransition,
StateSet,
StencilSet,
@ -134,6 +135,16 @@ struct FramebufferBind {
std::string serialize() const;
};
struct SubPassTransition {
/** \note uint8_t storing `GPUAttachmentState` for compactness. */
uint8_t depth_state;
/** \note 8 is GPU_FB_MAX_COLOR_ATTACHMENT. */
uint8_t color_states[8];
void execute() const;
std::string serialize() const;
};
struct ResourceBind {
GPUSamplerState sampler;
int slot;
@ -385,6 +396,7 @@ union Undetermined {
ShaderBind shader_bind;
ResourceBind resource_bind;
FramebufferBind framebuffer_bind;
SubPassTransition subpass_transition;
PushConstant push_constant;
Draw draw;
DrawMulti draw_multi;

View File

@ -214,6 +214,14 @@ class PassBase {
*/
void framebuffer_set(GPUFrameBuffer **framebuffer);
/**
* Start a new sub-pass and change the framebuffer attachment states.
* \note Affects the framebuffer bound at the time of submission and execution.
* \note States are copied and stored in the command.
*/
void subpass_transition(GPUAttachmentState depth_attachment,
Span<GPUAttachmentState> color_attachments);
/**
* Bind a material shader along with its associated resources. Any following bind() or
* push_constant() call will use its interface.
@ -550,6 +558,9 @@ template<class T> void PassBase<T>::submit(command::RecordingState &state) const
case command::Type::FramebufferBind:
commands_[header.index].framebuffer_bind.execute();
break;
case command::Type::SubPassTransition:
commands_[header.index].subpass_transition.execute();
break;
case command::Type::ShaderBind:
commands_[header.index].shader_bind.execute(state);
break;
@ -611,6 +622,9 @@ template<class T> std::string PassBase<T>::serialize(std::string line_prefix) co
case Type::FramebufferBind:
ss << line_prefix << commands_[header.index].framebuffer_bind.serialize() << std::endl;
break;
case Type::SubPassTransition:
ss << line_prefix << commands_[header.index].subpass_transition.serialize() << std::endl;
break;
case Type::ShaderBind:
ss << line_prefix << commands_[header.index].shader_bind.serialize() << std::endl;
break;
@ -825,6 +839,25 @@ template<class T> inline void PassBase<T>::framebuffer_set(GPUFrameBuffer **fram
create_command(Type::FramebufferBind).framebuffer_bind = {framebuffer};
}
template<class T>
inline void PassBase<T>::subpass_transition(GPUAttachmentState depth_attachment,
Span<GPUAttachmentState> color_attachments)
{
uint8_t color_states[8] = {GPU_ATTACHEMENT_IGNORE};
for (auto i : color_attachments.index_range()) {
color_states[i] = uint8_t(color_attachments[i]);
}
create_command(Type::SubPassTransition).subpass_transition = {uint8_t(depth_attachment),
color_states[0],
color_states[1],
color_states[2],
color_states[3],
color_states[4],
color_states[5],
color_states[6],
color_states[7]};
}
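Recording a transition from a pass then looks like the following sketch, mirroring the draw test later in this commit (`pass` is a PassSimple, `fb` a GPUFrameBuffer pointer):

/* Write color attachment 0, read attachment 1 as sub-pass input; depth untouched. */
pass.framebuffer_set(&fb);
pass.subpass_transition(GPU_ATTACHEMENT_IGNORE, {GPU_ATTACHEMENT_WRITE, GPU_ATTACHEMENT_READ});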
template<class T> inline void PassBase<T>::material_set(Manager &manager, GPUMaterial *material)
{
GPUPass *gpupass = GPU_material_get_pass(material);

View File

@ -29,6 +29,7 @@ static void test_draw_pass_all_commands()
/* Won't be dereferenced. */
GPUVertBuf *vbo = (GPUVertBuf *)1;
GPUIndexBuf *ibo = (GPUIndexBuf *)1;
GPUFrameBuffer *fb = nullptr;
float4 color(1.0f, 1.0f, 1.0f, 0.0f);
int3 dispatch_size(1);
@ -42,6 +43,8 @@ static void test_draw_pass_all_commands()
const int color_location = GPU_shader_get_uniform(sh, "color");
const int mvp_location = GPU_shader_get_uniform(sh, "ModelViewProjectionMatrix");
pass.shader_set(sh);
pass.framebuffer_set(&fb);
pass.subpass_transition(GPU_ATTACHEMENT_IGNORE, {GPU_ATTACHEMENT_WRITE, GPU_ATTACHEMENT_READ});
pass.bind_texture("image", tex);
pass.bind_texture("image", &tex);
pass.bind_image("missing_image", tex); /* Should not crash. */
@ -80,6 +83,18 @@ static void test_draw_pass_all_commands()
<< " .stencil_set(write_mask=0b10000000, reference=0b00001111, compare_mask=0b10001111)"
<< std::endl;
expected << " .shader_bind(gpu_shader_3D_image_color)" << std::endl;
expected << " .framebuffer_bind(nullptr)" << std::endl;
expected << " .subpass_transition(" << std::endl;
expected << "depth=ignore," << std::endl;
expected << "color0=write," << std::endl;
expected << "color1=read," << std::endl;
expected << "color2=ignore," << std::endl;
expected << "color3=ignore," << std::endl;
expected << "color4=ignore," << std::endl;
expected << "color5=ignore," << std::endl;
expected << "color6=ignore," << std::endl;
expected << "color7=ignore" << std::endl;
expected << ")" << std::endl;
expected << " .bind_texture(0, sampler=internal)" << std::endl;
expected << " .bind_texture_ref(0, sampler=internal)" << std::endl;
expected << " .bind_image(-1)" << std::endl;

View File

@ -273,16 +273,36 @@ bool ED_curve_select_swap(EditNurb *editnurb, bool hide_handles)
int a;
bool changed = false;
/* This could be an argument to swap individual handle selection;
* at the moment it is only enabled when hiding handles. */
bool swap_handles = false;
/* When hiding handles, ignore handle selection. */
if (hide_handles) {
swap_handles = true;
}
LISTBASE_FOREACH (Nurb *, nu, &editnurb->nurbs) {
if (nu->type == CU_BEZIER) {
bezt = nu->bezt;
a = nu->pntsu;
while (a--) {
if (bezt->hide == 0) {
bezt->f2 ^= SELECT; /* always do the center point */
if (!hide_handles) {
bezt->f1 ^= SELECT;
bezt->f3 ^= SELECT;
if (swap_handles) {
bezt->f2 ^= SELECT; /* always do the center point */
if (!hide_handles) {
bezt->f1 ^= SELECT;
bezt->f3 ^= SELECT;
}
}
else {
BLI_assert(!hide_handles);
if (BEZT_ISSEL_ANY(bezt)) {
BEZT_DESEL_ALL(bezt);
}
else {
BEZT_SEL_ALL(bezt);
}
}
changed = true;
}

View File

@ -2691,7 +2691,12 @@ void ED_gpencil_select_curve_toggle_all(bContext *C, int action)
break;
case SEL_INVERT:
gpc_pt->flag ^= GP_CURVE_POINT_SELECT;
BEZT_SEL_INVERT(bezt);
if (gpc_pt->flag & GP_CURVE_POINT_SELECT) {
BEZT_SEL_ALL(bezt);
}
else {
BEZT_DESEL_ALL(bezt);
}
break;
default:
break;

View File

@ -3803,7 +3803,8 @@ static void ui_do_but_textedit(
but->pos = short(selend);
but->selsta = short(selsta);
but->selend = short(selend);
data->sel_pos_init = selsta;
/* Anchor the selection to the left side unless it is the last word. */
data->sel_pos_init = ((selend == strlen(data->str)) && (selsta != 0)) ? selend : selsta;
retval = WM_UI_HANDLER_BREAK;
changed = true;
}
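To make the anchoring rule concrete, a self-contained sketch (the helper name is hypothetical, not part of the patch):

#include <cassert>
#include <cstring>

/* Where the selection anchor goes after a double-click word selection. */
static int select_anchor_sketch(const char *str, int selsta, int selend)
{
  const bool is_last_word = (selend == int(std::strlen(str))) && (selsta != 0);
  return is_last_word ? selend : selsta;
}

int main()
{
  assert(select_anchor_sketch("hello world", 0, 5) == 0);   /* Anchored on the left. */
  assert(select_anchor_sketch("hello world", 6, 11) == 11); /* Last word: anchored right. */
}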

View File

@ -12,13 +12,63 @@
extern "C" {
#endif
/**
* Describes the load operation of a framebuffer attachment at the start of a render pass.
*/
typedef enum eGPULoadOp {
/**
* Clear the framebuffer attachment using the clear value.
*/
GPU_LOADACTION_CLEAR = 0,
/**
* Load the value from the attached texture.
* Cannot be used with memoryless attachments.
* Slower than `GPU_LOADACTION_CLEAR` or `GPU_LOADACTION_DONT_CARE`.
*/
GPU_LOADACTION_LOAD,
GPU_LOADACTION_DONT_CARE
/**
* Do not care about the content of the attachment when the render pass starts.
* Useful if only the values being written are important.
* Faster than `GPU_LOADACTION_CLEAR`.
*/
GPU_LOADACTION_DONT_CARE,
} eGPULoadOp;
typedef enum eGPUStoreOp { GPU_STOREACTION_STORE = 0, GPU_STOREACTION_DONT_CARE } eGPUStoreOp;
/**
* Describes the store operation of a framebuffer attachment at the end of a render pass.
*/
typedef enum eGPUStoreOp {
/**
* Store the result of the rendering for this attachment.
* Cannot be used with memoryless attachments.
*/
GPU_STOREACTION_STORE = 0,
/**
* The result of the rendering for this attachment will be discarded.
* No writes to the texture memory will be done which makes it faster than
* `GPU_STOREACTION_STORE`.
* IMPORTANT: The actual values of the attachment are to be considered undefined.
* Only to be used on transient attachments that are only used within the boundaries of
* a render pass (e.g. an unneeded depth buffer result).
*/
GPU_STOREACTION_DONT_CARE,
} eGPUStoreOp;
/**
* Describes the state of a framebuffer attachment during a sub-pass.
*
* NOTE: Until this is correctly implemented in all backends, reading and writing from the
* same attachment will not work, though there is currently no case where that would be useful.
*/
typedef enum GPUAttachmentState {
/** Attachment will not be written during rendering. */
GPU_ATTACHEMENT_IGNORE = 0,
/** Attachment will be written during render sub-pass. This also works with blending. */
GPU_ATTACHEMENT_WRITE,
/** Attachment is used as input in the fragment shader. Incompatible with depth on Metal. */
GPU_ATTACHEMENT_READ,
} GPUAttachmentState;
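The load/store actions above pair up per attachment when binding. A hedged usage sketch, assuming the GPULoadStore struct used by the GPU_framebuffer_bind_loadstore() helper in the next file pairs one load action with one store action:

/* Depth: reuse previous content, discard after the pass. Color 0: clear, keep result. */
GPU_framebuffer_bind_loadstore(fb, {
    {GPU_LOADACTION_LOAD, GPU_STOREACTION_DONT_CARE},
    {GPU_LOADACTION_CLEAR, GPU_STOREACTION_STORE},
});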
typedef enum eGPUFrontFace {
GPU_CLOCKWISE,

View File

@ -178,6 +178,35 @@ void GPU_framebuffer_bind_loadstore(GPUFrameBuffer *framebuffer,
GPU_framebuffer_bind_loadstore(_fb, actions, (sizeof(actions) / sizeof(GPULoadStore))); \
}
/**
* Sub-pass config array matches attachment structure of `GPU_framebuffer_config_array`.
* This allows explicitly specifying the attachment states within the next sub-pass.
* This enables a number of bandwidth optimizations, especially on Tile-Based Deferred Renderers
* where the attachments can be kept in tile memory and used in place by later sub-passes.
*
* Example:
* \code{.c}
* GPU_framebuffer_subpass_transition(fb, {
*     GPU_ATTACHEMENT_WRITE,  // Must be the depth attachment.
*     GPU_ATTACHEMENT_READ,   // Color attachment 0
*     GPU_ATTACHEMENT_IGNORE, // Color attachment 1
*     GPU_ATTACHEMENT_WRITE,  // Color attachment 2
* })
* \endcode
*
* \note Excess attachments will have no effect as long as they are GPU_ATTACHEMENT_IGNORE.
*/
void GPU_framebuffer_subpass_transition_array(GPUFrameBuffer *framebuffer,
const GPUAttachmentState *attachment_states,
uint attachment_len);
#define GPU_framebuffer_subpass_transition(_fb, ...) \
{ \
GPUAttachmentState actions[] = __VA_ARGS__; \
GPU_framebuffer_subpass_transition_array( \
_fb, actions, (sizeof(actions) / sizeof(GPUAttachmentState))); \
}
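For reference, the GPU test later in this commit drives this macro directly; its two transitions look like:

/* First sub-pass: write color attachment 0. */
GPU_framebuffer_subpass_transition(
    framebuffer, {GPU_ATTACHEMENT_IGNORE, GPU_ATTACHEMENT_WRITE, GPU_ATTACHEMENT_IGNORE});
/* ... draw ... */
/* Second sub-pass: read attachment 0 as input, write attachment 1. */
GPU_framebuffer_subpass_transition(
    framebuffer, {GPU_ATTACHEMENT_IGNORE, GPU_ATTACHEMENT_READ, GPU_ATTACHEMENT_WRITE});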
/** \} */
/* -------------------------------------------------------------------- */

View File

@ -35,6 +35,9 @@ class DummyFrameBuffer : public FrameBuffer {
void attachment_set_loadstore_op(GPUAttachmentType /*type*/, GPULoadStore /*ls*/) override {}
void subpass_transition(const GPUAttachmentState /*depth_attachment_state*/,
Span<GPUAttachmentState> /*color_attachment_states*/) override{};
void read(eGPUFrameBufferBits /*planes*/,
eGPUDataFormat /*format*/,
const int /*area*/[4],

View File

@ -265,6 +265,14 @@ void GPU_framebuffer_bind_loadstore(GPUFrameBuffer *gpu_fb,
fb->load_store_config_array(load_store_actions, actions_len);
}
void GPU_framebuffer_subpass_transition_array(GPUFrameBuffer *gpu_fb,
const GPUAttachmentState *attachment_states,
uint attachment_len)
{
unwrap(gpu_fb)->subpass_transition(
attachment_states[0], Span<GPUAttachmentState>(attachment_states + 1, attachment_len - 1));
}
void GPU_framebuffer_bind_no_srgb(GPUFrameBuffer *gpu_fb)
{
const bool enable_srgb = false;

View File

@ -127,6 +127,9 @@ class FrameBuffer {
int dst_offset_x,
int dst_offset_y) = 0;
virtual void subpass_transition(const GPUAttachmentState depth_attachment_state,
Span<GPUAttachmentState> color_attachment_states) = 0;
void load_store_config_array(const GPULoadStore *load_store_actions, uint actions_len);
void attachment_set(GPUAttachmentType type, const GPUAttachment &new_attachment);

View File

@ -153,6 +153,9 @@ class MTLFrameBuffer : public FrameBuffer {
int dst_offset_x,
int dst_offset_y) override;
void subpass_transition(const GPUAttachmentState /*depth_attachment_state*/,
Span<GPUAttachmentState> /*color_attachment_states*/) override{};
void apply_state();
/* State. */

View File

@ -46,8 +46,6 @@ void GLBackend::platform_init()
#ifdef _WIN32
os = GPU_OS_WIN;
#elif defined(__APPLE__)
os = GPU_OS_MAC;
#else
os = GPU_OS_UNIX;
#endif
@ -271,6 +269,8 @@ static void detect_workarounds()
/* Turn off vendor specific extensions. */
GLContext::native_barycentric_support = false;
GLContext::framebuffer_fetch_support = false;
GLContext::texture_barrier_support = false;
/* Do not alter OpenGL 4.3 features.
* These code paths should be removed. */
@ -361,17 +361,6 @@ static void detect_workarounds()
GCaps.use_hq_normals_workaround = true;
}
}
/* There is an issue with the #glBlitFramebuffer on MacOS with radeon pro graphics.
* Blitting depth with #GL_DEPTH24_STENCIL8 is buggy so the workaround is to use
* #GPU_DEPTH32F_STENCIL8. Then Blitting depth will work but blitting stencil will
* still be broken. */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_MAC, GPU_DRIVER_OFFICIAL)) {
if (strstr(renderer, "AMD Radeon Pro") || strstr(renderer, "AMD Radeon R9") ||
strstr(renderer, "AMD Radeon RX"))
{
GCaps.depth_blitting_workaround = true;
}
}
/* Limit this fix to older hardware with GL < 4.5. This means Broadwell GPUs are
* covered since they only support GL 4.4 on windows.
* This fixes some issues with workbench anti-aliasing on Win + Intel GPU. (see #76273) */
@ -446,11 +435,6 @@ static void detect_workarounds()
}
}
/* Disable TF on macOS. */
if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY)) {
GCaps.transform_feedback_support = false;
}
/* Some Intel drivers have issues with using mips as frame-buffer targets if
* GL_TEXTURE_MAX_LEVEL is higher than the target MIP.
* Only check at the end after all other workarounds because this uses the drawing code.
@ -467,21 +451,6 @@ static void detect_workarounds()
GLContext::debug_layer_workaround = true;
}
/* Broken glGenerateMipmap on macOS 10.15.7 security update. */
if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_ANY) &&
strstr(renderer, "HD Graphics 4000"))
{
GLContext::generate_mipmap_workaround = true;
}
/* Certain Intel/AMD based platforms don't clear the viewport textures. Always clearing leads to
* noticeable performance regressions on other platforms as well. */
if (GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY) ||
GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_ANY, GPU_DRIVER_ANY))
{
GCaps.clear_viewport_workaround = true;
}
/* There is an issue in AMD official driver where we cannot use multi bind when using images. AMD
* is aware of the issue, but hasn't released a fix. */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_OFFICIAL)) {
@ -510,6 +479,7 @@ bool GLContext::copy_image_support = false;
bool GLContext::debug_layer_support = false;
bool GLContext::direct_state_access_support = false;
bool GLContext::explicit_location_support = false;
bool GLContext::framebuffer_fetch_support = false;
bool GLContext::geometry_shader_invocations = false;
bool GLContext::fixed_restart_index_support = false;
bool GLContext::layered_rendering_support = false;
@ -519,6 +489,7 @@ bool GLContext::multi_bind_image_support = false;
bool GLContext::multi_draw_indirect_support = false;
bool GLContext::shader_draw_parameters_support = false;
bool GLContext::stencil_texturing_support = false;
bool GLContext::texture_barrier_support = false;
bool GLContext::texture_cube_map_array_support = false;
bool GLContext::texture_filter_anisotropic_support = false;
bool GLContext::texture_gather_support = false;
@ -547,14 +518,7 @@ void GLBackend::capabilities_init()
glGetIntegerv(GL_MAX_ELEMENTS_INDICES, &GCaps.max_batch_indices);
glGetIntegerv(GL_MAX_ELEMENTS_VERTICES, &GCaps.max_batch_vertices);
glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &GCaps.max_vertex_attribs);
if (GPU_type_matches(GPU_DEVICE_APPLE, GPU_OS_MAC, GPU_DRIVER_OFFICIAL)) {
/* Due to a bug, querying GL_MAX_VARYING_FLOATS is emitting GL_INVALID_ENUM.
* Force use minimum required value. */
GCaps.max_varying_floats = 32;
}
else {
glGetIntegerv(GL_MAX_VARYING_FLOATS, &GCaps.max_varying_floats);
}
glGetIntegerv(GL_MAX_VARYING_FLOATS, &GCaps.max_varying_floats);
glGetIntegerv(GL_NUM_EXTENSIONS, &GCaps.extensions_len);
GCaps.extension_get = gl_extension_get;
@ -609,6 +573,8 @@ void GLBackend::capabilities_init()
GLContext::explicit_location_support = epoxy_gl_version() >= 43;
GLContext::geometry_shader_invocations = epoxy_has_gl_extension("GL_ARB_gpu_shader5");
GLContext::fixed_restart_index_support = epoxy_has_gl_extension("GL_ARB_ES3_compatibility");
GLContext::framebuffer_fetch_support = epoxy_has_gl_extension("GL_EXT_shader_framebuffer_fetch");
GLContext::texture_barrier_support = epoxy_has_gl_extension("GL_ARB_texture_barrier");
GLContext::layered_rendering_support = epoxy_has_gl_extension(
"GL_ARB_shader_viewport_layer_array");
GLContext::native_barycentric_support = epoxy_has_gl_extension(
@ -628,6 +594,9 @@ void GLBackend::capabilities_init()
GLContext::vertex_attrib_binding_support = epoxy_has_gl_extension(
"GL_ARB_vertex_attrib_binding");
/* Disabled until it is proven to work. */
GLContext::framebuffer_fetch_support = false;
detect_workarounds();
/* Disable this feature entirely when not debugging. */

View File

@ -219,15 +219,6 @@ GLuint GLVaoCache::base_instance_vao_get(GPUBatch *batch, int i_first)
/* Trigger update. */
base_instance_ = 0;
}
/**
* There seems to be a nasty bug when drawing using the same VAO reconfiguring (#71147).
* We just use a throwaway VAO for that. Note that this is likely to degrade performance.
*/
#ifdef __APPLE__
glDeleteVertexArrays(1, &vao_base_instance_);
vao_base_instance_ = 0;
base_instance_ = 0;
#endif
if (vao_base_instance_ == 0) {
glGenVertexArrays(1, &vao_base_instance_);
@ -318,18 +309,12 @@ void GLBatch::draw(int v_first, int v_count, int i_first, int i_count)
}
}
else {
#ifdef __APPLE__
glDisable(GL_PRIMITIVE_RESTART);
#endif
if (GLContext::base_instance_support) {
glDrawArraysInstancedBaseInstance(gl_type, v_first, v_count, i_count, i_first);
}
else {
glDrawArraysInstanced(gl_type, v_first, v_count, i_count);
}
#ifdef __APPLE__
glEnable(GL_PRIMITIVE_RESTART);
#endif
}
}

View File

@ -304,7 +304,6 @@ void GLContext::vao_cache_unregister(GLVaoCache *cache)
void GLContext::memory_statistics_get(int *r_total_mem, int *r_free_mem)
{
/* TODO(merwin): use Apple's platform API to get this info. */
if (epoxy_has_gl_extension("GL_NVX_gpu_memory_info")) {
/* Returned value in Kb. */
glGetIntegerv(GL_GPU_MEMORY_INFO_TOTAL_AVAILABLE_MEMORY_NVX, r_total_mem);

View File

@ -56,6 +56,7 @@ class GLContext : public Context {
static bool explicit_location_support;
static bool geometry_shader_invocations;
static bool fixed_restart_index_support;
static bool framebuffer_fetch_support;
static bool layered_rendering_support;
static bool native_barycentric_support;
static bool multi_bind_support;
@ -63,6 +64,7 @@ class GLContext : public Context {
static bool multi_draw_indirect_support;
static bool shader_draw_parameters_support;
static bool stencil_texturing_support;
static bool texture_barrier_support;
static bool texture_cube_map_array_support;
static bool texture_filter_anisotropic_support;
static bool texture_gather_support;

View File

@ -226,6 +226,89 @@ void GLFrameBuffer::update_attachments()
}
}
void GLFrameBuffer::subpass_transition(const GPUAttachmentState depth_attachment_state,
Span<GPUAttachmentState> color_attachment_states)
{
/* NOTE: Depth is not supported as input attachment because the Metal API doesn't support it and
* because depth is not compatible with the framebuffer fetch implementation. */
BLI_assert(depth_attachment_state != GPU_ATTACHEMENT_READ);
GPU_depth_mask(depth_attachment_state == GPU_ATTACHEMENT_WRITE);
bool any_read = false;
for (const GPUAttachmentState state : color_attachment_states) {
if (state == GPU_ATTACHEMENT_READ) {
any_read = true;
break;
}
}
if (GLContext::framebuffer_fetch_support) {
if (any_read) {
glFramebufferFetchBarrierEXT();
}
}
else if (GLContext::texture_barrier_support) {
if (any_read) {
glTextureBarrier();
}
GLenum attachments[GPU_FB_MAX_COLOR_ATTACHMENT] = {GL_NONE};
for (int i : color_attachment_states.index_range()) {
GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i;
GPUTexture *attach_tex = this->attachments_[type].tex;
if (color_attachment_states[i] == GPU_ATTACHEMENT_READ) {
tmp_detached_[type] = this->attachments_[type]; /* Bypass feedback loop check. */
GPU_texture_bind_ex(attach_tex, GPUSamplerState::default_sampler(), i);
}
else {
tmp_detached_[type] = GPU_ATTACHMENT_NONE;
}
bool attach_write = color_attachment_states[i] == GPU_ATTACHEMENT_WRITE;
attachments[i] = (attach_tex && attach_write) ? to_gl(type) : GL_NONE;
}
/* We have to use `glDrawBuffers` instead of `glColorMaski` because the latter is overwritten
* by the `GLStateManager`. */
/* WATCH(fclem): This modifies the frame-buffer state without setting `dirty_attachments_`. */
glDrawBuffers(ARRAY_SIZE(attachments), attachments);
}
else {
/* The only way to get correct visibility without extensions and to ensure defined behavior is
* to unbind the textures and update the frame-buffer. This is a slow operation, but it is all
* we can do to emulate sub-pass inputs. */
/* TODO(fclem): Could avoid the framebuffer reconfiguration by creating multiple framebuffers
* internally. */
for (int i : color_attachment_states.index_range()) {
GPUAttachmentType type = GPU_FB_COLOR_ATTACHMENT0 + i;
if (color_attachment_states[i] == GPU_ATTACHEMENT_WRITE) {
if (tmp_detached_[type].tex != nullptr) {
/* Re-attach previous read attachments. */
this->attachment_set(type, tmp_detached_[type]);
tmp_detached_[type] = GPU_ATTACHMENT_NONE;
}
}
else if (color_attachment_states[i] == GPU_ATTACHEMENT_READ) {
tmp_detached_[type] = this->attachments_[type];
unwrap(tmp_detached_[type].tex)->detach_from(this);
GPU_texture_bind_ex(tmp_detached_[type].tex, GPUSamplerState::default_sampler(), i);
}
}
if (dirty_attachments_) {
this->update_attachments();
}
}
}
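In short, the code above picks one of three emulation tiers depending on the detected GL features. A compact restatement (sketch only; the real selection is inline above):

enum class SubpassEmulation { FramebufferFetch, TextureBarrier, ReattachTextures };

static SubpassEmulation choose_subpass_emulation(bool fb_fetch, bool tex_barrier)
{
  if (fb_fetch) {
    return SubpassEmulation::FramebufferFetch; /* In-place reads; only needs a fetch barrier. */
  }
  if (tex_barrier) {
    return SubpassEmulation::TextureBarrier; /* Bind attachments as textures + glTextureBarrier(). */
  }
  /* Slowest path: detach read attachments, bind them as textures, reconfigure the FB. */
  return SubpassEmulation::ReattachTextures;
}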
void GLFrameBuffer::attachment_set_loadstore_op(GPUAttachmentType type, GPULoadStore ls)
{
BLI_assert(context_->active_fb == this);
/* TODO(fclem): Add support for other ops. */
if (ls.load_action == eGPULoadOp::GPU_LOADACTION_CLEAR) {
clear_attachment(type, GPU_DATA_FLOAT, ls.clear_value);
}
}
void GLFrameBuffer::apply_state()
{
if (dirty_state_ == false) {
@ -364,6 +447,8 @@ void GLFrameBuffer::clear_attachment(GPUAttachmentType type,
/* Save and restore the state. */
eGPUWriteMask write_mask = GPU_write_mask_get();
GPU_color_mask(true, true, true, true);
bool depth_mask = GPU_depth_mask_get();
GPU_depth_mask(true);
context_->state_manager->apply_state();
@ -404,6 +489,7 @@ void GLFrameBuffer::clear_attachment(GPUAttachmentType type,
}
GPU_write_mask(write_mask);
GPU_depth_mask(depth_mask);
}
void GLFrameBuffer::clear_multi(const float (*clear_cols)[4])

View File

@ -34,6 +34,8 @@ class GLFrameBuffer : public FrameBuffer {
GLStateManager *state_manager_ = nullptr;
/** Copy of the GL state. Contains ONLY color attachments enums for slot binding. */
GLenum gl_attachments_[GPU_FB_MAX_COLOR_ATTACHMENT] = {0};
/** List of attachments that are associated with this frame-buffer but temporarily detached. */
GPUAttachment tmp_detached_[GPU_FB_MAX_ATTACHMENT];
/** Internal frame-buffers are immutable. */
bool immutable_ = false;
/** True is the frame-buffer has its first color target using the GPU_SRGB8_A8 format. */
@ -77,7 +79,10 @@ class GLFrameBuffer : public FrameBuffer {
const void *clear_value) override;
/* Attachment load-stores are currently no-op's in OpenGL. */
void attachment_set_loadstore_op(GPUAttachmentType /*type*/, GPULoadStore /*ls*/) override{};
void attachment_set_loadstore_op(GPUAttachmentType type, GPULoadStore ls) override;
void subpass_transition(const GPUAttachmentState depth_attachment_state,
Span<GPUAttachmentState> color_attachment_states) override;
void read(eGPUFrameBufferBits planes,
eGPUDataFormat format,

View File

@ -147,13 +147,8 @@ void GLImmediate::end()
/* Update matrices. */
GPU_shader_bind(shader);
#ifdef __APPLE__
glDisable(GL_PRIMITIVE_RESTART);
#endif
glDrawArrays(to_gl(prim_type), 0, vertex_len);
#ifdef __APPLE__
glEnable(GL_PRIMITIVE_RESTART);
#endif
/* These lines are causing a crash on startup on some old GPU + drivers.
* They are not required so just comment them. (#55722) */
// glBindBuffer(GL_ARRAY_BUFFER, 0);

View File

@ -110,9 +110,139 @@ static const char *to_string(const Type &type)
return "ivec4";
case Type::BOOL:
return "bool";
default:
return "unknown";
/* Alias special types. */
case Type::UCHAR:
case Type::USHORT:
return "uint";
case Type::UCHAR2:
case Type::USHORT2:
return "uvec2";
case Type::UCHAR3:
case Type::USHORT3:
return "uvec3";
case Type::UCHAR4:
case Type::USHORT4:
return "uvec4";
case Type::CHAR:
case Type::SHORT:
return "int";
case Type::CHAR2:
case Type::SHORT2:
return "ivec2";
case Type::CHAR3:
case Type::SHORT3:
return "ivec3";
case Type::CHAR4:
case Type::SHORT4:
return "ivec4";
case Type::VEC3_101010I2:
return "vec3";
}
BLI_assert_unreachable();
return "unknown";
}
static const int to_component_count(const Type &type)
{
switch (type) {
case Type::FLOAT:
case Type::UINT:
case Type::INT:
case Type::BOOL:
return 1;
case Type::VEC2:
case Type::UVEC2:
case Type::IVEC2:
return 2;
case Type::VEC3:
case Type::UVEC3:
case Type::IVEC3:
return 3;
case Type::VEC4:
case Type::UVEC4:
case Type::IVEC4:
return 4;
case Type::MAT3:
return 9;
case Type::MAT4:
return 16;
/* Alias special types. */
case Type::UCHAR:
case Type::USHORT:
return 1;
case Type::UCHAR2:
case Type::USHORT2:
return 2;
case Type::UCHAR3:
case Type::USHORT3:
return 3;
case Type::UCHAR4:
case Type::USHORT4:
return 4;
case Type::CHAR:
case Type::SHORT:
return 1;
case Type::CHAR2:
case Type::SHORT2:
return 2;
case Type::CHAR3:
case Type::SHORT3:
return 3;
case Type::CHAR4:
case Type::SHORT4:
return 4;
case Type::VEC3_101010I2:
return 3;
}
BLI_assert_unreachable();
return -1;
}
static const Type to_component_type(const Type &type)
{
switch (type) {
case Type::FLOAT:
case Type::VEC2:
case Type::VEC3:
case Type::VEC4:
case Type::MAT3:
case Type::MAT4:
return Type::FLOAT;
case Type::UINT:
case Type::UVEC2:
case Type::UVEC3:
case Type::UVEC4:
return Type::UINT;
case Type::INT:
case Type::IVEC2:
case Type::IVEC3:
case Type::IVEC4:
case Type::BOOL:
return Type::INT;
/* Alias special types. */
case Type::UCHAR:
case Type::UCHAR2:
case Type::UCHAR3:
case Type::UCHAR4:
case Type::USHORT:
case Type::USHORT2:
case Type::USHORT3:
case Type::USHORT4:
return Type::UINT;
case Type::CHAR:
case Type::CHAR2:
case Type::CHAR3:
case Type::CHAR4:
case Type::SHORT:
case Type::SHORT2:
case Type::SHORT3:
case Type::SHORT4:
return Type::INT;
case Type::VEC3_101010I2:
return Type::FLOAT;
}
BLI_assert_unreachable();
return Type::FLOAT;
}
static const char *to_string(const eGPUTextureFormat &type)
@ -540,12 +670,6 @@ std::string GLShader::vertex_interface_declare(const ShaderCreateInfo &info) con
}
ss << "in " << to_string(attr.type) << " " << attr.name << ";\n";
}
/* NOTE(D4490): Fix a bug where shader without any vertex attributes do not behave correctly. */
if (GPU_type_matches_ex(GPU_DEVICE_APPLE, GPU_OS_MAC, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL) &&
info.vertex_inputs_.is_empty())
{
ss << "in float gpu_dummy_workaround;\n";
}
ss << "\n/* Interfaces. */\n";
for (const StageInterfaceInfo *iface : info.vertex_out_interfaces_) {
print_interface(ss, "out", *iface);
@ -581,7 +705,7 @@ std::string GLShader::vertex_interface_declare(const ShaderCreateInfo &info) con
std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) const
{
std::stringstream ss;
std::string pre_main;
std::string pre_main, post_main;
ss << "\n/* Interfaces. */\n";
const Vector<StageInterfaceInfo *> &in_interfaces = info.geometry_source_.is_empty() ?
@ -605,7 +729,6 @@ std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) c
ss << "#define gpu_position_at_vertex(v) gpu_pos[v]\n";
}
else if (epoxy_has_gl_extension("GL_AMD_shader_explicit_vertex_parameter")) {
std::cout << "native" << std::endl;
/* NOTE(fclem): This won't work with geometry shader. Hopefully, we don't need geometry
* shader workaround if this extension/feature is detected. */
ss << "\n/* Stable Barycentric Coordinates. */\n";
@ -638,12 +761,63 @@ std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) c
if (epoxy_has_gl_extension("GL_ARB_conservative_depth")) {
ss << "layout(" << to_string(info.depth_write_) << ") out float gl_FragDepth;\n";
}
ss << "\n/* Sub-pass Inputs. */\n";
for (const ShaderCreateInfo::SubpassIn &input : info.subpass_inputs_) {
/* TODO(fclem): Add GL_EXT_shader_framebuffer_fetch support and fallback using imageLoad.
* For now avoid compilation failure. */
ss << "const " << to_string(input.type) << " " << input.name << " = " << to_string(input.type)
<< "(0);\n";
if (GLContext::framebuffer_fetch_support) {
/* Declare as inout but do not write to it. */
ss << "layout(location = " << std::to_string(input.index) << ") inout "
<< to_string(input.type) << " " << input.name << ";\n";
}
else {
std::string image_name = "gpu_subpass_img_";
image_name += std::to_string(input.index);
/* Declare global for input. */
ss << to_string(input.type) << " " << input.name << ";\n";
/* IMPORTANT: We assume that the frame-buffer will be layered or not based on the layer
* built-in flag. */
bool is_layered_fb = bool(info.builtins_ & BuiltinBits::LAYER);
/* Start with invalid value to detect failure cases. */
ImageType image_type = ImageType::FLOAT_BUFFER;
switch (to_component_type(input.type)) {
case Type::FLOAT:
image_type = is_layered_fb ? ImageType::FLOAT_2D_ARRAY : ImageType::FLOAT_2D;
break;
case Type::INT:
image_type = is_layered_fb ? ImageType::INT_2D_ARRAY : ImageType::INT_2D;
break;
case Type::UINT:
image_type = is_layered_fb ? ImageType::UINT_2D_ARRAY : ImageType::UINT_2D;
break;
default:
break;
}
/* Declare image. */
using Resource = ShaderCreateInfo::Resource;
/* NOTE(fclem): Using the attachment index as resource index might be problematic as it might
* collide with other resources. */
Resource res(Resource::BindType::SAMPLER, input.index);
res.sampler.type = image_type;
res.sampler.sampler = GPUSamplerState::default_sampler();
res.sampler.name = image_name;
print_resource(ss, res, false);
char swizzle[] = "xyzw";
swizzle[to_component_count(input.type)] = '\0';
std::string texel_co = (is_layered_fb) ? "ivec3(gl_FragCoord.xy, gpu_Layer)" :
"ivec2(gl_FragCoord.xy)";
std::stringstream ss_pre;
/* Populate the global before main using texelFetch. */
ss_pre << " " << input.name << " = texelFetch(" << image_name << ", " << texel_co << ", 0)."
<< swizzle << ";\n";
pre_main += ss_pre.str();
}
}
ss << "\n/* Outputs. */\n";
for (const ShaderCreateInfo::FragOut &output : info.fragment_outputs_) {
@ -663,8 +837,7 @@ std::string GLShader::fragment_interface_declare(const ShaderCreateInfo &info) c
}
ss << "\n";
if (pre_main.empty() == false) {
std::string post_main;
if (!pre_main.empty() || !post_main.empty()) {
ss << main_function_wrapper(pre_main, post_main);
}
return ss.str();
@ -891,6 +1064,9 @@ static char *glsl_patch_default_get()
if (GLContext::native_barycentric_support) {
STR_CONCAT(patch, slen, "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n");
}
if (GLContext::framebuffer_fetch_support) {
STR_CONCAT(patch, slen, "#extension GL_EXT_shader_framebuffer_fetch: enable\n");
}
/* Fallbacks. */
if (!GLContext::shader_draw_parameters_support) {

View File

@ -31,6 +31,8 @@ class GLShaderInterface : public ShaderInterface {
private:
/** Reference to VaoCaches using this interface */
Vector<GLVaoCache *> refs_;
/** Bitmask of color attachments to bind as images for sub-pass input emulation. */
uint8_t subpass_inputs_ = 0u;
public:
GLShaderInterface(GLuint program, const shader::ShaderCreateInfo &info);

View File

@ -39,10 +39,7 @@ const char *GLLogParser::parse_line(const char *log_line, GPULogItem &log_item)
}
if ((log_item.cursor.row != -1) && (log_item.cursor.column != -1)) {
if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_OFFICIAL) ||
GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_OFFICIAL) ||
GPU_type_matches(GPU_DEVICE_APPLE, GPU_OS_MAC, GPU_DRIVER_OFFICIAL))
{
if (GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_ANY, GPU_DRIVER_OFFICIAL)) {
/* 0:line */
log_item.cursor.row = log_item.cursor.column;
log_item.cursor.column = -1;

View File

@ -735,7 +735,6 @@ bool GLTexture::proxy_check(int mip)
}
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_WIN, GPU_DRIVER_ANY) ||
GPU_type_matches(GPU_DEVICE_NVIDIA, GPU_OS_MAC, GPU_DRIVER_OFFICIAL) ||
GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OFFICIAL))
{
/* Some AMD drivers have a faulty `GL_PROXY_TEXTURE_..` check.
@ -747,13 +746,6 @@ bool GLTexture::proxy_check(int mip)
return true;
}
if ((type_ == GPU_TEXTURE_CUBE_ARRAY) &&
GPU_type_matches(GPU_DEVICE_ANY, GPU_OS_MAC, GPU_DRIVER_ANY))
{
/* Special fix for #79703. */
return true;
}
GLenum gl_proxy = to_gl_proxy(type_);
GLenum internal_format = to_gl_internal_format(format_);
GLenum gl_format = to_gl_data_format(format_);
@ -817,7 +809,10 @@ void GLTexture::check_feedback_loop()
if (fb_[i] == fb) {
GPUAttachmentType type = fb_attachment_[i];
GPUAttachment attachment = fb->attachments_[type];
if (attachment.mip <= mip_max_ && attachment.mip >= mip_min_) {
/* Check for when texture is used with texture barrier. */
GPUAttachment attachment_read = fb->tmp_detached_[type];
if (attachment.mip <= mip_max_ && attachment.mip >= mip_min_ &&
attachment_read.tex == nullptr) {
char msg[256];
SNPRINTF(msg,
"Feedback loop: Trying to bind a texture (%s) with mip range %d-%d but mip %d is "

View File

@ -985,52 +985,52 @@ inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S
/* Atomic Min. */
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
S data)
{
return tex.texture->atomic_fetch_min(uint(coord), data);
return tex.texture->atomic_fetch_min(uint(coord), vec<S, 4>(data)).x;
}
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
int2 coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint(coord.x), uint(coord.y), data);
return tex.texture->atomic_fetch_min(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint2(coord.xy), data);
return tex.texture->atomic_fetch_min(uint2(coord.xy), vec<S, 4>(data)).x;
}
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint2(coord.xy), uint(coord.z), data);
return tex.texture->atomic_fetch_min(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
template<typename S, access A>
vec<S, 4> _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
vec<S, 4> data)
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint3(coord), data);
return tex.texture->atomic_fetch_min(uint3(coord), vec<S, 4>(data)).x;
}
/* Atomic Exchange. */
template<typename S, access A, int N>
vec<S, N> _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
vec<S, N> data)
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
S data)
{
return tex.texture->atomic_exchange(uint(coord), data);
return tex.texture->atomic_exchange(uint(coord), vec<S, 4>(data)).x;
}
template<typename S, access A>
@ -1038,31 +1038,31 @@ S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d_
int2 coord,
S data)
{
return tex.texture->atomic_exchange(uint(coord.x), uint(coord.y), data);
return tex.texture->atomic_exchange(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
template<typename S, access A, int N>
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_exchange(uint2(coord.xy), data);
return tex.texture->atomic_exchange(uint2(coord.xy), vec<S, 4>(data)).x;
}
template<typename S, access A, int N>
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), data);
return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
template<typename S, access A, int N>
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_exchange(uint3(coord), data);
return tex.texture->atomic_exchange(uint3(coord), vec<S, 4>(data)).x;
}
#else

View File

@ -336,12 +336,15 @@ static void test_framebuffer_subpass_input()
const int2 size(1, 1);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_ATTACHMENT | GPU_TEXTURE_USAGE_HOST_READ;
GPUTexture *texture = GPU_texture_create_2d(
GPUTexture *texture_a = GPU_texture_create_2d(
__func__, UNPACK2(size), 1, GPU_R32I, usage, nullptr);
GPUTexture *texture_b = GPU_texture_create_2d(
__func__, UNPACK2(size), 1, GPU_R32I, usage, nullptr);
GPUFrameBuffer *framebuffer = GPU_framebuffer_create(__func__);
GPU_framebuffer_ensure_config(&framebuffer,
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(texture)});
GPU_framebuffer_ensure_config(
&framebuffer,
{GPU_ATTACHMENT_NONE, GPU_ATTACHMENT_TEXTURE(texture_a), GPU_ATTACHMENT_TEXTURE(texture_b)});
GPU_framebuffer_bind(framebuffer);
const float4 clear_color(0.0f);
@ -361,7 +364,7 @@ static void test_framebuffer_subpass_input()
create_info_read.vertex_source("gpu_framebuffer_subpass_input_test.glsl");
create_info_read.fragment_source("gpu_framebuffer_subpass_input_test.glsl");
create_info_read.subpass_in(0, Type::INT, "in_value", 0);
create_info_read.fragment_out(0, Type::INT, "out_value");
create_info_read.fragment_out(1, Type::INT, "out_value");
GPUShader *shader_read = GPU_shader_create_from_info(
reinterpret_cast<GPUShaderCreateInfo *>(&create_info_read));
@ -373,10 +376,16 @@ static void test_framebuffer_subpass_input()
GPU_vertbuf_data_alloc(verts, 3);
GPUBatch *batch = GPU_batch_create_ex(GPU_PRIM_TRIS, verts, nullptr, GPU_BATCH_OWNS_VBO);
/* Metal Raster Order Group does not need that. */
GPU_framebuffer_subpass_transition(
framebuffer, {GPU_ATTACHEMENT_IGNORE, GPU_ATTACHEMENT_WRITE, GPU_ATTACHEMENT_IGNORE});
GPU_batch_set_shader(batch, shader_write);
GPU_batch_draw(batch);
/* TODO(fclem): Vulkan might want to introduce an explicit sync event here. */
/* Metal Raster Order Group does not need that. */
GPU_framebuffer_subpass_transition(
framebuffer, {GPU_ATTACHEMENT_IGNORE, GPU_ATTACHEMENT_READ, GPU_ATTACHEMENT_WRITE});
GPU_batch_set_shader(batch, shader_read);
GPU_batch_draw(batch);
@ -385,12 +394,17 @@ static void test_framebuffer_subpass_input()
GPU_finish();
int *read_data = static_cast<int *>(GPU_texture_read(texture, GPU_DATA_INT, 0));
EXPECT_EQ(*read_data, 0xDEADC0DE);
MEM_freeN(read_data);
int *read_data_a = static_cast<int *>(GPU_texture_read(texture_a, GPU_DATA_INT, 0));
EXPECT_EQ(*read_data_a, 0xDEADBEEF);
MEM_freeN(read_data_a);
int *read_data_b = static_cast<int *>(GPU_texture_read(texture_b, GPU_DATA_INT, 0));
EXPECT_EQ(*read_data_b, 0xDEADC0DE);
MEM_freeN(read_data_b);
GPU_framebuffer_free(framebuffer);
GPU_texture_free(texture);
GPU_texture_free(texture_a);
GPU_texture_free(texture_b);
GPU_shader_free(shader_write);
GPU_shader_free(shader_read);

View File

@ -213,6 +213,18 @@ void VKFrameBuffer::attachment_set_loadstore_op(GPUAttachmentType /*type*/, GPUL
/** \} */
/* -------------------------------------------------------------------- */
/** \name Sub-pass transition
* \{ */
void VKFrameBuffer::subpass_transition(const GPUAttachmentState /*depth_attachment_state*/,
Span<GPUAttachmentState> /*color_attachment_states*/)
{
NOT_YET_IMPLEMENTED;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Read back
* \{ */

View File

@ -55,6 +55,9 @@ class VKFrameBuffer : public FrameBuffer {
void attachment_set_loadstore_op(GPUAttachmentType type, GPULoadStore /*ls*/) override;
void subpass_transition(const GPUAttachmentState depth_attachment_state,
Span<GPUAttachmentState> color_attachment_states) override;
void read(eGPUFrameBufferBits planes,
eGPUDataFormat format,
const int area[4],

View File

@ -3010,6 +3010,25 @@ static void rna_def_property_wrapper_funcs(FILE *f, StructDefRNA *dsrna, Propert
}
}
/**
* Counts the number of template arguments by looking at `<` and `,` characters in the name. More
* complex template arguments that contain `,` themselves are not handled yet.
*/
static int count_template_args(const char *function_name)
{
BLI_assert(function_name != nullptr);
if (!strstr(function_name, "<")) {
return 0;
}
int count = 1;
for (const char *c = function_name; *c; c++) {
if (*c == ',') {
count++;
}
}
return count;
}
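A standalone illustration of the counting rule (a copy of the logic above under a hypothetical name, with sample identifiers that are not from the codebase):

#include <cassert>
#include <cstring>

static int count_template_args_sketch(const char *function_name)
{
  if (!std::strstr(function_name, "<")) {
    return 0;
  }
  int count = 1;
  for (const char *c = function_name; *c; c++) {
    if (*c == ',') {
      count++;
    }
  }
  return count;
}

int main()
{
  assert(count_template_args_sketch("rna_Foo_bar") == 0);       /* No '<': not a template. */
  assert(count_template_args_sketch("rna_Foo_bar<A>") == 1);    /* '<' present, no ','. */
  assert(count_template_args_sketch("rna_Foo_bar<A, B>") == 2); /* Each ',' adds one. */
}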
static void rna_def_function_wrapper_funcs(FILE *f, StructDefRNA *dsrna, FunctionDefRNA *dfunc)
{
StructRNA *srna = dsrna->srna;
@ -3026,7 +3045,10 @@ static void rna_def_function_wrapper_funcs(FILE *f, StructDefRNA *dsrna, Functio
rna_construct_wrapper_function_name(
funcname, sizeof(funcname), srna->identifier, func->identifier, "func");
fprintf(f, "RNA_EXTERN_C ");
/* A function with templates cannot have C linkage. */
if (!(dfunc->call && count_template_args(dfunc->call) > 0)) {
fprintf(f, "RNA_EXTERN_C ");
}
rna_generate_static_parameter_prototypes(f, srna, dfunc, funcname, 0);
fprintf(f, "\n{\n");
@ -3785,6 +3807,19 @@ static void rna_generate_static_parameter_prototypes(FILE *f,
dsrna = rna_find_struct_def(srna);
func = dfunc->func;
const int template_args_num = dfunc->call ? count_template_args(dfunc->call) : 0;
if (!name_override && template_args_num > 0) {
/* The template parameters are named A, B, C, etc. */
BLI_assert(template_args_num <= 26);
fprintf(f, "template<typename A");
char template_name = 'B';
for (int i = 0; i < template_args_num - 1; i++) {
fprintf(f, ", typename %c", template_name);
template_name++;
}
fprintf(f, "> ");
}
/* return type */
LISTBASE_FOREACH (PropertyDefRNA *, dparm, &dfunc->cont.properties) {
if (dparm->prop == func->c_ret) {
@ -3810,7 +3845,15 @@ static void rna_generate_static_parameter_prototypes(FILE *f,
/* function name */
if (name_override == nullptr || name_override[0] == '\0') {
fprintf(f, "%s(", dfunc->call);
/* Here we only need the function name without the template parameters. */
const char *template_begin = strstr(dfunc->call, "<");
if (template_begin) {
const int num_chars = template_begin - dfunc->call;
fprintf(f, "%.*s(", num_chars, dfunc->call);
}
else {
fprintf(f, "%s(", dfunc->call);
}
}
else {
fprintf(f, "%s(", name_override);

View File

@ -5248,13 +5248,15 @@ static bool foreach_attr_type(BPy_PropertyRNA *self,
/* Values to assign. */
RawPropertyType *r_raw_type,
int *r_attr_tot,
bool *r_attr_signed)
bool *r_attr_signed,
bool *r_is_empty)
{
PropertyRNA *prop;
bool attr_ok = true;
*r_raw_type = PROP_RAW_UNSET;
*r_attr_tot = 0;
*r_attr_signed = false;
*r_is_empty = true;
/* NOTE: this fails with zero-length lists, so don't let this get called in that case. */
RNA_PROP_BEGIN (&self->ptr, itemptr, self->prop) {
@ -5267,6 +5269,7 @@ static bool foreach_attr_type(BPy_PropertyRNA *self,
else {
attr_ok = false;
}
*r_is_empty = false;
break;
}
RNA_PROP_END;
@ -5277,6 +5280,7 @@ static bool foreach_attr_type(BPy_PropertyRNA *self,
/* pyrna_prop_collection_foreach_get/set both use this. */
static int foreach_parse_args(BPy_PropertyRNA *self,
PyObject *args,
const char *function_name,
/* Values to assign. */
const char **r_attr,
@ -5287,9 +5291,6 @@ static int foreach_parse_args(BPy_PropertyRNA *self,
int *r_attr_tot,
bool *r_attr_signed)
{
int array_tot;
int target_tot;
*r_size = *r_attr_tot = 0;
*r_attr_signed = false;
*r_raw_type = PROP_RAW_UNSET;
@ -5299,10 +5300,10 @@ static int foreach_parse_args(BPy_PropertyRNA *self,
}
if (!PySequence_Check(*r_seq) && PyObject_CheckBuffer(*r_seq)) {
PyErr_Format(
PyExc_TypeError,
"foreach_get/set expected second argument to be a sequence or buffer, not a %.200s",
Py_TYPE(*r_seq)->tp_name);
PyErr_Format(PyExc_TypeError,
"%s(..) expected second argument to be a sequence or buffer, not a %.200s",
function_name,
Py_TYPE(*r_seq)->tp_name);
return -1;
}
@ -5310,6 +5311,10 @@ static int foreach_parse_args(BPy_PropertyRNA *self,
*r_tot = PySequence_Size(*r_seq);
if (*r_tot > 0) {
#if 0
/* Avoid a full collection count when all that's needed is to check whether it's empty. */
int array_tot;
if (RNA_property_type(self->prop) == PROP_COLLECTION) {
array_tot = RNA_property_collection_length(&self->ptr, self->prop);
}
@ -5318,42 +5323,63 @@ static int foreach_parse_args(BPy_PropertyRNA *self,
}
if (array_tot == 0) {
PyErr_Format(PyExc_TypeError,
"foreach_get(attr, sequence) sequence length mismatch given %d, needed 0",
"%s(..) sequence length mismatch given %d, needed 0",
function_name,
*r_tot);
return -1;
}
#endif
if (!foreach_attr_type(self, *r_attr, r_raw_type, r_attr_tot, r_attr_signed)) {
bool is_empty = false; /* `array_tot == 0`. */
if (!foreach_attr_type(self, *r_attr, r_raw_type, r_attr_tot, r_attr_signed, &is_empty)) {
PyErr_Format(PyExc_AttributeError,
"foreach_get/set '%.200s.%200s[...]' elements have no attribute '%.200s'",
"%s(..) '%.200s.%200s[...]' elements have no attribute '%.200s'",
function_name,
RNA_struct_identifier(self->ptr.type),
RNA_property_identifier(self->prop),
*r_attr);
return -1;
}
if (is_empty) {
PyErr_Format(PyExc_TypeError,
"%s(..) sequence length mismatch given %d, needed 0",
function_name,
*r_tot);
return -1;
}
*r_size = RNA_raw_type_sizeof(*r_raw_type);
#if 0
/* This size check does not work because it is based on the size of the
* first element, and elements in the collection/array can have different sizes
* (e.g. for mixed quad/triangle meshes). See for example issue #111117. */
if ((*r_attr_tot) < 1) {
*r_attr_tot = 1;
}
target_tot = array_tot * (*r_attr_tot);
const int target_tot = array_tot * (*r_attr_tot);
/* rna_access.cc - rna_raw_access(...) uses this same method. */
if (target_tot != (*r_tot)) {
PyErr_Format(PyExc_TypeError,
"foreach_get(attr, sequence) sequence length mismatch given %d, needed %d",
"%s(..) sequence length mismatch given %d, needed %d",
function_name,
*r_tot,
target_tot);
return -1;
}
#endif
}
/* Check 'r_attr_tot' otherwise we don't know if any values were set.
* This isn't ideal because it means running on an empty list may
* fail silently when it's not compatible. */
if (*r_size == 0 && *r_attr_tot != 0) {
PyErr_SetString(PyExc_AttributeError, "attribute does not support foreach method");
PyErr_Format(
PyExc_AttributeError, "%s(..): attribute does not support foreach method", function_name);
return -1;
}
return 0;
@ -5412,8 +5438,16 @@ static PyObject *foreach_getset(BPy_PropertyRNA *self, PyObject *args, int set)
bool attr_signed;
RawPropertyType raw_type;
if (foreach_parse_args(
self, args, &attr, &seq, &tot, &size, &raw_type, &attr_tot, &attr_signed) == -1)
if (foreach_parse_args(self,
args,
set ? "foreach_set" : "foreach_get",
&attr,
&seq,
&tot,
&size,
&raw_type,
&attr_tot,
&attr_signed) == -1)
{
return nullptr;
}

View File

@ -692,8 +692,6 @@ static void wm_file_read_post(bContext *C,
CTX_wm_window_set(C, static_cast<wmWindow *>(wm->windows.first));
}
WM_cursor_wait(true);
#ifdef WITH_PYTHON
if (is_startup_file) {
/* On startup (by default), Python won't have been initialized.
@ -820,8 +818,6 @@ static void wm_file_read_post(bContext *C,
WM_toolsystem_init(C);
}
}
WM_cursor_wait(false);
}
static void wm_read_callback_pre_wrapper(bContext *C, const char *filepath)
@ -1071,7 +1067,6 @@ bool WM_file_read(bContext *C, const char *filepath, ReportList *reports)
bf_reports.duration.whole = PIL_check_seconds_timer() - bf_reports.duration.whole;
file_read_reports_finalize(&bf_reports);
WM_cursor_wait(true);
success = true;
}
}
@ -1176,8 +1171,6 @@ void wm_homefile_read_ex(bContext *C,
char filepath_startup[FILE_MAX];
char filepath_userdef[FILE_MAX];
WM_cursor_wait(true);
/* When 'app_template' is set:
* '{BLENDER_USER_CONFIG}/{app_template}' */
char app_template_system[FILE_MAX];
@ -1495,8 +1488,6 @@ void wm_homefile_read_ex(bContext *C,
CTX_wm_window_set(C, nullptr);
}
}
WM_cursor_wait(false);
}
void wm_homefile_read(bContext *C,