Metal: MTLContext implementation and immediate mode rendering support.
MTLContext provides command encoding, binding management and graphics device management. MTLImmediate provides simple draw support with dynamically encoded data; these draws use temporary scratch-buffer memory to keep bandwidth overhead during workload submission to a minimum.
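As a rough sketch of the scratch-buffer approach (illustrative only, not the actual MTLImmediate code; `MTLScratchAlloc`, `scratch_alloc()`, `ensure_render_encoder()` and the member names are hypothetical):

  /* Immediate-mode flush: copy the dynamically written vertex data into per-frame
   * scratch memory and encode the draw, so no persistent vertex buffer is allocated. */
  void MTLImmediate::end()
  {
    const size_t bytes_needed = size_t(vertex_len) * vertex_stride;
    MTLScratchAlloc alloc = context_->scratch_alloc(bytes_needed); /* Hypothetical allocator. */
    memcpy(alloc.cpu_ptr, vertex_data, bytes_needed);

    id<MTLRenderCommandEncoder> enc = context_->ensure_render_encoder(); /* Hypothetical. */
    [enc setVertexBuffer:alloc.metal_buffer offset:alloc.offset atIndex:0];
    [enc drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:vertex_len];
  }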
This patch also contains empty placeholders for MTLBatch and MTLDrawList so that the first on-screen pixels can be tested without failures.
The Metal backend also requires access to the GHOST_Context to ensure that the viewport uses the same pre-initialized Metal GPU device. Given the explicit nature of Metal, explicit control over presentation is also needed to ensure correct work scheduling and render pipeline state.
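For presentation, the backend registers a callback with the GHOST context (see metalRegisterPresentCallback in GHOST_ContextCGL.h below). A minimal sketch, with an illustrative callback body and a hypothetical call site:

  /* Invoked from the GHOST context at swap time (sketch). */
  static void mtl_present(MTLRenderPassDescriptor *blit_descriptor,
                          id<MTLRenderPipelineState> blit_pso,
                          id<MTLTexture> overlay_texture,
                          id<CAMetalDrawable> drawable)
  {
    /* Illustrative only: the GPU backend would encode the final blit of
     * `overlay_texture` into `drawable` here, then commit and present so the
     * present is ordered after all previously submitted work. */
  }

  /* Hypothetical call site during context initialization: */
  cgl_context->metalRegisterPresentCallback(&mtl_present);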
Authored by Apple: Michael Parkin-White
Ref T96261
(The diff is based on 043f59cb3b)
Reviewed By: fclem
Differential Revision: https://developer.blender.org/D15953
@@ -36,19 +36,19 @@ class GHOST_Context : public GHOST_IContext {
   * Swaps front and back buffers of a window.
   * \return A boolean success indicator.
   */
  virtual GHOST_TSuccess swapBuffers() = 0;
  virtual GHOST_TSuccess swapBuffers() override = 0;

  /**
   * Activates the drawing context of this window.
   * \return A boolean success indicator.
   */
  virtual GHOST_TSuccess activateDrawingContext() = 0;
  virtual GHOST_TSuccess activateDrawingContext() override = 0;

  /**
   * Release the drawing context of the calling thread.
   * \return A boolean success indicator.
   */
  virtual GHOST_TSuccess releaseDrawingContext() = 0;
  virtual GHOST_TSuccess releaseDrawingContext() override = 0;

  /**
   * Call immediately after new to initialize. If this fails then immediately delete the object.
@@ -130,7 +130,7 @@ class GHOST_Context : public GHOST_IContext {
   * Gets the OpenGL frame-buffer associated with the OpenGL context
   * \return The ID of an OpenGL frame-buffer object.
   */
  virtual unsigned int getDefaultFramebuffer()
  virtual unsigned int getDefaultFramebuffer() override
  {
    return 0;
  }

@@ -9,8 +9,13 @@

#include "GHOST_Context.h"

#include <Cocoa/Cocoa.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>

@class CAMetalLayer;
@class MTLCommandQueue;
@class MTLDevice;
@class MTLRenderPipelineState;
@class MTLTexture;
@class NSOpenGLContext;
@@ -36,62 +41,89 @@ class GHOST_ContextCGL : public GHOST_Context {
   * Swaps front and back buffers of a window.
   * \return A boolean success indicator.
   */
  GHOST_TSuccess swapBuffers();
  GHOST_TSuccess swapBuffers() override;

  /**
   * Activates the drawing context of this window.
   * \return A boolean success indicator.
   */
  GHOST_TSuccess activateDrawingContext();
  GHOST_TSuccess activateDrawingContext() override;

  /**
   * Release the drawing context of the calling thread.
   * \return A boolean success indicator.
   */
  GHOST_TSuccess releaseDrawingContext();
  GHOST_TSuccess releaseDrawingContext() override;

  unsigned int getDefaultFramebuffer();
  unsigned int getDefaultFramebuffer() override;

  /**
   * Call immediately after new to initialize. If this fails then immediately delete the object.
   * \return Indication as to whether initialization has succeeded.
   */
  GHOST_TSuccess initializeDrawingContext();
  GHOST_TSuccess initializeDrawingContext() override;

  /**
   * Removes references to native handles from this context and then returns
   * \return GHOST_kSuccess if it is OK for the parent to release the handles and
   * GHOST_kFailure if releasing the handles will interfere with sharing
   */
  GHOST_TSuccess releaseNativeHandles();
  GHOST_TSuccess releaseNativeHandles() override;

  /**
   * Sets the swap interval for #swapBuffers.
   * \param interval: The swap interval to use.
   * \return A boolean success indicator.
   */
  GHOST_TSuccess setSwapInterval(int interval);
  GHOST_TSuccess setSwapInterval(int interval) override;

  /**
   * Gets the current swap interval for #swapBuffers.
   * \param intervalOut: Variable to store the swap interval if it can be read.
   * \return Whether the swap interval can be read.
   */
  GHOST_TSuccess getSwapInterval(int &);
  GHOST_TSuccess getSwapInterval(int &) override;

  /**
   * Updates the drawing context of this window.
   * Needed whenever the window is changed.
   * \return Indication of success.
   */
  GHOST_TSuccess updateDrawingContext();
  GHOST_TSuccess updateDrawingContext() override;

  /**
   * Returns a texture that Metal code can use as a render target. The current
   * contents of this texture will be composited on top of the framebuffer
   * each time `swapBuffers` is called.
   */
  id<MTLTexture> metalOverlayTexture();

  /**
   * Return a pointer to the Metal command queue used by this context.
   */
  MTLCommandQueue *metalCommandQueue();

  /**
   * Return a pointer to the Metal device associated with this context.
   */
  MTLDevice *metalDevice();

  /**
   * Register present callback
   */
  void metalRegisterPresentCallback(void (*callback)(
      MTLRenderPassDescriptor *, id<MTLRenderPipelineState>, id<MTLTexture>, id<CAMetalDrawable>));

 private:
  /** Metal state */
  /* Set this flag to `true` when rendering with Metal API for Viewport.
   * TODO(Metal): This should be assigned to externally. */
  bool m_useMetalForRendering = false;
  NSView *m_metalView;
  CAMetalLayer *m_metalLayer;
  MTLCommandQueue *m_metalCmdQueue;
  MTLRenderPipelineState *m_metalRenderPipeline;
  bool m_ownsMetalDevice;

  /** OpenGL state, for GPUs that don't support Metal */
  NSOpenGLView *m_openGLView;
@@ -102,9 +134,31 @@ class GHOST_ContextCGL : public GHOST_Context {
  /** The virtualized default frame-buffer. */
  unsigned int m_defaultFramebuffer;

  /** The virtualized default frame-buffer's texture. */
  MTLTexture *m_defaultFramebufferMetalTexture;
  /** The virtualized default framebuffer's texture */
  /**
   * Texture that you can render into with Metal. The texture will be
   * composited on top of `m_defaultFramebufferMetalTexture` whenever
   * `swapBuffers` is called.
   */
  static const int METAL_SWAPCHAIN_SIZE = 3;
  struct MTLSwapchainTexture {
    id<MTLTexture> texture;
    unsigned int index;
  };
  MTLSwapchainTexture m_defaultFramebufferMetalTexture[METAL_SWAPCHAIN_SIZE];
  unsigned int current_swapchain_index = 0;

  /* Present callback.
   * We use this such that presentation can be controlled from within the Metal
   * Context. This is required for optimal performance and clean control flow.
   * Also helps ensure flickering does not occur by present being dependent
   * on existing submissions. */
  void (*contextPresentCallback)(MTLRenderPassDescriptor *,
                                 id<MTLRenderPipelineState>,
                                 id<MTLTexture>,
                                 id<CAMetalDrawable>);

  int mtl_SwapInterval;
  const bool m_debug;

  /** The first created OpenGL context (for sharing display lists) */
@@ -117,4 +171,5 @@ class GHOST_ContextCGL : public GHOST_Context {
  void metalInitFramebuffer();
  void metalUpdateFramebuffer();
  void metalSwapBuffers();
  void initClear();
};

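The three-entry swapchain above suggests a simple round-robin scheme for the virtualized default framebuffer. A minimal sketch of how the index might advance at swap time (an assumption; the .mm implementation is not shown in this page):

  /* Sketch: cycle through the swapchain textures so the CPU can encode the next
   * frame while the previous one is still in flight on the GPU. */
  MTLSwapchainTexture &swap_tex = m_defaultFramebufferMetalTexture[current_swapchain_index];
  /* ... composite `swap_tex.texture` on top of the drawable and present ... */
  current_swapchain_index = (current_swapchain_index + 1) % METAL_SWAPCHAIN_SIZE;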
File diff suppressed because it is too large.
@@ -92,6 +92,11 @@ GHOST_TSuccess GHOST_Window::getSwapInterval(int &intervalOut)
  return m_context->getSwapInterval(intervalOut);
}

GHOST_Context *GHOST_Window::getContext()
{
  return m_context;
}

unsigned int GHOST_Window::getDefaultFramebuffer()
{
  return (m_context) ? m_context->getDefaultFramebuffer() : 0;

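With the new getContext() accessor, backend code that holds a GHOST window can reach the shared Metal device and command queue. A sketch for a macOS build (the window pointer and the cast are assumptions of the example):

  /* Sketch: fetch the pre-initialized Metal device/queue from the window's GHOST context. */
  GHOST_ContextCGL *cgl_ctx = static_cast<GHOST_ContextCGL *>(ghost_window->getContext());
  MTLDevice *device = cgl_ctx->metalDevice();
  MTLCommandQueue *queue = cgl_ctx->metalCommandQueue();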
@@ -72,7 +72,7 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
* Returns indication as to whether the window is valid.
|
||||
* \return The validity of the window.
|
||||
*/
|
||||
virtual bool getValid() const
|
||||
virtual bool getValid() const override
|
||||
{
|
||||
return m_context != NULL;
|
||||
}
|
||||
@@ -81,15 +81,15 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
* Returns the associated OS object/handle
|
||||
* \return The associated OS object/handle
|
||||
*/
|
||||
virtual void *getOSWindow() const;
|
||||
virtual void *getOSWindow() const override;
|
||||
|
||||
/**
|
||||
* Returns the current cursor shape.
|
||||
* \return The current cursor shape.
|
||||
*/
|
||||
inline GHOST_TStandardCursor getCursorShape() const;
|
||||
inline GHOST_TStandardCursor getCursorShape() const override;
|
||||
|
||||
inline bool isDialog() const
|
||||
inline bool isDialog() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@@ -99,7 +99,7 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
* \param cursorShape: The new cursor shape type id.
|
||||
* \return Indication of success.
|
||||
*/
|
||||
GHOST_TSuccess setCursorShape(GHOST_TStandardCursor cursorShape);
|
||||
GHOST_TSuccess setCursorShape(GHOST_TStandardCursor cursorShape) override;
|
||||
|
||||
/**
|
||||
* Set the shape of the cursor to a custom cursor.
|
||||
@@ -115,15 +115,15 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
int sizey,
|
||||
int hotX,
|
||||
int hotY,
|
||||
bool canInvertColor);
|
||||
bool canInvertColor) override;
|
||||
|
||||
GHOST_TSuccess getCursorBitmap(GHOST_CursorBitmapRef *bitmap);
|
||||
GHOST_TSuccess getCursorBitmap(GHOST_CursorBitmapRef *bitmap) override;
|
||||
|
||||
/**
|
||||
* Returns the visibility state of the cursor.
|
||||
* \return The visibility state of the cursor.
|
||||
*/
|
||||
inline bool getCursorVisibility() const;
|
||||
inline bool getCursorVisibility() const override;
|
||||
inline GHOST_TGrabCursorMode getCursorGrabMode() const;
|
||||
inline bool getCursorGrabModeIsWarp() const;
|
||||
inline GHOST_TAxisFlag getCursorGrabAxis() const;
|
||||
@@ -136,7 +136,7 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
* \param visible: The new visibility state of the cursor.
|
||||
* \return Indication of success.
|
||||
*/
|
||||
GHOST_TSuccess setCursorVisibility(bool visible);
|
||||
GHOST_TSuccess setCursorVisibility(bool visible) override;
|
||||
|
||||
/**
|
||||
* Sets the cursor grab.
|
||||
@@ -146,28 +146,28 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
GHOST_TSuccess setCursorGrab(GHOST_TGrabCursorMode mode,
|
||||
GHOST_TAxisFlag wrap_axis,
|
||||
GHOST_Rect *bounds,
|
||||
int32_t mouse_ungrab_xy[2]);
|
||||
int32_t mouse_ungrab_xy[2]) override;
|
||||
|
||||
/**
|
||||
* Gets the cursor grab region, if unset the window is used.
|
||||
* reset when grab is disabled.
|
||||
*/
|
||||
GHOST_TSuccess getCursorGrabBounds(GHOST_Rect &bounds);
|
||||
GHOST_TSuccess getCursorGrabBounds(GHOST_Rect &bounds) override;
|
||||
|
||||
void getCursorGrabState(GHOST_TGrabCursorMode &mode,
|
||||
GHOST_TAxisFlag &axis_flag,
|
||||
GHOST_Rect &bounds,
|
||||
bool &use_software_cursor);
|
||||
bool &use_software_cursor) override;
|
||||
/**
|
||||
* Return true when a software cursor should be used.
|
||||
*/
|
||||
bool getCursorGrabUseSoftwareDisplay();
|
||||
bool getCursorGrabUseSoftwareDisplay() override;
|
||||
|
||||
/**
|
||||
* Sets the progress bar value displayed in the window/application icon
|
||||
* \param progress: The progress percentage (0.0 to 1.0).
|
||||
*/
|
||||
virtual GHOST_TSuccess setProgressBar(float /*progress*/)
|
||||
virtual GHOST_TSuccess setProgressBar(float /*progress*/) override
|
||||
{
|
||||
return GHOST_kFailure;
|
||||
}
|
||||
@@ -175,7 +175,7 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
/**
|
||||
* Hides the progress bar in the icon
|
||||
*/
|
||||
virtual GHOST_TSuccess endProgressBar()
|
||||
virtual GHOST_TSuccess endProgressBar() override
|
||||
{
|
||||
return GHOST_kFailure;
|
||||
}
|
||||
@@ -185,43 +185,43 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
* \param interval: The swap interval to use.
|
||||
* \return A boolean success indicator.
|
||||
*/
|
||||
GHOST_TSuccess setSwapInterval(int interval);
|
||||
GHOST_TSuccess setSwapInterval(int interval) override;
|
||||
|
||||
/**
|
||||
* Gets the current swap interval for #swapBuffers.
|
||||
* \return An integer.
|
||||
*/
|
||||
GHOST_TSuccess getSwapInterval(int &intervalOut);
|
||||
GHOST_TSuccess getSwapInterval(int &intervalOut) override;
|
||||
|
||||
/**
|
||||
* Tells if the ongoing drag'n'drop object can be accepted upon mouse drop
|
||||
*/
|
||||
void setAcceptDragOperation(bool canAccept);
|
||||
void setAcceptDragOperation(bool canAccept) override;
|
||||
|
||||
/**
|
||||
* Returns acceptance of the dropped object
|
||||
* Usually called by the "object dropped" event handling function
|
||||
*/
|
||||
bool canAcceptDragOperation() const;
|
||||
bool canAcceptDragOperation() const override;
|
||||
|
||||
/**
|
||||
* Sets the window "modified" status, indicating unsaved changes
|
||||
* \param isUnsavedChanges: Unsaved changes or not.
|
||||
* \return Indication of success.
|
||||
*/
|
||||
virtual GHOST_TSuccess setModifiedState(bool isUnsavedChanges);
|
||||
virtual GHOST_TSuccess setModifiedState(bool isUnsavedChanges) override;
|
||||
|
||||
/**
|
||||
* Gets the window "modified" status, indicating unsaved changes
|
||||
* \return True if there are unsaved changes
|
||||
*/
|
||||
virtual bool getModifiedState();
|
||||
virtual bool getModifiedState() override;
|
||||
|
||||
/**
|
||||
* Returns the type of drawing context used in this window.
|
||||
* \return The current type of drawing context.
|
||||
*/
|
||||
inline GHOST_TDrawingContextType getDrawingContextType();
|
||||
inline GHOST_TDrawingContextType getDrawingContextType() override;
|
||||
|
||||
/**
|
||||
* Tries to install a rendering context in this window.
|
||||
@@ -230,19 +230,19 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
* \param type: The type of rendering context installed.
|
||||
* \return Indication as to whether installation has succeeded.
|
||||
*/
|
||||
GHOST_TSuccess setDrawingContextType(GHOST_TDrawingContextType type);
|
||||
GHOST_TSuccess setDrawingContextType(GHOST_TDrawingContextType type) override;
|
||||
|
||||
/**
|
||||
* Swaps front and back buffers of a window.
|
||||
* \return A boolean success indicator.
|
||||
*/
|
||||
virtual GHOST_TSuccess swapBuffers();
|
||||
virtual GHOST_TSuccess swapBuffers() override;
|
||||
|
||||
/**
|
||||
* Activates the drawing context of this window.
|
||||
* \return A boolean success indicator.
|
||||
*/
|
||||
virtual GHOST_TSuccess activateDrawingContext();
|
||||
virtual GHOST_TSuccess activateDrawingContext() override;
|
||||
|
||||
/**
|
||||
* Updates the drawing context of this window. Needed
|
||||
@@ -252,16 +252,22 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
GHOST_TSuccess updateDrawingContext();
|
||||
|
||||
/**
|
||||
* Gets the OpenGL frame-buffer associated with the window's contents.
|
||||
* \return The ID of an OpenGL frame-buffer object.
|
||||
* Get the drawing context associated with this window.
|
||||
*\return Pointer to the context object.
|
||||
*/
|
||||
virtual unsigned int getDefaultFramebuffer();
|
||||
GHOST_Context *getContext();
|
||||
|
||||
/**
|
||||
* Gets the OpenGL framebuffer associated with the window's contents.
|
||||
* \return The ID of an OpenGL framebuffer object.
|
||||
*/
|
||||
virtual unsigned int getDefaultFramebuffer() override;
|
||||
|
||||
/**
|
||||
* Returns the window user data.
|
||||
* \return The window user data.
|
||||
*/
|
||||
inline GHOST_TUserDataPtr getUserData() const
|
||||
inline GHOST_TUserDataPtr getUserData() const override
|
||||
{
|
||||
return m_userData;
|
||||
}
|
||||
@@ -270,12 +276,12 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
* Changes the window user data.
|
||||
* \param userData: The window user data.
|
||||
*/
|
||||
void setUserData(const GHOST_TUserDataPtr userData)
|
||||
void setUserData(const GHOST_TUserDataPtr userData) override
|
||||
{
|
||||
m_userData = userData;
|
||||
}
|
||||
|
||||
float getNativePixelSize(void)
|
||||
float getNativePixelSize(void) override
|
||||
{
|
||||
if (m_nativePixelSize > 0.0f)
|
||||
return m_nativePixelSize;
|
||||
@@ -286,18 +292,18 @@ class GHOST_Window : public GHOST_IWindow {
|
||||
* Returns the recommended DPI for this window.
|
||||
* \return The recommended DPI for this window.
|
||||
*/
|
||||
virtual inline uint16_t getDPIHint()
|
||||
virtual inline uint16_t getDPIHint() override
|
||||
{
|
||||
return 96;
|
||||
}
|
||||
|
||||
#ifdef WITH_INPUT_IME
|
||||
virtual void beginIME(int32_t x, int32_t y, int32_t w, int32_t h, bool completed)
|
||||
virtual void beginIME(int32_t x, int32_t y, int32_t w, int32_t h, bool completed) override
|
||||
{
|
||||
/* do nothing temporarily if not in windows */
|
||||
}
|
||||
|
||||
virtual void endIME()
|
||||
virtual void endIME() override
|
||||
{
|
||||
/* do nothing temporarily if not in windows */
|
||||
}
|
||||
|
||||
@@ -323,7 +323,7 @@ MainWindow *mainwindow_new(MultiTestApp *app)
|
||||
if (win) {
|
||||
MainWindow *mw = MEM_callocN(sizeof(*mw), "mainwindow_new");
|
||||
|
||||
mw->gpu_context = GPU_context_create(win);
|
||||
mw->gpu_context = GPU_context_create(win, NULL);
|
||||
GPU_init();
|
||||
|
||||
mw->app = app;
|
||||
@@ -578,7 +578,7 @@ LoggerWindow *loggerwindow_new(MultiTestApp *app)
|
||||
if (win) {
|
||||
LoggerWindow *lw = MEM_callocN(sizeof(*lw), "loggerwindow_new");
|
||||
|
||||
lw->gpu_context = GPU_context_create(win);
|
||||
lw->gpu_context = GPU_context_create(win, NULL);
|
||||
GPU_init();
|
||||
|
||||
int bbox[2][2];
|
||||
@@ -780,7 +780,7 @@ ExtraWindow *extrawindow_new(MultiTestApp *app)
|
||||
if (win) {
|
||||
ExtraWindow *ew = MEM_callocN(sizeof(*ew), "mainwindow_new");
|
||||
|
||||
ew->gpu_context = GPU_context_create(win);
|
||||
ew->gpu_context = GPU_context_create(win, NULL);
|
||||
GPU_init();
|
||||
|
||||
ew->app = app;
|
||||
|
||||
@@ -201,6 +201,7 @@ void DRW_gpu_render_context_enable(void *re_gpu_context);
|
||||
void DRW_gpu_render_context_disable(void *re_gpu_context);
|
||||
|
||||
void DRW_deferred_shader_remove(struct GPUMaterial *mat);
|
||||
void DRW_deferred_shader_optimize_remove(struct GPUMaterial *mat);
|
||||
|
||||
/**
|
||||
* Get DrawData from the given ID-block. In order for this to work, we assume that
|
||||
|
||||
@@ -597,7 +597,7 @@ static void eevee_lightbake_context_enable(EEVEE_LightBake *lbake)
|
||||
if (lbake->gl_context) {
|
||||
DRW_opengl_render_context_enable(lbake->gl_context);
|
||||
if (lbake->gpu_context == NULL) {
|
||||
lbake->gpu_context = GPU_context_create(NULL);
|
||||
lbake->gpu_context = GPU_context_create(NULL, lbake->gl_context);
|
||||
}
|
||||
DRW_gpu_render_context_enable(lbake->gpu_context);
|
||||
}
|
||||
|
||||
@@ -471,6 +471,8 @@ GPUMaterial *ShaderModule::material_shader_get(const char *name,
|
||||
this);
|
||||
GPU_material_status_set(gpumat, GPU_MAT_QUEUED);
|
||||
GPU_material_compile(gpumat);
|
||||
/* Queue deferred material optimization. */
|
||||
DRW_shader_queue_optimize_material(gpumat);
|
||||
return gpumat;
|
||||
}
|
||||
|
||||
|
||||
@@ -251,6 +251,7 @@ struct GPUMaterial *DRW_shader_from_material(struct Material *ma,
|
||||
bool deferred,
|
||||
GPUCodegenCallbackFn callback,
|
||||
void *thunk);
|
||||
void DRW_shader_queue_optimize_material(struct GPUMaterial *mat);
|
||||
void DRW_shader_free(struct GPUShader *shader);
|
||||
#define DRW_SHADER_FREE_SAFE(shader) \
|
||||
do { \
|
||||
|
||||
@@ -3139,7 +3139,7 @@ void DRW_opengl_context_create(void)
|
||||
DST.gl_context = WM_opengl_context_create();
|
||||
WM_opengl_context_activate(DST.gl_context);
|
||||
/* Be sure to create gpu_context too. */
|
||||
DST.gpu_context = GPU_context_create(NULL);
|
||||
DST.gpu_context = GPU_context_create(0, DST.gl_context);
|
||||
/* So we activate the window's one afterwards. */
|
||||
wm_window_reset_drawable();
|
||||
}
|
||||
|
||||
@@ -51,9 +51,13 @@ extern char datatoc_common_fullscreen_vert_glsl[];
|
||||
* \{ */
|
||||
|
||||
typedef struct DRWShaderCompiler {
|
||||
/** Default compilation queue. */
|
||||
ListBase queue; /* GPUMaterial */
|
||||
SpinLock list_lock;
|
||||
|
||||
/** Optimization queue. */
|
||||
ListBase optimize_queue; /* GPUMaterial */
|
||||
|
||||
void *gl_context;
|
||||
GPUContext *gpu_context;
|
||||
bool own_context;
|
||||
@@ -109,7 +113,29 @@ static void drw_deferred_shader_compilation_exec(
|
||||
MEM_freeN(link);
|
||||
}
|
||||
else {
|
||||
break;
|
||||
/* Check for Material Optimization job once there are no more
|
||||
* shaders to compile. */
|
||||
BLI_spin_lock(&comp->list_lock);
|
||||
/* Pop tail because it will be less likely to lock the main thread
|
||||
* if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
|
||||
LinkData *link = (LinkData *)BLI_poptail(&comp->optimize_queue);
|
||||
GPUMaterial *optimize_mat = link ? (GPUMaterial *)link->data : NULL;
|
||||
if (optimize_mat) {
|
||||
/* Avoid another thread freeing the material during optimization. */
|
||||
GPU_material_acquire(optimize_mat);
|
||||
}
|
||||
BLI_spin_unlock(&comp->list_lock);
|
||||
|
||||
if (optimize_mat) {
|
||||
/* Compile optimized material shader. */
|
||||
GPU_material_optimize(optimize_mat);
|
||||
GPU_material_release(optimize_mat);
|
||||
MEM_freeN(link);
|
||||
}
|
||||
else {
|
||||
/* No more materials to optimize, or shaders to compile. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
|
||||
@@ -131,6 +157,7 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
|
||||
|
||||
BLI_spin_lock(&comp->list_lock);
|
||||
BLI_freelistN(&comp->queue);
|
||||
BLI_freelistN(&comp->optimize_queue);
|
||||
BLI_spin_unlock(&comp->list_lock);
|
||||
|
||||
if (comp->own_context) {
|
||||
@@ -146,11 +173,90 @@ static void drw_deferred_shader_compilation_free(void *custom_data)
|
||||
MEM_freeN(comp);
|
||||
}
|
||||
|
||||
/**
 * Append either shader compilation or optimization job to deferred queue and
 * ensure shader compilation worker is active.
 * We keep two separate queues to ensure core compilations always complete before optimization.
 */
static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job)
|
||||
{
|
||||
const bool use_main_context = GPU_use_main_context_workaround();
|
||||
const bool job_own_context = !use_main_context;
|
||||
|
||||
BLI_assert(DST.draw_ctx.evil_C);
|
||||
wmWindowManager *wm = CTX_wm_manager(DST.draw_ctx.evil_C);
|
||||
wmWindow *win = CTX_wm_window(DST.draw_ctx.evil_C);
|
||||
|
||||
/* Get the running job or a new one if none is running. Can only have one job per type & owner.
|
||||
*/
|
||||
wmJob *wm_job = WM_jobs_get(
|
||||
wm, win, wm, "Shaders Compilation", 0, WM_JOB_TYPE_SHADER_COMPILATION);
|
||||
|
||||
DRWShaderCompiler *old_comp = (DRWShaderCompiler *)WM_jobs_customdata_get(wm_job);
|
||||
|
||||
DRWShaderCompiler *comp = MEM_callocN(sizeof(DRWShaderCompiler), "DRWShaderCompiler");
|
||||
BLI_spin_init(&comp->list_lock);
|
||||
|
||||
if (old_comp) {
|
||||
BLI_spin_lock(&old_comp->list_lock);
|
||||
BLI_movelisttolist(&comp->queue, &old_comp->queue);
|
||||
BLI_movelisttolist(&comp->optimize_queue, &old_comp->optimize_queue);
|
||||
BLI_spin_unlock(&old_comp->list_lock);
|
||||
/* Do not recreate context, just pass ownership. */
|
||||
if (old_comp->gl_context) {
|
||||
comp->gl_context = old_comp->gl_context;
|
||||
comp->gpu_context = old_comp->gpu_context;
|
||||
old_comp->own_context = false;
|
||||
comp->own_context = job_own_context;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add to either compilation or optimization queue. */
|
||||
if (is_optimization_job) {
|
||||
BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED);
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
|
||||
LinkData *node = BLI_genericNodeN(mat);
|
||||
BLI_addtail(&comp->optimize_queue, node);
|
||||
}
|
||||
else {
|
||||
GPU_material_status_set(mat, GPU_MAT_QUEUED);
|
||||
LinkData *node = BLI_genericNodeN(mat);
|
||||
BLI_addtail(&comp->queue, node);
|
||||
}
|
||||
|
||||
/* Create only one context. */
|
||||
if (comp->gl_context == NULL) {
|
||||
if (use_main_context) {
|
||||
comp->gl_context = DST.gl_context;
|
||||
comp->gpu_context = DST.gpu_context;
|
||||
}
|
||||
else {
|
||||
comp->gl_context = WM_opengl_context_create();
|
||||
comp->gpu_context = GPU_context_create(NULL, comp->gl_context);
|
||||
GPU_context_active_set(NULL);
|
||||
|
||||
WM_opengl_context_activate(DST.gl_context);
|
||||
GPU_context_active_set(DST.gpu_context);
|
||||
}
|
||||
comp->own_context = job_own_context;
|
||||
}
|
||||
|
||||
WM_jobs_customdata_set(wm_job, comp, drw_deferred_shader_compilation_free);
|
||||
WM_jobs_timer(wm_job, 0.1, NC_MATERIAL | ND_SHADING_DRAW, 0);
|
||||
WM_jobs_delay_start(wm_job, 0.1);
|
||||
WM_jobs_callbacks(wm_job, drw_deferred_shader_compilation_exec, NULL, NULL, NULL);
|
||||
|
||||
G.is_break = false;
|
||||
|
||||
WM_jobs_start(wm, wm_job);
|
||||
}
|
||||
|
||||
static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
|
||||
{
|
||||
if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Do not defer the compilation if we are rendering for image.
|
||||
* deferred rendering is only possible when `evil_C` is available */
|
||||
if (DST.draw_ctx.evil_C == NULL || DRW_state_is_image_render() || !USE_DEFERRED_COMPILATION) {
|
||||
@@ -174,65 +280,8 @@ static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
|
||||
return;
|
||||
}
|
||||
|
||||
const bool use_main_context = GPU_use_main_context_workaround();
|
||||
const bool job_own_context = !use_main_context;
|
||||
|
||||
BLI_assert(DST.draw_ctx.evil_C);
|
||||
wmWindowManager *wm = CTX_wm_manager(DST.draw_ctx.evil_C);
|
||||
wmWindow *win = CTX_wm_window(DST.draw_ctx.evil_C);
|
||||
|
||||
/* Get the running job or a new one if none is running. Can only have one job per type & owner.
|
||||
*/
|
||||
wmJob *wm_job = WM_jobs_get(
|
||||
wm, win, wm, "Shaders Compilation", 0, WM_JOB_TYPE_SHADER_COMPILATION);
|
||||
|
||||
DRWShaderCompiler *old_comp = (DRWShaderCompiler *)WM_jobs_customdata_get(wm_job);
|
||||
|
||||
DRWShaderCompiler *comp = MEM_callocN(sizeof(DRWShaderCompiler), "DRWShaderCompiler");
|
||||
BLI_spin_init(&comp->list_lock);
|
||||
|
||||
if (old_comp) {
|
||||
BLI_spin_lock(&old_comp->list_lock);
|
||||
BLI_movelisttolist(&comp->queue, &old_comp->queue);
|
||||
BLI_spin_unlock(&old_comp->list_lock);
|
||||
/* Do not recreate context, just pass ownership. */
|
||||
if (old_comp->gl_context) {
|
||||
comp->gl_context = old_comp->gl_context;
|
||||
comp->gpu_context = old_comp->gpu_context;
|
||||
old_comp->own_context = false;
|
||||
comp->own_context = job_own_context;
|
||||
}
|
||||
}
|
||||
|
||||
GPU_material_status_set(mat, GPU_MAT_QUEUED);
|
||||
LinkData *node = BLI_genericNodeN(mat);
|
||||
BLI_addtail(&comp->queue, node);
|
||||
|
||||
/* Create only one context. */
|
||||
if (comp->gl_context == NULL) {
|
||||
if (use_main_context) {
|
||||
comp->gl_context = DST.gl_context;
|
||||
comp->gpu_context = DST.gpu_context;
|
||||
}
|
||||
else {
|
||||
comp->gl_context = WM_opengl_context_create();
|
||||
comp->gpu_context = GPU_context_create(NULL);
|
||||
GPU_context_active_set(NULL);
|
||||
|
||||
WM_opengl_context_activate(DST.gl_context);
|
||||
GPU_context_active_set(DST.gpu_context);
|
||||
}
|
||||
comp->own_context = job_own_context;
|
||||
}
|
||||
|
||||
WM_jobs_customdata_set(wm_job, comp, drw_deferred_shader_compilation_free);
|
||||
WM_jobs_timer(wm_job, 0.1, NC_MATERIAL | ND_SHADING_DRAW, 0);
|
||||
WM_jobs_delay_start(wm_job, 0.1);
|
||||
WM_jobs_callbacks(wm_job, drw_deferred_shader_compilation_exec, NULL, NULL, NULL);
|
||||
|
||||
G.is_break = false;
|
||||
|
||||
WM_jobs_start(wm, wm_job);
|
||||
/* Add deferred shader compilation to queue. */
|
||||
drw_deferred_queue_append(mat, false);
|
||||
}
|
||||
|
||||
void DRW_deferred_shader_remove(GPUMaterial *mat)
|
||||
@@ -243,14 +292,49 @@ void DRW_deferred_shader_remove(GPUMaterial *mat)
|
||||
wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
|
||||
if (comp != NULL) {
|
||||
BLI_spin_lock(&comp->list_lock);
|
||||
|
||||
/* Search for compilation job in queue. */
|
||||
LinkData *link = (LinkData *)BLI_findptr(&comp->queue, mat, offsetof(LinkData, data));
|
||||
if (link) {
|
||||
BLI_remlink(&comp->queue, link);
|
||||
GPU_material_status_set(link->data, GPU_MAT_CREATED);
|
||||
}
|
||||
BLI_spin_unlock(&comp->list_lock);
|
||||
|
||||
MEM_SAFE_FREE(link);
|
||||
|
||||
/* Search for optimization job in queue. */
|
||||
LinkData *opti_link = (LinkData *)BLI_findptr(
|
||||
&comp->optimize_queue, mat, offsetof(LinkData, data));
|
||||
if (opti_link) {
|
||||
BLI_remlink(&comp->optimize_queue, opti_link);
|
||||
GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY);
|
||||
}
|
||||
BLI_spin_unlock(&comp->list_lock);
|
||||
|
||||
MEM_SAFE_FREE(opti_link);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void DRW_deferred_shader_optimize_remove(GPUMaterial *mat)
|
||||
{
|
||||
LISTBASE_FOREACH (wmWindowManager *, wm, &G_MAIN->wm) {
|
||||
LISTBASE_FOREACH (wmWindow *, win, &wm->windows) {
|
||||
DRWShaderCompiler *comp = (DRWShaderCompiler *)WM_jobs_customdata_from_type(
|
||||
wm, wm, WM_JOB_TYPE_SHADER_COMPILATION);
|
||||
if (comp != NULL) {
|
||||
BLI_spin_lock(&comp->list_lock);
|
||||
/* Search for optimization job in queue. */
|
||||
LinkData *opti_link = (LinkData *)BLI_findptr(
|
||||
&comp->optimize_queue, mat, offsetof(LinkData, data));
|
||||
if (opti_link) {
|
||||
BLI_remlink(&comp->optimize_queue, opti_link);
|
||||
GPU_material_optimization_status_set(opti_link->data, GPU_MAT_OPTIMIZATION_READY);
|
||||
}
|
||||
BLI_spin_unlock(&comp->list_lock);
|
||||
|
||||
MEM_SAFE_FREE(opti_link);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -384,6 +468,7 @@ GPUMaterial *DRW_shader_from_world(World *wo,
|
||||
}
|
||||
|
||||
drw_deferred_shader_add(mat, deferred);
|
||||
DRW_shader_queue_optimize_material(mat);
|
||||
return mat;
|
||||
}
|
||||
|
||||
@@ -413,9 +498,52 @@ GPUMaterial *DRW_shader_from_material(Material *ma,
|
||||
}
|
||||
|
||||
drw_deferred_shader_add(mat, deferred);
|
||||
DRW_shader_queue_optimize_material(mat);
|
||||
return mat;
|
||||
}
|
||||
|
||||
void DRW_shader_queue_optimize_material(GPUMaterial *mat)
|
||||
{
|
||||
/* Do not perform deferred optimization if performing render.
|
||||
* De-queue any queued optimization jobs. */
|
||||
if (DRW_state_is_image_render()) {
|
||||
if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
|
||||
/* Remove from pending optimization job queue. */
|
||||
DRW_deferred_shader_optimize_remove(mat);
|
||||
/* If optimization job had already started, wait for it to complete. */
|
||||
while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
|
||||
PIL_sleep_ms(20);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* We do not need to perform optimization on the material if it is already compiled or in the
|
||||
* optimization queue. If optimization is not required, the status will be flagged as
|
||||
* `GPU_MAT_OPTIMIZATION_SKIP`.
|
||||
* We can also skip cases which have already been queued up. */
|
||||
if (ELEM(GPU_material_optimization_status(mat),
|
||||
GPU_MAT_OPTIMIZATION_SKIP,
|
||||
GPU_MAT_OPTIMIZATION_SUCCESS,
|
||||
GPU_MAT_OPTIMIZATION_QUEUED)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Only queue optimization once the original shader has been successfully compiled. */
|
||||
if (GPU_material_status(mat) != GPU_MAT_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Defer optimization until sufficient time has passed beyond creation. This avoids excessive
|
||||
* recompilation for shaders which are being actively modified. */
|
||||
if (!GPU_material_optimization_ready(mat)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* Add deferred shader compilation to queue. */
|
||||
drw_deferred_queue_append(mat, true);
|
||||
}
|
||||
|
||||
void DRW_shader_free(GPUShader *shader)
|
||||
{
|
||||
GPU_shader_free(shader);
|
||||
|
||||
@@ -192,6 +192,7 @@ set(METAL_SRC
|
||||
metal/mtl_context.mm
|
||||
metal/mtl_debug.mm
|
||||
metal/mtl_framebuffer.mm
|
||||
metal/mtl_immediate.mm
|
||||
metal/mtl_index_buffer.mm
|
||||
metal/mtl_memory.mm
|
||||
metal/mtl_query.mm
|
||||
@@ -205,11 +206,14 @@ set(METAL_SRC
|
||||
metal/mtl_vertex_buffer.mm
|
||||
|
||||
metal/mtl_backend.hh
|
||||
metal/mtl_batch.hh
|
||||
metal/mtl_capabilities.hh
|
||||
metal/mtl_common.hh
|
||||
metal/mtl_context.hh
|
||||
metal/mtl_debug.hh
|
||||
metal/mtl_drawlist.hh
|
||||
metal/mtl_framebuffer.hh
|
||||
metal/mtl_immediate.hh
|
||||
metal/mtl_index_buffer.hh
|
||||
metal/mtl_memory.hh
|
||||
metal/mtl_primitive.hh
|
||||
|
||||
@@ -26,7 +26,7 @@ eGPUBackendType GPU_backend_get_type(void);
/** Opaque type hiding blender::gpu::Context. */
typedef struct GPUContext GPUContext;

GPUContext *GPU_context_create(void *ghost_window);
GPUContext *GPU_context_create(void *ghost_window, void *ghost_context);
/**
 * To be called after #GPU_context_active_set(ctx_to_destroy).
 */

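Call sites updated in this patch follow the pattern below: window-backed contexts pass NULL for the new argument, while offscreen/worker contexts hand in an already created GHOST/GL context (the EEVEE light-bake case shown is taken from this diff):

  /* Window-backed GPU context (GHOST context is looked up via the window). */
  GPUContext *gpu_ctx = GPU_context_create(ghost_window, NULL);

  /* Offscreen worker context, reusing an existing GHOST context. */
  lbake->gpu_context = GPU_context_create(NULL, lbake->gl_context);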
@@ -117,6 +117,15 @@ typedef enum eGPUMaterialStatus {
  GPU_MAT_SUCCESS,
} eGPUMaterialStatus;

/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not
 * plan to perform optimization on a given material. */
typedef enum eGPUMaterialOptimizationStatus {
  GPU_MAT_OPTIMIZATION_SKIP = 0,
  GPU_MAT_OPTIMIZATION_READY,
  GPU_MAT_OPTIMIZATION_QUEUED,
  GPU_MAT_OPTIMIZATION_SUCCESS,
} eGPUMaterialOptimizationStatus;

typedef enum eGPUDefaultValue {
  GPU_DEFAULT_0 = 0,
  GPU_DEFAULT_1,
@@ -246,6 +255,15 @@ struct Scene *GPU_material_scene(GPUMaterial *material);
|
||||
struct GPUPass *GPU_material_get_pass(GPUMaterial *material);
|
||||
struct GPUShader *GPU_material_get_shader(GPUMaterial *material);
|
||||
const char *GPU_material_get_name(GPUMaterial *material);
|
||||
|
||||
/**
|
||||
* Material Optimization.
|
||||
* \note Compiles optimal version of shader graph, populating mat->optimized_pass.
|
||||
* This operation should always be deferred until existing compilations have completed.
|
||||
* Default un-optimized materials will still exist for interactive material editing performance.
|
||||
*/
|
||||
void GPU_material_optimize(GPUMaterial *mat);
|
||||
|
||||
/**
|
||||
* Return can be NULL if it's a world material.
|
||||
*/
|
||||
@@ -256,6 +274,13 @@ struct Material *GPU_material_get_material(GPUMaterial *material);
|
||||
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
|
||||
void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status);
|
||||
|
||||
/**
|
||||
* Return status for async optimization jobs.
|
||||
*/
|
||||
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
|
||||
void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status);
|
||||
bool GPU_material_optimization_ready(GPUMaterial *mat);
|
||||
|
||||
struct GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
|
||||
/**
|
||||
* Create dynamic UBO from parameters
|
||||
|
||||
@@ -38,7 +38,7 @@ class GPUBackend {
|
||||
virtual void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) = 0;
|
||||
virtual void compute_dispatch_indirect(StorageBuf *indirect_buf) = 0;
|
||||
|
||||
virtual Context *context_alloc(void *ghost_window) = 0;
|
||||
virtual Context *context_alloc(void *ghost_window, void *ghost_context) = 0;
|
||||
|
||||
virtual Batch *batch_alloc() = 0;
|
||||
virtual DrawList *drawlist_alloc(int list_length) = 0;
|
||||
|
||||
@@ -95,6 +95,9 @@ struct GPUPass {
  uint32_t hash;
  /** Did we already try to compile the attached GPUShader. */
  bool compiled;
  /** Hint that an optimized variant of this pass should be created based on a complexity heuristic
   * during pass code generation. */
  bool should_optimize;
};

/* -------------------------------------------------------------------- */
|
||||
@@ -242,6 +245,11 @@ class GPUCodegen {
  ListBase ubo_inputs_ = {nullptr, nullptr};
  GPUInput *cryptomatte_input_ = nullptr;

  /** Cache parameters for complexity heuristic. */
  uint nodes_total_ = 0;
  uint textures_total_ = 0;
  uint uniforms_total_ = 0;

 public:
  GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
  {
@@ -282,6 +290,14 @@ class GPUCodegen {
    return hash_;
  }

  /* Heuristic determined during pass codegen for whether a
   * more optimal variant of this material should be compiled. */
  bool should_optimize_heuristic() const
  {
    bool do_optimize = (nodes_total_ >= 100 || textures_total_ >= 4 || uniforms_total_ >= 64);
    return do_optimize;
  }

 private:
  void set_unique_ids();

@@ -403,6 +419,9 @@ void GPUCodegen::generate_resources()
|
||||
}
|
||||
}
|
||||
|
||||
/* Increment heuristic. */
|
||||
textures_total_ = slot;
|
||||
|
||||
if (!BLI_listbase_is_empty(&ubo_inputs_)) {
|
||||
/* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
|
||||
ss << "struct NodeTree {\n";
|
||||
@@ -440,11 +459,16 @@ void GPUCodegen::generate_library()
|
||||
GPUCodegenCreateInfo &info = *create_info;
|
||||
|
||||
void *value;
|
||||
GSetIterState pop_state = {};
|
||||
while (BLI_gset_pop(graph.used_libraries, &pop_state, &value)) {
|
||||
/* Iterate over libraries. We need to keep this struct intact in case
 * it is required for the optimization pass. */
GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
|
||||
while (!BLI_ghashIterator_done(ihash)) {
|
||||
value = BLI_ghashIterator_getKey(ihash);
|
||||
auto deps = gpu_shader_dependency_get_resolved_source((const char *)value);
|
||||
info.dependencies_generated.extend_non_duplicates(deps);
|
||||
BLI_ghashIterator_step(ihash);
|
||||
}
|
||||
BLI_ghashIterator_free(ihash);
|
||||
}
|
||||
|
||||
void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
|
||||
@@ -512,6 +536,9 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
|
||||
}
|
||||
}
|
||||
eval_ss << ");\n\n";
|
||||
|
||||
/* Increment heuristic. */
|
||||
nodes_total_++;
|
||||
}
|
||||
|
||||
char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag, GPUNodeLink *output_link)
|
||||
@@ -575,6 +602,7 @@ void GPUCodegen::generate_uniform_buffer()
|
||||
if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
|
||||
/* We handle the UBO uniforms separately. */
|
||||
BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
|
||||
uniforms_total_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -602,6 +630,7 @@ void GPUCodegen::generate_graphs()
|
||||
{
|
||||
set_unique_ids();
|
||||
|
||||
/* Serialize graph. */
|
||||
output.surface = graph_serialize(GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface);
|
||||
output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume);
|
||||
output.displacement = graph_serialize(GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement);
|
||||
@@ -637,10 +666,17 @@ void GPUCodegen::generate_graphs()
|
||||
GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
GPUNodeGraph *graph,
|
||||
GPUCodegenCallbackFn finalize_source_cb,
|
||||
void *thunk)
|
||||
void *thunk,
|
||||
bool optimize_graph)
|
||||
{
|
||||
gpu_node_graph_prune_unused(graph);
|
||||
|
||||
/* If Optimize flag is passed in, we are generating an optimized
|
||||
* variant of the GPUMaterial's GPUPass. */
|
||||
if (optimize_graph) {
|
||||
gpu_node_graph_optimize(graph);
|
||||
}
|
||||
|
||||
/* Extract attributes before compiling so the generated VBOs are ready to accept the future
|
||||
* shader. */
|
||||
gpu_node_graph_finalize_uniform_attrs(graph);
|
||||
@@ -648,23 +684,33 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
GPUCodegen codegen(material, graph);
|
||||
codegen.generate_graphs();
|
||||
codegen.generate_cryptomatte();
|
||||
codegen.generate_uniform_buffer();
|
||||
|
||||
/* Cache lookup: Reuse shaders already compiled. */
|
||||
GPUPass *pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
|
||||
GPUPass *pass_hash = nullptr;
|
||||
|
||||
/* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
|
||||
* there is no way to have a collision currently. Some advocated to only use a bigger hash. */
|
||||
if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
|
||||
if (!gpu_pass_is_valid(pass_hash)) {
|
||||
/* Shader has already been created but failed to compile. */
|
||||
return nullptr;
|
||||
if (!optimize_graph) {
|
||||
/* The optimized version of the shader should not re-generate a UBO.
|
||||
* The UBO will not be used for this variant. */
|
||||
codegen.generate_uniform_buffer();
|
||||
|
||||
/** Cache lookup: Reuse shaders already compiled.
|
||||
* NOTE: We only perform cache look-up for non-optimized shader
|
||||
* graphs, as baked constant data amongst other optimizations will generate too many
|
||||
* shader source permutations, with minimal re-usability. */
|
||||
pass_hash = gpu_pass_cache_lookup(codegen.hash_get());
|
||||
|
||||
/* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
|
||||
* there is no way to have a collision currently. Some advocated to only use a bigger hash. */
|
||||
if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
|
||||
if (!gpu_pass_is_valid(pass_hash)) {
|
||||
/* Shader has already been created but failed to compile. */
|
||||
return nullptr;
|
||||
}
|
||||
/* No collision, just return the pass. */
|
||||
BLI_spin_lock(&pass_cache_spin);
|
||||
pass_hash->refcount += 1;
|
||||
BLI_spin_unlock(&pass_cache_spin);
|
||||
return pass_hash;
|
||||
}
|
||||
/* No collision, just return the pass. */
|
||||
BLI_spin_lock(&pass_cache_spin);
|
||||
pass_hash->refcount += 1;
|
||||
BLI_spin_unlock(&pass_cache_spin);
|
||||
return pass_hash;
|
||||
}
|
||||
|
||||
/* Either the shader is not compiled or there is a hash collision...
|
||||
@@ -702,14 +748,31 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
pass->create_info = codegen.create_info;
|
||||
pass->hash = codegen.hash_get();
|
||||
pass->compiled = false;
|
||||
/* Only flag pass optimization hint if this is the first generated pass for a material.
|
||||
* Optimized passes cannot be optimized further, even if the heuristic is still not
|
||||
* favourable. */
|
||||
pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();
|
||||
|
||||
codegen.create_info = nullptr;
|
||||
|
||||
gpu_pass_cache_insert_after(pass_hash, pass);
|
||||
/* Only insert non-optimized graphs into cache.
|
||||
* Optimized graphs will continuously be recompiled with new unique source during material
|
||||
* editing, and thus causing the cache to fill up quickly with materials offering minimal
|
||||
* re-use. */
|
||||
if (!optimize_graph) {
|
||||
gpu_pass_cache_insert_after(pass_hash, pass);
|
||||
}
|
||||
}
|
||||
return pass;
|
||||
}
|
||||
|
||||
bool GPU_pass_should_optimize(GPUPass *pass)
|
||||
{
|
||||
/* Returns optimization heuristic prepared during
|
||||
* initial codegen. */
|
||||
return pass->should_optimize;
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
||||
@@ -25,10 +25,12 @@ typedef struct GPUPass GPUPass;
|
||||
GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
struct GPUNodeGraph *graph,
|
||||
GPUCodegenCallbackFn finalize_source_cb,
|
||||
void *thunk);
|
||||
void *thunk,
|
||||
bool optimize_graph);
|
||||
GPUShader *GPU_pass_shader_get(GPUPass *pass);
|
||||
bool GPU_pass_compile(GPUPass *pass, const char *shname);
|
||||
void GPU_pass_release(GPUPass *pass);
|
||||
bool GPU_pass_should_optimize(GPUPass *pass);
|
||||
|
||||
/* Module */
|
||||
|
||||
|
||||
@@ -94,7 +94,7 @@ Context *Context::get()
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
||||
GPUContext *GPU_context_create(void *ghost_window)
|
||||
GPUContext *GPU_context_create(void *ghost_window, void *ghost_context)
|
||||
{
|
||||
{
|
||||
std::scoped_lock lock(backend_users_mutex);
|
||||
@@ -105,7 +105,7 @@ GPUContext *GPU_context_create(void *ghost_window)
|
||||
num_backend_users++;
|
||||
}
|
||||
|
||||
Context *ctx = GPUBackend::get()->context_alloc(ghost_window);
|
||||
Context *ctx = GPUBackend::get()->context_alloc(ghost_window, ghost_context);
|
||||
|
||||
GPU_context_active_set(wrap(ctx));
|
||||
return wrap(ctx);
|
||||
@@ -216,6 +216,9 @@ void GPU_render_step()
/** \name Backend selection
 * \{ */

/* NOTE: To enable Metal API, we need to temporarily change this to `GPU_BACKEND_METAL`.
 * Until a global switch is added, Metal also needs to be enabled in GHOST_ContextCGL:
 * `m_useMetalForRendering = true`. */
static const eGPUBackendType g_backend_type = GPU_BACKEND_OPENGL;
static GPUBackend *g_backend = nullptr;


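Per the note above, enabling the Metal backend for testing currently requires two local edits rather than a user-facing switch (file locations are as suggested by the note and this diff):

  /* gpu_context.cc */
  static const eGPUBackendType g_backend_type = GPU_BACKEND_METAL;

  /* GHOST_ContextCGL.h */
  bool m_useMetalForRendering = true;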
@@ -34,6 +34,8 @@
|
||||
|
||||
#include "DRW_engine.h"
|
||||
|
||||
#include "PIL_time.h"
|
||||
|
||||
#include "gpu_codegen.h"
|
||||
#include "gpu_node_graph.h"
|
||||
|
||||
@@ -43,6 +45,17 @@
|
||||
#define MAX_COLOR_BAND 128
|
||||
#define MAX_GPU_SKIES 8
|
||||
|
||||
/** Whether the optimized variant of the GPUPass should be created asynchronously.
 * Usage of this depends on whether there are possible threading challenges of doing so.
 * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
 * compilation, though this option exists in case any potential scenarios for material graph
 * optimization cause a slowdown on the main thread.
 *
 * NOTE: The actual shader program for the optimized pass will always be compiled asynchronously,
 * this flag controls whether shader node graph source serialization happens on the compilation
 * worker thread. */
#define ASYNC_OPTIMIZED_PASS_CREATION 0
|
||||
|
||||
typedef struct GPUColorBandBuilder {
|
||||
float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
|
||||
int current_layer;
|
||||
@@ -57,6 +70,27 @@ struct GPUMaterial {
|
||||
/* Contains GPUShader and source code for deferred compilation.
|
||||
* Can be shared between similar material (i.e: sharing same nodetree topology). */
|
||||
GPUPass *pass;
|
||||
/* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance.
|
||||
* This shader variant bakes dynamic uniform data as constant. This variant will not use
|
||||
* the ubo, and instead bake constants directly into the shader source. */
|
||||
GPUPass *optimized_pass;
|
||||
/* Optimization status.
|
||||
* We also use this status to determine whether this material should be considered for
|
||||
* optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
|
||||
* `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
|
||||
* `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
|
||||
* performance to do so, based on the heuristic.
|
||||
*/
|
||||
eGPUMaterialOptimizationStatus optimization_status;
|
||||
double creation_time;
|
||||
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
|
||||
struct DeferredOptimizePass {
|
||||
GPUCodegenCallbackFn callback;
|
||||
void *thunk;
|
||||
} DeferredOptimizePass;
|
||||
struct DeferredOptimizePass optimize_pass_info;
|
||||
#endif
|
||||
|
||||
/** UBOs for this material parameters. */
|
||||
GPUUniformBuf *ubo;
|
||||
/** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */
|
||||
@@ -209,6 +243,9 @@ void GPU_material_free_single(GPUMaterial *material)
|
||||
|
||||
gpu_node_graph_free(&material->graph);
|
||||
|
||||
if (material->optimized_pass != NULL) {
|
||||
GPU_pass_release(material->optimized_pass);
|
||||
}
|
||||
if (material->pass != NULL) {
|
||||
GPU_pass_release(material->pass);
|
||||
}
|
||||
@@ -247,12 +284,15 @@ Scene *GPU_material_scene(GPUMaterial *material)
|
||||
|
||||
GPUPass *GPU_material_get_pass(GPUMaterial *material)
|
||||
{
|
||||
return material->pass;
|
||||
return (material->optimized_pass) ? material->optimized_pass : material->pass;
|
||||
}
|
||||
|
||||
GPUShader *GPU_material_get_shader(GPUMaterial *material)
|
||||
{
|
||||
return material->pass ? GPU_pass_shader_get(material->pass) : NULL;
|
||||
/* First attempt to select optimized shader. If not available, fetch original. */
|
||||
GPUShader *shader = (material->optimized_pass) ? GPU_pass_shader_get(material->optimized_pass) :
|
||||
NULL;
|
||||
return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : NULL);
|
||||
}
|
||||
|
||||
const char *GPU_material_get_name(GPUMaterial *material)
|
||||
@@ -665,6 +705,29 @@ void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
|
||||
mat->status = status;
|
||||
}
|
||||
|
||||
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
|
||||
{
|
||||
return mat->optimization_status;
|
||||
}
|
||||
|
||||
void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
|
||||
{
|
||||
mat->optimization_status = status;
|
||||
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
|
||||
/* Reset creation timer to delay optimization pass. */
|
||||
mat->creation_time = PIL_check_seconds_timer();
|
||||
}
|
||||
}
|
||||
|
||||
bool GPU_material_optimization_ready(GPUMaterial *mat)
|
||||
{
|
||||
/* Timer threshold before optimizations will be queued.
|
||||
* When materials are frequently being modified, optimization
|
||||
* can incur CPU overhead from excessive compilation. */
|
||||
const double optimization_time_threshold_s = 5.0;
|
||||
return ((PIL_check_seconds_timer() - mat->creation_time) >= optimization_time_threshold_s);
|
||||
}
|
||||
|
||||
/* Code generation */
|
||||
|
||||
bool GPU_material_has_surface_output(GPUMaterial *mat)
|
||||
@@ -730,6 +793,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
||||
mat->uuid = shader_uuid;
|
||||
mat->flag = GPU_MATFLAG_UPDATED;
|
||||
mat->status = GPU_MAT_CREATED;
|
||||
mat->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
|
||||
mat->is_volume_shader = is_volume_shader;
|
||||
mat->graph.used_libraries = BLI_gset_new(
|
||||
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
|
||||
@@ -748,7 +812,7 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
||||
|
||||
{
|
||||
/* Create source code and search pass cache for an already compiled version. */
|
||||
mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk);
|
||||
mat->pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, false);
|
||||
|
||||
if (mat->pass == NULL) {
|
||||
/* We had a cache hit and the shader has already failed to compile. */
|
||||
@@ -756,11 +820,44 @@ GPUMaterial *GPU_material_from_nodetree(Scene *scene,
|
||||
gpu_node_graph_free(&mat->graph);
|
||||
}
|
||||
else {
|
||||
/* Determine whether we should generate an optimized variant of the graph.
|
||||
* Heuristic is based on complexity of default material pass and shader node graph. */
|
||||
if (GPU_pass_should_optimize(mat->pass)) {
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
|
||||
}
|
||||
|
||||
GPUShader *sh = GPU_pass_shader_get(mat->pass);
|
||||
if (sh != NULL) {
|
||||
/* We had a cache hit and the shader is already compiled. */
|
||||
mat->status = GPU_MAT_SUCCESS;
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
|
||||
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
}
|
||||
}
|
||||
|
||||
/* Generate optimized pass. */
|
||||
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
|
||||
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
|
||||
mat->optimized_pass = NULL;
|
||||
mat->optimize_pass_info.callback = callback;
|
||||
mat->optimize_pass_info.thunk = thunk;
|
||||
#else
|
||||
mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, callback, thunk, true);
|
||||
if (mat->optimized_pass == NULL) {
|
||||
/* Failed to create optimized pass. */
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
|
||||
}
|
||||
else {
|
||||
GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
|
||||
if (optimized_sh != NULL) {
|
||||
/* Optimized shader already available. */
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -811,7 +908,11 @@ void GPU_material_compile(GPUMaterial *mat)
|
||||
GPUShader *sh = GPU_pass_shader_get(mat->pass);
|
||||
if (sh != NULL) {
|
||||
mat->status = GPU_MAT_SUCCESS;
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
|
||||
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
|
||||
/* Only free node graph nodes if not required by secondary optimization pass. */
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
}
|
||||
}
|
||||
else {
|
||||
mat->status = GPU_MAT_FAILED;
|
||||
@@ -825,6 +926,71 @@ void GPU_material_compile(GPUMaterial *mat)
|
||||
}
|
||||
}
|
||||
|
||||
void GPU_material_optimize(GPUMaterial *mat)
|
||||
{
|
||||
/* If shader is flagged for skipping optimization or has already been successfully
|
||||
* optimized, skip. */
|
||||
if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* If original shader has not been fully compiled, we are not
|
||||
* ready to perform optimization. */
|
||||
if (mat->status != GPU_MAT_SUCCESS) {
|
||||
/* Reset optimization status. */
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
|
||||
return;
|
||||
}
|
||||
|
||||
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
|
||||
/* If the optimized pass is not valid, first generate optimized pass.
|
||||
* NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when
|
||||
* used with "thunk". So far, this appears to work, and deferring optimized pass creation is more
|
||||
* optimal, as these do not benefit from caching, due to baked constants. However, this could
|
||||
* possibly be cause for concern for certain cases. */
|
||||
if (!mat->optimized_pass) {
|
||||
mat->optimized_pass = GPU_generate_pass(
|
||||
mat, &mat->graph, mat->optimize_pass_info.callback, mat->optimize_pass_info.thunk, true);
|
||||
BLI_assert(mat->optimized_pass);
|
||||
}
|
||||
#else
|
||||
if (!mat->optimized_pass) {
|
||||
/* Optimized pass has not been created, skip future optimization attempts. */
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool success;
|
||||
/* NOTE: The shader may have already been compiled here since we are
|
||||
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
|
||||
#ifndef NDEBUG
|
||||
success = GPU_pass_compile(mat->optimized_pass, mat->name);
|
||||
#else
|
||||
success = GPU_pass_compile(mat->optimized_pass, __func__);
|
||||
#endif
|
||||
|
||||
if (success) {
|
||||
GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass);
|
||||
if (sh != NULL) {
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
|
||||
}
|
||||
else {
|
||||
/* Optimized pass failed to compile. Disable any future optimization attempts. */
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Optimization pass generation failed. Disable future attempts to optimize. */
|
||||
GPU_pass_release(mat->optimized_pass);
|
||||
mat->optimized_pass = NULL;
|
||||
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
|
||||
}
|
||||
|
||||
/* Release node graph as no longer needed. */
|
||||
gpu_node_graph_free_nodes(&mat->graph);
|
||||
}
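
For context, the two hunks above split material shader creation into a generic compile step and an
optional constant-baking optimization step. A minimal caller-side sketch of that ordering, assuming
the existing GPU_material_status() accessor and a hypothetical wrapper name, is:

/* Sketch only, not part of this patch. */
static void material_compile_then_optimize_example(GPUMaterial *mat)
{
  /* Stage 1: compile the generic pass; uniforms stay dynamic. */
  GPU_material_compile(mat);

  /* Stage 2: once the base shader is usable, bake uniforms into constants and
   * compile the specialized pass. GPU_material_optimize() is a no-op when the
   * material was flagged GPU_MAT_OPTIMIZATION_SKIP. */
  if (GPU_material_status(mat) == GPU_MAT_SUCCESS) {
    GPU_material_optimize(mat);
  }
}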
|
||||
|
||||
void GPU_materials_free(Main *bmain)
|
||||
{
|
||||
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
|
||||
@@ -848,6 +1014,8 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
|
||||
material->graph.used_libraries = BLI_gset_new(
|
||||
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
|
||||
material->refcount = 1;
|
||||
material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
|
||||
material->optimized_pass = NULL;
|
||||
|
||||
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
|
||||
construct_function_cb(thunk, material);
|
||||
@@ -856,7 +1024,9 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
|
||||
gpu_material_ramp_texture_build(material);
|
||||
|
||||
/* Lookup an existing pass in the cache or generate a new one. */
|
||||
material->pass = GPU_generate_pass(material, &material->graph, generate_code_function_cb, thunk);
|
||||
material->pass = GPU_generate_pass(
|
||||
material, &material->graph, generate_code_function_cb, thunk, false);
|
||||
material->optimized_pass = NULL;
|
||||
|
||||
/* The pass already exists in the pass cache but its shader already failed to compile. */
|
||||
if (material->pass == NULL) {
|
||||
@@ -865,11 +1035,42 @@ GPUMaterial *GPU_material_from_callbacks(ConstructGPUMaterialFn construct_functi
|
||||
return material;
|
||||
}
|
||||
|
||||
/* Generate optimized pass. */
|
||||
if (GPU_pass_should_optimize(material->pass)) {
|
||||
|
||||
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
|
||||
  material->optimized_pass = NULL;
|
||||
material->optimize_pass_info.callback = generate_code_function_cb;
|
||||
material->optimize_pass_info.thunk = thunk;
|
||||
  GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_READY);
|
||||
#else
|
||||
material->optimized_pass = GPU_generate_pass(
|
||||
material, &material->graph, generate_code_function_cb, thunk, true);
|
||||
|
||||
if (material->optimized_pass == NULL) {
|
||||
/* Failed to create optimized pass. */
|
||||
gpu_node_graph_free_nodes(&material->graph);
|
||||
GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SKIP);
|
||||
}
|
||||
else {
|
||||
GPUShader *optimized_sh = GPU_pass_shader_get(material->optimized_pass);
|
||||
if (optimized_sh != NULL) {
|
||||
/* Optimized shader already available. */
|
||||
gpu_node_graph_free_nodes(&material->graph);
|
||||
GPU_material_optimization_status_set(material, GPU_MAT_OPTIMIZATION_SUCCESS);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* The pass already exists in the pass cache and its shader is already compiled. */
|
||||
GPUShader *shader = GPU_pass_shader_get(material->pass);
|
||||
if (shader != NULL) {
|
||||
material->status = GPU_MAT_SUCCESS;
|
||||
gpu_node_graph_free_nodes(&material->graph);
|
||||
if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
|
||||
/* Only free node graph if not required by secondary optimization pass. */
|
||||
gpu_node_graph_free_nodes(&material->graph);
|
||||
}
|
||||
return material;
|
||||
}
|
||||
|
||||
|
||||
@@ -914,3 +914,22 @@ void gpu_node_graph_prune_unused(GPUNodeGraph *graph)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gpu_node_graph_optimize(GPUNodeGraph *graph)
|
||||
{
|
||||
/* Replace all uniform node links with constant. */
|
||||
LISTBASE_FOREACH (GPUNode *, node, &graph->nodes) {
|
||||
LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
|
||||
if (input->link) {
|
||||
if (input->link->link_type == GPU_NODE_LINK_UNIFORM) {
|
||||
input->link->link_type = GPU_NODE_LINK_CONSTANT;
|
||||
}
|
||||
}
|
||||
if (input->source == GPU_SOURCE_UNIFORM) {
|
||||
input->source = (input->type == GPU_CLOSURE) ? GPU_SOURCE_STRUCT : GPU_SOURCE_CONSTANT;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: Consider performing other node graph optimizations here. */
|
||||
}
|
||||
|
||||
@@ -179,6 +179,21 @@ typedef struct GPUNodeGraph {
|
||||
|
||||
void gpu_node_graph_prune_unused(GPUNodeGraph *graph);
|
||||
void gpu_node_graph_finalize_uniform_attrs(GPUNodeGraph *graph);
|
||||
|
||||
/**
|
||||
* Optimize node graph for optimized material shader path.
|
||||
* Once the base material has been generated, we can modify the shader
|
||||
* node graph to create one which will produce an optimally performing shader.
|
||||
* This currently involves baking uniform data into constant data to enable
|
||||
* aggressive constant folding by the compiler in order to reduce complexity and
|
||||
* shader core memory pressure.
|
||||
*
|
||||
* NOTE: Graph optimizations will produce a shader which needs to be re-compiled
|
||||
* more frequently, however, the default material pass will always exist to fall
|
||||
* back on.
|
||||
*/
|
||||
void gpu_node_graph_optimize(GPUNodeGraph *graph);
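
/* Illustration only (values are made up): after gpu_node_graph_optimize() runs, an input that the
 * code generator previously emitted as a uniform, e.g.
 *   uniform float node_roughness;        // GPU_SOURCE_UNIFORM
 * is instead emitted as a literal constant, e.g.
 *   const float node_roughness = 0.5;    // GPU_SOURCE_CONSTANT
 * which lets the shader compiler constant-fold through the node graph. */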
|
||||
|
||||
/**
|
||||
* Free intermediate node graph.
|
||||
*/
|
||||
|
||||
@@ -45,7 +45,7 @@ void ShaderBuilder::init()
|
||||
ghost_context_ = GHOST_CreateOpenGLContext(ghost_system_, glSettings);
|
||||
GHOST_ActivateOpenGLContext(ghost_context_);
|
||||
|
||||
gpu_context_ = GPU_context_create(nullptr);
|
||||
gpu_context_ = GPU_context_create(nullptr, ghost_context_);
|
||||
GPU_init();
|
||||
}
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@ ShaderInterface::ShaderInterface() = default;
|
||||
ShaderInterface::~ShaderInterface()
|
||||
{
|
||||
/* Free memory used by name_buffer. */
|
||||
MEM_freeN(name_buffer_);
|
||||
MEM_freeN(inputs_);
|
||||
MEM_SAFE_FREE(name_buffer_);
|
||||
MEM_SAFE_FREE(inputs_);
|
||||
}
|
||||
|
||||
static void sort_input_list(MutableSpan<ShaderInput> dst)
|
||||
|
||||
@@ -63,7 +63,7 @@ class MTLBackend : public GPUBackend {
|
||||
|
||||
/* MTL Allocators need to be implemented in separate .mm files, due to allocation of Objective-C
|
||||
* objects. */
|
||||
Context *context_alloc(void *ghost_window) override;
|
||||
Context *context_alloc(void *ghost_window, void *ghost_context) override;
|
||||
Batch *batch_alloc() override;
|
||||
DrawList *drawlist_alloc(int list_length) override;
|
||||
FrameBuffer *framebuffer_alloc(const char *name) override;
|
||||
|
||||
@@ -8,8 +8,11 @@
|
||||
|
||||
#include "gpu_backend.hh"
|
||||
#include "mtl_backend.hh"
|
||||
#include "mtl_batch.hh"
|
||||
#include "mtl_context.hh"
|
||||
#include "mtl_drawlist.hh"
|
||||
#include "mtl_framebuffer.hh"
|
||||
#include "mtl_immediate.hh"
|
||||
#include "mtl_index_buffer.hh"
|
||||
#include "mtl_query.hh"
|
||||
#include "mtl_shader.hh"
|
||||
@@ -37,21 +40,21 @@ void MTLBackend::samplers_update(){
|
||||
/* Placeholder -- Handled in MTLContext. */
|
||||
};
|
||||
|
||||
Context *MTLBackend::context_alloc(void *ghost_window)
|
||||
Context *MTLBackend::context_alloc(void *ghost_window, void *ghost_context)
|
||||
{
|
||||
return new MTLContext(ghost_window);
|
||||
return new MTLContext(ghost_window, ghost_context);
|
||||
};
|
||||
|
||||
Batch *MTLBackend::batch_alloc()
|
||||
{
|
||||
/* TODO(Metal): Implement MTLBatch. */
|
||||
return nullptr;
|
||||
/* TODO(Metal): Full MTLBatch implementation. */
|
||||
return new MTLBatch();
|
||||
};
|
||||
|
||||
DrawList *MTLBackend::drawlist_alloc(int list_length)
|
||||
{
|
||||
/* TODO(Metal): Implement MTLDrawList. */
|
||||
return nullptr;
|
||||
/* TODO(Metal): Full MTLDrawList implementation. */
|
||||
return new MTLDrawList(list_length);
|
||||
};
|
||||
|
||||
FrameBuffer *MTLBackend::framebuffer_alloc(const char *name)
|
||||
|
||||
source/blender/gpu/metal/mtl_batch.hh (new file, 41 lines)
@@ -0,0 +1,41 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/** \file
|
||||
* \ingroup gpu
|
||||
*
|
||||
* GPU geometry batch
|
||||
* Contains VAOs + VBOs + Shader representing a drawable entity.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "MEM_guardedalloc.h"
|
||||
|
||||
#include "gpu_batch_private.hh"
|
||||
|
||||
namespace blender {
|
||||
namespace gpu {
|
||||
|
||||
|
||||
/* Pass-through MTLBatch. TODO(Metal): Implement. */
|
||||
class MTLBatch : public Batch {
|
||||
public:
|
||||
void draw(int v_first, int v_count, int i_first, int i_count) override {
|
||||
|
||||
}
|
||||
|
||||
void draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) override {
|
||||
|
||||
}
|
||||
|
||||
void multi_draw_indirect(GPUStorageBuf *indirect_buf,
|
||||
int count,
|
||||
intptr_t offset,
|
||||
intptr_t stride) override {
|
||||
|
||||
}
|
||||
MEM_CXX_CLASS_ALLOC_FUNCS("MTLBatch");
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace blender
|
||||
@@ -54,6 +54,7 @@ id<MTLCommandBuffer> MTLCommandBufferManager::ensure_begin()
|
||||
MTLCommandBufferDescriptor *desc = [[MTLCommandBufferDescriptor alloc] init];
|
||||
desc.errorOptions = MTLCommandBufferErrorOptionEncoderExecutionStatus;
|
||||
desc.retainedReferences = YES;
|
||||
BLI_assert(context_.queue != nil);
|
||||
active_command_buffer_ = [context_.queue commandBufferWithDescriptor:desc];
|
||||
}
|
||||
else {
|
||||
@@ -611,40 +612,187 @@ void MTLRenderPassState::bind_vertex_sampler(MTLSamplerBinding &sampler_binding,
|
||||
bool use_argument_buffer_for_samplers,
|
||||
uint slot)
|
||||
{
|
||||
/* TODO(Metal): Implement RenderCommandEncoder vertex sampler binding utility. This will be
|
||||
* implemented alongside MTLShader. */
|
||||
/* Range check. */
|
||||
const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
|
||||
BLI_assert(slot >= 0);
|
||||
BLI_assert(slot <= shader_interface->get_max_texture_index());
|
||||
BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
|
||||
UNUSED_VARS_NDEBUG(shader_interface);
|
||||
|
||||
/* If sampler state has not changed for the given slot, we do not need to fetch. */
|
||||
if (this->cached_vertex_sampler_state_bindings[slot].sampler_state == nil ||
|
||||
!(this->cached_vertex_sampler_state_bindings[slot].binding_state == sampler_binding.state) ||
|
||||
use_argument_buffer_for_samplers) {
|
||||
|
||||
id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
|
||||
ctx.get_default_sampler_state() :
|
||||
ctx.get_sampler_from_state(sampler_binding.state);
|
||||
if (!use_argument_buffer_for_samplers) {
|
||||
/* Update binding and cached state. */
|
||||
id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
|
||||
BLI_assert(rec != nil);
|
||||
[rec setVertexSamplerState:sampler_state atIndex:slot];
|
||||
this->cached_vertex_sampler_state_bindings[slot].binding_state = sampler_binding.state;
|
||||
this->cached_vertex_sampler_state_bindings[slot].sampler_state = sampler_state;
|
||||
}
|
||||
|
||||
/* Flag last binding type. */
|
||||
this->cached_vertex_sampler_state_bindings[slot].is_arg_buffer_binding =
|
||||
use_argument_buffer_for_samplers;
|
||||
|
||||
/* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
|
||||
* the samplers array is always up to date. */
|
||||
ctx.samplers_.mtl_sampler[slot] = sampler_state;
|
||||
ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
|
||||
}
|
||||
}
|
||||
|
||||
void MTLRenderPassState::bind_fragment_sampler(MTLSamplerBinding &sampler_binding,
|
||||
bool use_argument_buffer_for_samplers,
|
||||
uint slot)
|
||||
{
|
||||
/* TODO(Metal): Implement RenderCommandEncoder fragment sampler binding utility. This will be
|
||||
* implemented alongside MTLShader. */
|
||||
/* Range check. */
|
||||
const MTLShaderInterface *shader_interface = ctx.pipeline_state.active_shader->get_interface();
|
||||
BLI_assert(slot >= 0);
|
||||
BLI_assert(slot <= shader_interface->get_max_texture_index());
|
||||
BLI_assert(slot < MTL_MAX_TEXTURE_SLOTS);
|
||||
UNUSED_VARS_NDEBUG(shader_interface);
|
||||
|
||||
  /* If sampler state has not changed for the given slot, we do not need to fetch. */
|
||||
if (this->cached_fragment_sampler_state_bindings[slot].sampler_state == nil ||
|
||||
!(this->cached_fragment_sampler_state_bindings[slot].binding_state ==
|
||||
sampler_binding.state) ||
|
||||
use_argument_buffer_for_samplers) {
|
||||
|
||||
id<MTLSamplerState> sampler_state = (sampler_binding.state == DEFAULT_SAMPLER_STATE) ?
|
||||
ctx.get_default_sampler_state() :
|
||||
ctx.get_sampler_from_state(sampler_binding.state);
|
||||
if (!use_argument_buffer_for_samplers) {
|
||||
/* Update binding and cached state. */
|
||||
id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
|
||||
BLI_assert(rec != nil);
|
||||
[rec setFragmentSamplerState:sampler_state atIndex:slot];
|
||||
this->cached_fragment_sampler_state_bindings[slot].binding_state = sampler_binding.state;
|
||||
this->cached_fragment_sampler_state_bindings[slot].sampler_state = sampler_state;
|
||||
}
|
||||
|
||||
/* Flag last binding type */
|
||||
this->cached_fragment_sampler_state_bindings[slot].is_arg_buffer_binding =
|
||||
use_argument_buffer_for_samplers;
|
||||
|
||||
/* Always assign to argument buffer samplers binding array - Efficiently ensures the value in
|
||||
* the samplers array is always up to date. */
|
||||
ctx.samplers_.mtl_sampler[slot] = sampler_state;
|
||||
ctx.samplers_.mtl_sampler_flags[slot] = sampler_binding.state;
|
||||
}
|
||||
}
|
||||
|
||||
void MTLRenderPassState::bind_vertex_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
|
||||
{
|
||||
/* TODO(Metal): Implement RenderCommandEncoder vertex buffer binding utility. This will be
|
||||
* implemented alongside the full MTLMemoryManager. */
|
||||
BLI_assert(index >= 0);
|
||||
BLI_assert(buffer_offset >= 0);
|
||||
BLI_assert(buffer != nil);
|
||||
|
||||
  BufferBindingCached &current_vert_ubo_binding = this->cached_vertex_buffer_bindings[index];
|
||||
if (current_vert_ubo_binding.offset != buffer_offset ||
|
||||
current_vert_ubo_binding.metal_buffer != buffer || current_vert_ubo_binding.is_bytes) {
|
||||
|
||||
id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
|
||||
BLI_assert(rec != nil);
|
||||
|
||||
if (current_vert_ubo_binding.metal_buffer == buffer) {
|
||||
/* If buffer is the same, but offset has changed. */
|
||||
[rec setVertexBufferOffset:buffer_offset atIndex:index];
|
||||
}
|
||||
else {
|
||||
/* Bind Vertex Buffer. */
|
||||
[rec setVertexBuffer:buffer offset:buffer_offset atIndex:index];
|
||||
}
|
||||
|
||||
/* Update Bind-state cache. */
|
||||
this->cached_vertex_buffer_bindings[index].is_bytes = false;
|
||||
this->cached_vertex_buffer_bindings[index].metal_buffer = buffer;
|
||||
this->cached_vertex_buffer_bindings[index].offset = buffer_offset;
|
||||
}
|
||||
}
|
||||
|
||||
void MTLRenderPassState::bind_fragment_buffer(id<MTLBuffer> buffer, uint buffer_offset, uint index)
|
||||
{
|
||||
/* TODO(Metal): Implement RenderCommandEncoder fragment buffer binding utility. This will be
|
||||
* implemented alongside the full MTLMemoryManager. */
|
||||
BLI_assert(index >= 0);
|
||||
BLI_assert(buffer_offset >= 0);
|
||||
BLI_assert(buffer != nil);
|
||||
|
||||
  BufferBindingCached &current_frag_ubo_binding = this->cached_fragment_buffer_bindings[index];
|
||||
if (current_frag_ubo_binding.offset != buffer_offset ||
|
||||
current_frag_ubo_binding.metal_buffer != buffer || current_frag_ubo_binding.is_bytes) {
|
||||
|
||||
id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
|
||||
BLI_assert(rec != nil);
|
||||
|
||||
if (current_frag_ubo_binding.metal_buffer == buffer) {
|
||||
/* If buffer is the same, but offset has changed. */
|
||||
[rec setFragmentBufferOffset:buffer_offset atIndex:index];
|
||||
}
|
||||
else {
|
||||
/* Bind Fragment Buffer */
|
||||
[rec setFragmentBuffer:buffer offset:buffer_offset atIndex:index];
|
||||
}
|
||||
|
||||
/* Update Bind-state cache */
|
||||
this->cached_fragment_buffer_bindings[index].is_bytes = false;
|
||||
this->cached_fragment_buffer_bindings[index].metal_buffer = buffer;
|
||||
this->cached_fragment_buffer_bindings[index].offset = buffer_offset;
|
||||
}
|
||||
}
|
||||
|
||||
void MTLRenderPassState::bind_vertex_bytes(void *bytes, uint length, uint index)
|
||||
{
|
||||
/* TODO(Metal): Implement RenderCommandEncoder vertex bytes binding utility. This will be
|
||||
* implemented alongside the full MTLMemoryManager. */
|
||||
/* Bytes always updated as source data may have changed. */
|
||||
BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
|
||||
BLI_assert(length > 0);
|
||||
BLI_assert(bytes != nullptr);
|
||||
|
||||
if (length < MTL_MAX_SET_BYTES_SIZE) {
|
||||
id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
|
||||
[rec setVertexBytes:bytes length:length atIndex:index];
|
||||
}
|
||||
else {
|
||||
/* We have run over the setBytes limit, bind buffer instead. */
|
||||
MTLTemporaryBuffer range =
|
||||
ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
|
||||
memcpy(range.data, bytes, length);
|
||||
this->bind_vertex_buffer(range.metal_buffer, range.buffer_offset, index);
|
||||
}
|
||||
|
||||
/* Update Bind-state cache */
|
||||
this->cached_vertex_buffer_bindings[index].is_bytes = true;
|
||||
this->cached_vertex_buffer_bindings[index].metal_buffer = nil;
|
||||
this->cached_vertex_buffer_bindings[index].offset = -1;
|
||||
}
|
||||
|
||||
void MTLRenderPassState::bind_fragment_bytes(void *bytes, uint length, uint index)
|
||||
{
|
||||
/* TODO(Metal): Implement RenderCommandEncoder fragment bytes binding utility. This will be
|
||||
* implemented alongside the full MTLMemoryManager. */
|
||||
/* Bytes always updated as source data may have changed. */
|
||||
BLI_assert(index >= 0 && index < MTL_MAX_UNIFORM_BUFFER_BINDINGS);
|
||||
BLI_assert(length > 0);
|
||||
BLI_assert(bytes != nullptr);
|
||||
|
||||
if (length < MTL_MAX_SET_BYTES_SIZE) {
|
||||
id<MTLRenderCommandEncoder> rec = this->cmd.get_active_render_command_encoder();
|
||||
[rec setFragmentBytes:bytes length:length atIndex:index];
|
||||
}
|
||||
else {
|
||||
/* We have run over the setBytes limit, bind buffer instead. */
|
||||
MTLTemporaryBuffer range =
|
||||
ctx.get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(length, 256);
|
||||
memcpy(range.data, bytes, length);
|
||||
this->bind_fragment_buffer(range.metal_buffer, range.buffer_offset, index);
|
||||
}
|
||||
|
||||
/* Update Bind-state cache. */
|
||||
this->cached_fragment_buffer_bindings[index].is_bytes = true;
|
||||
this->cached_fragment_buffer_bindings[index].metal_buffer = nil;
|
||||
this->cached_fragment_buffer_bindings[index].offset = -1;
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
@@ -3,7 +3,9 @@
|
||||
#ifndef __MTL_COMMON
|
||||
#define __MTL_COMMON
|
||||
|
||||
// -- Renderer Options --
|
||||
/** -- Renderer Options -- */
|
||||
/* Number of frames over which rolling averages are taken. */
|
||||
#define MTL_FRAME_AVERAGE_COUNT 5
|
||||
#define MTL_MAX_DRAWABLES 3
|
||||
#define MTL_MAX_SET_BYTES_SIZE 4096
|
||||
#define MTL_FORCE_WAIT_IDLE 0
|
||||
|
||||
@@ -12,6 +12,10 @@
|
||||
#include "GPU_common_types.h"
|
||||
#include "GPU_context.h"
|
||||
|
||||
#include "intern/GHOST_Context.h"
|
||||
#include "intern/GHOST_ContextCGL.h"
|
||||
#include "intern/GHOST_Window.h"
|
||||
|
||||
#include "mtl_backend.hh"
|
||||
#include "mtl_capabilities.hh"
|
||||
#include "mtl_common.hh"
|
||||
@@ -570,12 +574,44 @@ class MTLCommandBufferManager {
|
||||
|
||||
class MTLContext : public Context {
|
||||
friend class MTLBackend;
|
||||
friend class MTLRenderPassState;
|
||||
|
||||
public:
|
||||
/* Swapchain and latency management. */
|
||||
static std::atomic<int> max_drawables_in_flight;
|
||||
static std::atomic<int64_t> avg_drawable_latency_us;
|
||||
static int64_t frame_latency[MTL_FRAME_AVERAGE_COUNT];
|
||||
|
||||
public:
|
||||
/* Shaders and Pipeline state. */
|
||||
MTLContextGlobalShaderPipelineState pipeline_state;
|
||||
|
||||
/* Metal API Resource Handles. */
|
||||
id<MTLCommandQueue> queue = nil;
|
||||
id<MTLDevice> device = nil;
|
||||
|
||||
#ifndef NDEBUG
|
||||
  /* Label for Context debug name assignment. */
|
||||
NSString *label = nil;
|
||||
#endif
|
||||
|
||||
/* Memory Management. */
|
||||
MTLScratchBufferManager memory_manager;
|
||||
static MTLBufferPool global_memory_manager;
|
||||
|
||||
/* CommandBuffer managers. */
|
||||
MTLCommandBufferManager main_command_buffer;
|
||||
|
||||
private:
|
||||
/* Null buffers for empty/uninitialized bindings.
|
||||
* Null attribute buffer follows default attribute format of OpenGL Back-end. */
|
||||
id<MTLBuffer> null_buffer_; /* All zero's. */
|
||||
id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
|
||||
/* Parent Context. */
|
||||
GHOST_ContextCGL *ghost_context_;
|
||||
|
||||
/* Render Passes and Framebuffers. */
|
||||
id<MTLTexture> default_fbo_mtltexture_ = nil;
|
||||
gpu::MTLTexture *default_fbo_gputexture_ = nullptr;
|
||||
|
||||
/* Depth-stencil state cache. */
|
||||
blender::Map<MTLContextDepthStencilState, id<MTLDepthStencilState>> depth_stencil_state_cache;
|
||||
|
||||
/* Compute and specialization caches. */
|
||||
MTLContextTextureUtils texture_utils_;
|
||||
@@ -601,23 +637,20 @@ class MTLContext : public Context {
|
||||
gpu::MTLBuffer *visibility_buffer_ = nullptr;
|
||||
bool visibility_is_dirty_ = false;
|
||||
|
||||
/* Null buffers for empty/unintialized bindings.
|
||||
* Null attribute buffer follows default attribute format of OpenGL Backend. */
|
||||
id<MTLBuffer> null_buffer_; /* All zero's. */
|
||||
id<MTLBuffer> null_attribute_buffer_; /* Value float4(0.0,0.0,0.0,1.0). */
|
||||
|
||||
/** Dummy Resources */
|
||||
/* Maximum of 32 texture types. Though most combinations invalid. */
|
||||
gpu::MTLTexture *dummy_textures_[GPU_TEXTURE_BUFFER] = {nullptr};
|
||||
GPUVertFormat dummy_vertformat_;
|
||||
GPUVertBuf *dummy_verts_ = nullptr;
|
||||
|
||||
public:
|
||||
/* Shaders and Pipeline state. */
|
||||
MTLContextGlobalShaderPipelineState pipeline_state;
|
||||
|
||||
/* Metal API Resource Handles. */
|
||||
id<MTLCommandQueue> queue = nil;
|
||||
id<MTLDevice> device = nil;
|
||||
|
||||
/* Memory Management */
|
||||
MTLScratchBufferManager memory_manager;
|
||||
static MTLBufferPool global_memory_manager;
|
||||
|
||||
/* CommandBuffer managers. */
|
||||
MTLCommandBufferManager main_command_buffer;
|
||||
|
||||
/* GPUContext interface. */
|
||||
MTLContext(void *ghost_window);
|
||||
MTLContext(void *ghost_window, void *ghost_context);
|
||||
~MTLContext();
|
||||
|
||||
static void check_error(const char *info);
|
||||
@@ -673,6 +706,35 @@ class MTLContext : public Context {
|
||||
void pipeline_state_init();
|
||||
MTLShader *get_active_shader();
|
||||
|
||||
/* These functions ensure that the current RenderCommandEncoder has
|
||||
* the correct global state assigned. This should be called prior
|
||||
* to every draw call, to ensure that all state is applied and up
|
||||
* to date. We handle:
|
||||
*
|
||||
* - Buffer bindings (Vertex buffers, Uniforms, UBOs, transform feedback)
|
||||
* - Texture bindings
|
||||
* - Sampler bindings (+ argument buffer bindings)
|
||||
* - Dynamic Render pipeline state (on encoder)
|
||||
* - Baking Pipeline State Objects (PSOs) for current shader, based
|
||||
* on final pipeline state.
|
||||
*
|
||||
* `ensure_render_pipeline_state` will return false if the state is
|
||||
* invalid and cannot be applied. This should cancel a draw call. */
|
||||
bool ensure_render_pipeline_state(MTLPrimitiveType prim_type);
|
||||
bool ensure_uniform_buffer_bindings(
|
||||
id<MTLRenderCommandEncoder> rec,
|
||||
const MTLShaderInterface *shader_interface,
|
||||
const MTLRenderPipelineStateInstance *pipeline_state_instance);
|
||||
void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec,
|
||||
MTLShaderInterface *shader_interface,
|
||||
const MTLRenderPipelineStateInstance *pipeline_state_instance);
|
||||
void ensure_depth_stencil_state(MTLPrimitiveType prim_type);
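
  /* Minimal per-draw usage sketch (mirrors MTLImmediate::end() later in this patch):
   *
   *   id<MTLRenderCommandEncoder> rec = ctx->ensure_begin_render_pass();
   *   // ...bind buffers/bytes/textures via the active MTLRenderPassState...
   *   if (ctx->ensure_render_pipeline_state(MTLPrimitiveTypeTriangle)) {
   *     ctx->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
   *     [rec drawPrimitives:MTLPrimitiveTypeTriangle vertexStart:0 vertexCount:vertex_count];
   *   }
   */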
|
||||
|
||||
id<MTLBuffer> get_null_buffer();
|
||||
id<MTLBuffer> get_null_attribute_buffer();
|
||||
gpu::MTLTexture *get_dummy_texture(eGPUTextureType type);
|
||||
void free_dummy_resources();
|
||||
|
||||
/* State assignment. */
|
||||
void set_viewport(int origin_x, int origin_y, int width, int height);
|
||||
void set_scissor(int scissor_x, int scissor_y, int scissor_width, int scissor_height);
|
||||
@@ -720,9 +782,37 @@ class MTLContext : public Context {
|
||||
{
|
||||
return MTLContext::global_memory_manager;
|
||||
}
|
||||
/* Uniform Buffer Bindings to command encoders. */
|
||||
id<MTLBuffer> get_null_buffer();
|
||||
id<MTLBuffer> get_null_attribute_buffer();
|
||||
|
||||
/* Swapchain and latency management. */
|
||||
static void latency_resolve_average(int64_t frame_latency_us)
|
||||
{
|
||||
int64_t avg = 0;
|
||||
int64_t frame_c = 0;
|
||||
for (int i = MTL_FRAME_AVERAGE_COUNT - 1; i > 0; i--) {
|
||||
MTLContext::frame_latency[i] = MTLContext::frame_latency[i - 1];
|
||||
avg += MTLContext::frame_latency[i];
|
||||
frame_c += (MTLContext::frame_latency[i] > 0) ? 1 : 0;
|
||||
}
|
||||
MTLContext::frame_latency[0] = frame_latency_us;
|
||||
avg += MTLContext::frame_latency[0];
|
||||
if (frame_c > 0) {
|
||||
avg /= frame_c;
|
||||
}
|
||||
else {
|
||||
avg = 0;
|
||||
}
|
||||
MTLContext::avg_drawable_latency_us = avg;
|
||||
}
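
  /* Worked example of the rolling average as written: if frame_latency held
   * {16000, 17000, 0, 0, 0} and the new sample is 20000, the shifted non-zero
   * entries give frame_c = 2 while the sum also includes the new sample, so
   * avg_drawable_latency_us = (16000 + 17000 + 20000) / 2 = 26500. */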
|
||||
|
||||
private:
|
||||
void set_ghost_context(GHOST_ContextHandle ghostCtxHandle);
|
||||
void set_ghost_window(GHOST_WindowHandle ghostWinHandle);
|
||||
};
|
||||
|
||||
/* GHOST Context callback and present. */
|
||||
void present(MTLRenderPassDescriptor *blit_descriptor,
|
||||
id<MTLRenderPipelineState> blit_pso,
|
||||
id<MTLTexture> swapchain_texture,
|
||||
id<CAMetalDrawable> drawable);
|
||||
|
||||
} // namespace blender::gpu
|
||||
|
||||
(File diff suppressed because it is too large.)
source/blender/gpu/metal/mtl_drawlist.hh (new file, 34 lines)
@@ -0,0 +1,34 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/** \file
|
||||
* \ingroup gpu
|
||||
*
|
||||
 * Implementation of Multi Draw Indirect for the Metal back-end.
 * Currently a temporary pass-through stub (see MTLDrawList below).
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
|
||||
|
||||
#include "gpu_drawlist_private.hh"
|
||||
|
||||
namespace blender {
|
||||
namespace gpu {
|
||||
|
||||
/**
|
||||
* TODO(Metal): MTLDrawList Implementation. Included as temporary stub.
|
||||
*/
|
||||
class MTLDrawList : public DrawList {
|
||||
public:
|
||||
MTLDrawList(int length) {}
|
||||
~MTLDrawList() {}
|
||||
|
||||
void append(GPUBatch *batch, int i_first, int i_count) override {}
|
||||
void submit() override {}
|
||||
|
||||
MEM_CXX_CLASS_ALLOC_FUNCS("MTLDrawList");
|
||||
};
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace blender
|
||||
source/blender/gpu/metal/mtl_immediate.hh (new file, 41 lines)
@@ -0,0 +1,41 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/** \file
|
||||
* \ingroup gpu
|
||||
*
|
||||
* Mimics old style opengl immediate mode drawing.
|
||||
*/
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "MEM_guardedalloc.h"
|
||||
#include "gpu_immediate_private.hh"
|
||||
|
||||
#include <Cocoa/Cocoa.h>
|
||||
#include <Metal/Metal.h>
|
||||
#include <QuartzCore/QuartzCore.h>
|
||||
|
||||
namespace blender::gpu {
|
||||
|
||||
class MTLImmediate : public Immediate {
|
||||
private:
|
||||
MTLContext *context_ = nullptr;
|
||||
MTLTemporaryBuffer current_allocation_;
|
||||
MTLPrimitiveTopologyClass metal_primitive_mode_;
|
||||
MTLPrimitiveType metal_primitive_type_;
|
||||
bool has_begun_ = false;
|
||||
|
||||
public:
|
||||
MTLImmediate(MTLContext *ctx);
|
||||
~MTLImmediate();
|
||||
|
||||
uchar *begin() override;
|
||||
void end() override;
|
||||
bool imm_is_recording()
|
||||
{
|
||||
return has_begun_;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace blender::gpu
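
For context, MTLImmediate backs the generic immediate-mode API. A minimal usage sketch, mirroring
the temporary blit path added later in this patch and assuming a previously created GPUShader
handle named shader, is:

GPUVertFormat *format = immVertexFormat();
uint pos = GPU_vertformat_attr_add(format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);

immBindShader(shader);
immBegin(GPU_PRIM_TRI_STRIP, 4); /* -> MTLImmediate::begin() allocates scratch-buffer memory. */
immVertex2f(pos, 1, 0);
immVertex2f(pos, 0, 0);
immVertex2f(pos, 1, 1);
immVertex2f(pos, 0, 1);
immEnd();                        /* -> MTLImmediate::end() encodes and submits the draw. */
immUnbindProgram();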
|
||||
source/blender/gpu/metal/mtl_immediate.mm (new file, 397 lines)
@@ -0,0 +1,397 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/** \file
|
||||
* \ingroup gpu
|
||||
*
|
||||
* Mimics old style opengl immediate mode drawing.
|
||||
*/
|
||||
|
||||
#include "BKE_global.h"
|
||||
|
||||
#include "GPU_vertex_format.h"
|
||||
#include "gpu_context_private.hh"
|
||||
#include "gpu_shader_private.hh"
|
||||
#include "gpu_vertex_format_private.h"
|
||||
|
||||
#include "mtl_context.hh"
|
||||
#include "mtl_debug.hh"
|
||||
#include "mtl_immediate.hh"
|
||||
#include "mtl_primitive.hh"
|
||||
#include "mtl_shader.hh"
|
||||
|
||||
namespace blender::gpu {
|
||||
|
||||
MTLImmediate::MTLImmediate(MTLContext *ctx)
|
||||
{
|
||||
context_ = ctx;
|
||||
}
|
||||
|
||||
MTLImmediate::~MTLImmediate()
|
||||
{
|
||||
}
|
||||
|
||||
uchar *MTLImmediate::begin()
|
||||
{
|
||||
BLI_assert(!has_begun_);
|
||||
|
||||
/* Determine primitive type. */
|
||||
metal_primitive_type_ = gpu_prim_type_to_metal(this->prim_type);
|
||||
metal_primitive_mode_ = mtl_prim_type_to_topology_class(metal_primitive_type_);
|
||||
has_begun_ = true;
|
||||
|
||||
/* Allocate a range of data and return host-accessible pointer. */
|
||||
const size_t bytes_needed = vertex_buffer_size(&vertex_format, vertex_len);
|
||||
current_allocation_ = context_->get_scratchbuffer_manager()
|
||||
.scratch_buffer_allocate_range_aligned(bytes_needed, 256);
|
||||
[current_allocation_.metal_buffer retain];
|
||||
return reinterpret_cast<uchar *>(current_allocation_.data);
|
||||
}
|
||||
|
||||
void MTLImmediate::end()
|
||||
{
|
||||
  /* Ensure we're between an imm::begin/imm::end pair. */
|
||||
BLI_assert(has_begun_);
|
||||
BLI_assert(prim_type != GPU_PRIM_NONE);
|
||||
|
||||
/* Verify context is valid, vertex data is written and a valid shader is bound. */
|
||||
if (context_ && this->vertex_idx > 0 && this->shader) {
|
||||
|
||||
MTLShader *active_mtl_shader = static_cast<MTLShader *>(unwrap(shader));
|
||||
|
||||
/* Skip draw if Metal shader is not valid. */
|
||||
if (active_mtl_shader == nullptr || !active_mtl_shader->is_valid() ||
|
||||
active_mtl_shader->get_interface() == nullptr) {
|
||||
|
||||
const char *ptr = (active_mtl_shader) ? active_mtl_shader->name_get() : nullptr;
|
||||
MTL_LOG_WARNING(
|
||||
"MTLImmediate::end -- cannot perform draw as active shader is NULL or invalid (likely "
|
||||
"unimplemented) (shader %p '%s')\n",
|
||||
active_mtl_shader,
|
||||
ptr);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Ensure we are inside a render pass and fetch active RenderCommandEncoder. */
|
||||
id<MTLRenderCommandEncoder> rec = context_->ensure_begin_render_pass();
|
||||
BLI_assert(rec != nil);
|
||||
|
||||
/* Fetch active render pipeline state. */
|
||||
MTLRenderPassState &rps = context_->main_command_buffer.get_render_pass_state();
|
||||
|
||||
/* Bind Shader. */
|
||||
GPU_shader_bind(this->shader);
|
||||
|
||||
/* Debug markers for frame-capture and detailed error messages. */
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
[rec pushDebugGroup:[NSString
|
||||
stringWithFormat:@"immEnd(verts: %d, shader: %s)",
|
||||
this->vertex_idx,
|
||||
active_mtl_shader->get_interface()->get_name()]];
|
||||
[rec insertDebugSignpost:[NSString stringWithFormat:@"immEnd(verts: %d, shader: %s)",
|
||||
this->vertex_idx,
|
||||
active_mtl_shader->get_interface()
|
||||
->get_name()]];
|
||||
}
|
||||
|
||||
/* Populate pipeline state vertex descriptor. */
|
||||
MTLStateManager *state_manager = static_cast<MTLStateManager *>(
|
||||
MTLContext::get()->state_manager);
|
||||
MTLRenderPipelineStateDescriptor &desc = state_manager->get_pipeline_descriptor();
|
||||
const MTLShaderInterface *interface = active_mtl_shader->get_interface();
|
||||
|
||||
desc.vertex_descriptor.num_attributes = interface->get_total_attributes();
|
||||
desc.vertex_descriptor.num_vert_buffers = 1;
|
||||
|
||||
for (int i = 0; i < desc.vertex_descriptor.num_attributes; i++) {
|
||||
desc.vertex_descriptor.attributes[i].format = MTLVertexFormatInvalid;
|
||||
}
|
||||
desc.vertex_descriptor.uses_ssbo_vertex_fetch =
|
||||
active_mtl_shader->get_uses_ssbo_vertex_fetch();
|
||||
desc.vertex_descriptor.num_ssbo_attributes = 0;
|
||||
|
||||
/* SSBO Vertex Fetch -- Verify Attributes. */
|
||||
if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
|
||||
active_mtl_shader->ssbo_vertex_fetch_bind_attributes_begin();
|
||||
|
||||
/* Disable Indexed rendering in SSBO vertex fetch. */
|
||||
int uniform_ssbo_use_indexed = active_mtl_shader->uni_ssbo_uses_indexed_rendering;
|
||||
BLI_assert_msg(uniform_ssbo_use_indexed != -1, "Expected valid uniform location for ssbo_uses_indexed_rendering.");
|
||||
int uses_indexed_rendering = 0;
|
||||
active_mtl_shader->uniform_int(uniform_ssbo_use_indexed, 1, 1, &uses_indexed_rendering);
|
||||
}
|
||||
|
||||
/* Populate Vertex descriptor and verify attributes.
|
||||
* TODO(Metal): Cache this vertex state based on Vertex format and shaders. */
|
||||
for (int i = 0; i < interface->get_total_attributes(); i++) {
|
||||
|
||||
/* Note: Attribute in VERTEX FORMAT does not necessarily share the same array index as
|
||||
* attributes in shader interface. */
|
||||
GPUVertAttr *attr = nullptr;
|
||||
const MTLShaderInputAttribute &mtl_shader_attribute = interface->get_attribute(i);
|
||||
|
||||
/* Scan through vertex_format attributes until one with a name matching the shader interface
|
||||
* is found. */
|
||||
for (uint32_t a_idx = 0; a_idx < this->vertex_format.attr_len && attr == nullptr; a_idx++) {
|
||||
GPUVertAttr *check_attribute = &this->vertex_format.attrs[a_idx];
|
||||
|
||||
/* Attributes can have multiple name aliases associated with them. */
|
||||
for (uint32_t n_idx = 0; n_idx < check_attribute->name_len; n_idx++) {
|
||||
const char *name = GPU_vertformat_attr_name_get(
|
||||
&this->vertex_format, check_attribute, n_idx);
|
||||
|
||||
if (strcmp(name, interface->get_name_at_offset(mtl_shader_attribute.name_offset)) == 0) {
|
||||
attr = check_attribute;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BLI_assert_msg(attr != nullptr,
|
||||
"Could not find expected attribute in immediate mode vertex format.");
|
||||
if (attr == nullptr) {
|
||||
MTL_LOG_ERROR(
|
||||
"MTLImmediate::end Could not find matching attribute '%s' from Shader Interface in "
|
||||
"Vertex Format! - TODO: Bind Dummy attribute\n",
|
||||
interface->get_name_at_offset(mtl_shader_attribute.name_offset));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Determine whether implicit type conversion between input vertex format
|
||||
* and shader interface vertex format is supported. */
|
||||
MTLVertexFormat convertedFormat;
|
||||
bool can_use_implicit_conversion = mtl_convert_vertex_format(
|
||||
mtl_shader_attribute.format,
|
||||
(GPUVertCompType)attr->comp_type,
|
||||
attr->comp_len,
|
||||
(GPUVertFetchMode)attr->fetch_mode,
|
||||
&convertedFormat);
|
||||
|
||||
if (can_use_implicit_conversion) {
|
||||
/* Metal API can implicitly convert some formats during vertex assembly:
|
||||
* - Converting from a normalized short2 format to float2
|
||||
* - Type truncation e.g. Float4 to Float2.
|
||||
* - Type expansion from Float3 to Float4.
|
||||
* - Note: extra components are filled with the corresponding components of (0,0,0,1).
|
||||
* (See
|
||||
* https://developer.apple.com/documentation/metal/mtlvertexattributedescriptor/1516081-format)
|
||||
*/
|
||||
bool is_floating_point_format = (attr->comp_type == GPU_COMP_F32);
|
||||
desc.vertex_descriptor.attributes[i].format = convertedFormat;
|
||||
desc.vertex_descriptor.attributes[i].format_conversion_mode =
|
||||
(is_floating_point_format) ? (GPUVertFetchMode)GPU_FETCH_FLOAT :
|
||||
(GPUVertFetchMode)GPU_FETCH_INT;
|
||||
BLI_assert(convertedFormat != MTLVertexFormatInvalid);
|
||||
}
|
||||
else {
|
||||
/* Some conversions are NOT valid, e.g. Int4 to Float4
|
||||
* - In this case, we need to implement a conversion routine inside the shader.
|
||||
* - This is handled using the format_conversion_mode flag
|
||||
* - This flag is passed into the PSO as a function specialisation,
|
||||
* and will generate an appropriate conversion function when reading the vertex attribute
|
||||
* value into local shader storage.
|
||||
         * (If no explicit conversion is needed, the function specializes to a pass-through). */
|
||||
MTLVertexFormat converted_format;
|
||||
bool can_convert = mtl_vertex_format_resize(
|
||||
mtl_shader_attribute.format, attr->comp_len, &converted_format);
|
||||
desc.vertex_descriptor.attributes[i].format = (can_convert) ? converted_format :
|
||||
mtl_shader_attribute.format;
|
||||
desc.vertex_descriptor.attributes[i].format_conversion_mode = (GPUVertFetchMode)
|
||||
attr->fetch_mode;
|
||||
BLI_assert(desc.vertex_descriptor.attributes[i].format != MTLVertexFormatInvalid);
|
||||
}
|
||||
/* Using attribute offset in vertex format, as this will be correct */
|
||||
desc.vertex_descriptor.attributes[i].offset = attr->offset;
|
||||
desc.vertex_descriptor.attributes[i].buffer_index = mtl_shader_attribute.buffer_index;
|
||||
|
||||
/* SSBO Vertex Fetch Attribute bind. */
|
||||
if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
|
||||
BLI_assert_msg(mtl_shader_attribute.buffer_index == 0,
|
||||
"All attributes should be in buffer index zero");
|
||||
MTLSSBOAttribute ssbo_attr(
|
||||
mtl_shader_attribute.index,
|
||||
mtl_shader_attribute.buffer_index,
|
||||
attr->offset,
|
||||
this->vertex_format.stride,
|
||||
MTLShader::ssbo_vertex_type_to_attr_type(desc.vertex_descriptor.attributes[i].format),
|
||||
false);
|
||||
desc.vertex_descriptor.ssbo_attributes[desc.vertex_descriptor.num_ssbo_attributes] =
|
||||
ssbo_attr;
|
||||
desc.vertex_descriptor.num_ssbo_attributes++;
|
||||
active_mtl_shader->ssbo_vertex_fetch_bind_attribute(ssbo_attr);
|
||||
}
|
||||
}
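
    /* Illustrative mapping for the implicit-conversion path above: an attribute declared as
     * (GPU_COMP_U16, 2, GPU_FETCH_INT_TO_FLOAT_UNIT) feeding a float2 shader input can resolve to
     * MTLVertexFormatUShort2Normalized, letting vertex assembly perform the normalization. The
     * exact format is whatever mtl_convert_vertex_format() returns; this is only an example. */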
|
||||
|
||||
/* Buffer bindings for singular vertex buffer. */
|
||||
desc.vertex_descriptor.buffer_layouts[0].step_function = MTLVertexStepFunctionPerVertex;
|
||||
desc.vertex_descriptor.buffer_layouts[0].step_rate = 1;
|
||||
desc.vertex_descriptor.buffer_layouts[0].stride = this->vertex_format.stride;
|
||||
BLI_assert(this->vertex_format.stride > 0);
|
||||
|
||||
/* SSBO Vertex Fetch -- Verify Attributes. */
|
||||
if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
|
||||
active_mtl_shader->ssbo_vertex_fetch_bind_attributes_end(rec);
|
||||
|
||||
/* Set Status uniforms. */
|
||||
BLI_assert_msg(active_mtl_shader->uni_ssbo_input_prim_type_loc != -1,
|
||||
"ssbo_input_prim_type uniform location invalid!");
|
||||
BLI_assert_msg(active_mtl_shader->uni_ssbo_input_vert_count_loc != -1,
|
||||
"ssbo_input_vert_count uniform location invalid!");
|
||||
GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
|
||||
active_mtl_shader->uni_ssbo_input_prim_type_loc,
|
||||
1,
|
||||
1,
|
||||
(const int *)(&this->prim_type));
|
||||
GPU_shader_uniform_vector_int(reinterpret_cast<GPUShader *>(wrap(active_mtl_shader)),
|
||||
active_mtl_shader->uni_ssbo_input_vert_count_loc,
|
||||
1,
|
||||
1,
|
||||
(const int *)(&this->vertex_idx));
|
||||
}
|
||||
|
||||
MTLPrimitiveType mtl_prim_type = gpu_prim_type_to_metal(this->prim_type);
|
||||
if (context_->ensure_render_pipeline_state(mtl_prim_type)) {
|
||||
|
||||
/* Issue draw call. */
|
||||
BLI_assert(this->vertex_idx > 0);
|
||||
|
||||
      /* Metal API does not support triangle fans, so we emulate them by
       * generating an index buffer that re-maps the input vertices to a
       * triangle list.
|
||||
*
|
||||
* NOTE(Metal): Consider caching generated triangle fan index buffers.
|
||||
* For immediate mode, generating these is currently very cheap, as we use
|
||||
* fast scratch buffer allocations. Though we may benefit from caching of
|
||||
* frequently used buffer sizes. */
|
||||
if (mtl_needs_topology_emulation(this->prim_type)) {
|
||||
|
||||
/* Debug safety check for SSBO FETCH MODE. */
|
||||
if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
|
||||
BLI_assert(false && "Topology emulation not supported with SSBO Vertex Fetch mode");
|
||||
}
|
||||
|
||||
/* Emulate Tri-fan. */
|
||||
if (this->prim_type == GPU_PRIM_TRI_FAN) {
|
||||
/* Prepare Triangle-Fan emulation index buffer on CPU based on number of input
|
||||
* vertices. */
|
||||
uint32_t base_vert_count = this->vertex_idx;
|
||||
uint32_t num_triangles = max_ii(base_vert_count - 2, 0);
|
||||
uint32_t fan_index_count = num_triangles * 3;
|
||||
BLI_assert(num_triangles > 0);
|
||||
|
||||
uint32_t alloc_size = sizeof(uint32_t) * fan_index_count;
|
||||
uint32_t *index_buffer = nullptr;
|
||||
|
||||
MTLTemporaryBuffer allocation =
|
||||
context_->get_scratchbuffer_manager().scratch_buffer_allocate_range_aligned(
|
||||
alloc_size, 128);
|
||||
index_buffer = (uint32_t *)allocation.data;
|
||||
|
||||
int a = 0;
|
||||
for (int i = 0; i < num_triangles; i++) {
|
||||
index_buffer[a++] = 0;
|
||||
index_buffer[a++] = i + 1;
|
||||
index_buffer[a++] = i + 2;
|
||||
}
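
          /* Example: base_vert_count = 5 yields num_triangles = 3 and indices
           * 0,1,2  0,2,3  0,3,4 -- three triangles sharing vertex 0, which
           * reproduces the original fan. */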
|
||||
|
||||
@autoreleasepool {
|
||||
|
||||
id<MTLBuffer> index_buffer_mtl = nil;
|
||||
uint32_t index_buffer_offset = 0;
|
||||
|
||||
/* Region of scratch buffer used for topology emulation element data.
|
||||
* NOTE(Metal): We do not need to manually flush as the entire scratch
|
||||
           * buffer for the current command buffer is flushed upon submission. */
|
||||
index_buffer_mtl = allocation.metal_buffer;
|
||||
index_buffer_offset = allocation.buffer_offset;
|
||||
|
||||
/* Set depth stencil state (requires knowledge of primitive type). */
|
||||
context_->ensure_depth_stencil_state(MTLPrimitiveTypeTriangle);
|
||||
|
||||
/* Bind Vertex Buffer. */
|
||||
rps.bind_vertex_buffer(
|
||||
current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
|
||||
|
||||
/* Draw. */
|
||||
[rec drawIndexedPrimitives:MTLPrimitiveTypeTriangle
|
||||
indexCount:fan_index_count
|
||||
indexType:MTLIndexTypeUInt32
|
||||
indexBuffer:index_buffer_mtl
|
||||
indexBufferOffset:index_buffer_offset];
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* TODO(Metal): Topology emulation for line loop.
|
||||
         * NOTE(Metal): This path is currently not used anywhere; such cases are
         * instead reworked at a higher level for efficiency. */
|
||||
BLI_assert_msg(false, "LineLoop requires emulation support in immediate mode.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
MTLPrimitiveType primitive_type = metal_primitive_type_;
|
||||
int vertex_count = this->vertex_idx;
|
||||
|
||||
/* Bind Vertex Buffer. */
|
||||
rps.bind_vertex_buffer(
|
||||
current_allocation_.metal_buffer, current_allocation_.buffer_offset, 0);
|
||||
|
||||
/* Set depth stencil state (requires knowledge of primitive type). */
|
||||
context_->ensure_depth_stencil_state(primitive_type);
|
||||
|
||||
if (active_mtl_shader->get_uses_ssbo_vertex_fetch()) {
|
||||
|
||||
/* Bind Null Buffers for empty/missing bind slots. */
|
||||
id<MTLBuffer> null_buffer = context_->get_null_buffer();
|
||||
BLI_assert(null_buffer != nil);
|
||||
for (int i = 1; i < MTL_SSBO_VERTEX_FETCH_MAX_VBOS; i++) {
|
||||
|
||||
          /* We only need to ensure a buffer is bound to the context; its contents do not matter
|
||||
* as it will not be used. */
|
||||
if (rps.cached_vertex_buffer_bindings[i].metal_buffer == nil) {
|
||||
rps.bind_vertex_buffer(null_buffer, 0, i);
|
||||
}
|
||||
}
|
||||
|
||||
/* SSBO vertex fetch - Nullify elements buffer. */
|
||||
if (rps.cached_vertex_buffer_bindings[MTL_SSBO_VERTEX_FETCH_IBO_INDEX].metal_buffer ==
|
||||
nil) {
|
||||
rps.bind_vertex_buffer(null_buffer, 0, MTL_SSBO_VERTEX_FETCH_IBO_INDEX);
|
||||
}
|
||||
|
||||
/* Submit draw call with modified vertex count, which reflects vertices per primitive
|
||||
* defined in the USE_SSBO_VERTEX_FETCH pragma. */
|
||||
int num_input_primitives = gpu_get_prim_count_from_type(vertex_count, this->prim_type);
|
||||
int output_num_verts = num_input_primitives *
|
||||
active_mtl_shader->get_ssbo_vertex_fetch_output_num_verts();
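
        /* Illustrative numbers: 6 immediate-mode vertices drawn as GPU_PRIM_TRIS give
         * num_input_primitives = 2; if the shader's USE_SSBO_VERTEX_FETCH pragma declares
         * 6 output vertices per input primitive (example value), output_num_verts = 12. */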
|
||||
#ifndef NDEBUG
|
||||
BLI_assert(
|
||||
mtl_vertex_count_fits_primitive_type(
|
||||
output_num_verts, active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()) &&
|
||||
"Output Vertex count is not compatible with the requested output vertex primitive "
|
||||
"type");
|
||||
#endif
|
||||
[rec drawPrimitives:active_mtl_shader->get_ssbo_vertex_fetch_output_prim_type()
|
||||
vertexStart:0
|
||||
vertexCount:output_num_verts];
|
||||
context_->main_command_buffer.register_draw_counters(output_num_verts);
|
||||
}
|
||||
else {
|
||||
/* Regular draw. */
|
||||
[rec drawPrimitives:primitive_type vertexStart:0 vertexCount:vertex_count];
|
||||
context_->main_command_buffer.register_draw_counters(vertex_count);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
[rec popDebugGroup];
|
||||
}
|
||||
}
|
||||
|
||||
/* Reset allocation after draw submission. */
|
||||
has_begun_ = false;
|
||||
if (current_allocation_.metal_buffer) {
|
||||
[current_allocation_.metal_buffer release];
|
||||
current_allocation_.metal_buffer = nil;
|
||||
}
|
||||
}
|
||||
|
||||
} // blender::gpu
|
||||
@@ -340,13 +340,13 @@ class MTLBufferPool {
|
||||
|
||||
private:
|
||||
/* Memory statistics. */
|
||||
long long int total_allocation_bytes_ = 0;
|
||||
int64_t total_allocation_bytes_ = 0;
|
||||
|
||||
#if MTL_DEBUG_MEMORY_STATISTICS == 1
|
||||
/* Debug statistics. */
|
||||
std::atomic<int> per_frame_allocation_count_;
|
||||
std::atomic<long long int> allocations_in_pool_;
|
||||
std::atomic<long long int> buffers_in_pool_;
|
||||
std::atomic<int64_t> allocations_in_pool_;
|
||||
std::atomic<int64_t> buffers_in_pool_;
|
||||
#endif
|
||||
|
||||
/* Metal resources. */
|
||||
|
||||
@@ -261,8 +261,6 @@ class MTLShader : public Shader {
|
||||
bool get_push_constant_is_dirty();
|
||||
void push_constant_bindstate_mark_dirty(bool is_dirty);
|
||||
|
||||
void vertformat_from_shader(GPUVertFormat *format) const override;
|
||||
|
||||
/* DEPRECATED: Kept only because of BGL API. (Returning -1 in METAL). */
|
||||
int program_handle_get() const override
|
||||
{
|
||||
|
||||
@@ -129,6 +129,7 @@ MTLShader::~MTLShader()
|
||||
|
||||
if (shd_builder_ != nullptr) {
|
||||
delete shd_builder_;
|
||||
shd_builder_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -209,6 +210,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
|
||||
|
||||
/* Release temporary compilation resources. */
|
||||
delete shd_builder_;
|
||||
shd_builder_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -279,6 +281,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
|
||||
|
||||
/* Release temporary compilation resources. */
|
||||
delete shd_builder_;
|
||||
shd_builder_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -324,6 +327,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
|
||||
|
||||
/* Release temporary compilation resources. */
|
||||
delete shd_builder_;
|
||||
shd_builder_ = nullptr;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -535,28 +539,6 @@ void MTLShader::push_constant_bindstate_mark_dirty(bool is_dirty)
|
||||
{
|
||||
push_constant_modified_ = is_dirty;
|
||||
}
|
||||
|
||||
void MTLShader::vertformat_from_shader(GPUVertFormat *format) const
|
||||
{
|
||||
GPU_vertformat_clear(format);
|
||||
|
||||
const MTLShaderInterface *mtl_interface = static_cast<const MTLShaderInterface *>(interface);
|
||||
for (const uint attr_id : IndexRange(mtl_interface->get_total_attributes())) {
|
||||
const MTLShaderInputAttribute &attr = mtl_interface->get_attribute(attr_id);
|
||||
|
||||
/* Extract type parameters from Metal type. */
|
||||
GPUVertCompType comp_type = comp_type_from_vert_format(attr.format);
|
||||
uint comp_len = comp_count_from_vert_format(attr.format);
|
||||
GPUVertFetchMode fetch_mode = fetchmode_from_vert_format(attr.format);
|
||||
|
||||
GPU_vertformat_attr_add(format,
|
||||
mtl_interface->get_name_at_offset(attr.name_offset),
|
||||
comp_type,
|
||||
comp_len,
|
||||
fetch_mode);
|
||||
}
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
@@ -1167,6 +1149,7 @@ void MTLShader::ssbo_vertex_fetch_bind_attribute(const MTLSSBOAttribute &ssbo_at
|
||||
MTLShaderInterface *mtl_interface = this->get_interface();
|
||||
BLI_assert(ssbo_attr.mtl_attribute_index >= 0 &&
|
||||
ssbo_attr.mtl_attribute_index < mtl_interface->get_total_attributes());
|
||||
UNUSED_VARS_NDEBUG(mtl_interface);
|
||||
|
||||
/* Update bind-mask to verify this attribute has been used. */
|
||||
BLI_assert((ssbo_vertex_attribute_bind_mask_ & (1 << ssbo_attr.mtl_attribute_index)) ==
|
||||
|
||||
@@ -724,10 +724,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
|
||||
}
|
||||
if (msl_iface.uses_ssbo_vertex_fetch_mode) {
|
||||
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH 1" << std::endl;
|
||||
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_MAX_VBOS " << MTL_SSBO_VERTEX_FETCH_MAX_VBOS
|
||||
<< std::endl;
|
||||
ss_vertex << "#define MTL_SSBO_VERTEX_FETCH_IBO_INDEX " << MTL_SSBO_VERTEX_FETCH_IBO_INDEX
|
||||
<< std::endl;
|
||||
for (const MSLVertexInputAttribute &attr : msl_iface.vertex_input_attributes) {
|
||||
ss_vertex << "#define SSBO_ATTR_TYPE_" << attr.name << " " << attr.type << std::endl;
|
||||
}
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "GPU_batch_presets.h"
|
||||
#include "GPU_capabilities.h"
|
||||
#include "GPU_framebuffer.h"
|
||||
#include "GPU_immediate.h"
|
||||
#include "GPU_platform.h"
|
||||
#include "GPU_state.h"
|
||||
|
||||
@@ -303,7 +304,6 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
|
||||
|
||||
/* Execute graphics draw call to perform the blit. */
|
||||
GPUBatch *quad = GPU_batch_preset_quad();
|
||||
|
||||
GPU_batch_set_shader(quad, shader);
|
||||
|
||||
float w = dst->width_get();
|
||||
@@ -337,6 +337,20 @@ void gpu::MTLTexture::blit(gpu::MTLTexture *dst,
|
||||
|
||||
GPU_batch_draw(quad);
|
||||
|
||||
/* TMP draw with IMM TODO(Metal): Remove this once GPUBatch is supported. */
|
||||
GPUVertFormat *imm_format = immVertexFormat();
|
||||
uint pos = GPU_vertformat_attr_add(imm_format, "pos", GPU_COMP_F32, 2, GPU_FETCH_FLOAT);
|
||||
|
||||
immBindShader(shader);
|
||||
immBegin(GPU_PRIM_TRI_STRIP, 4);
|
||||
immVertex2f(pos, 1, 0);
|
||||
immVertex2f(pos, 0, 0);
|
||||
immVertex2f(pos, 1, 1);
|
||||
immVertex2f(pos, 0, 1);
|
||||
immEnd();
|
||||
immUnbindProgram();
|
||||
/**********************/
|
||||
|
||||
/* restoring old pipeline state. */
|
||||
GPU_depth_mask(depth_write_prev);
|
||||
GPU_stencil_write_mask_set(stencil_mask_prev);
|
||||
@@ -1463,79 +1477,6 @@ bool gpu::MTLTexture::init_internal(GPUVertBuf *vbo)
|
||||
BLI_assert_msg(this->format_ != GPU_DEPTH24_STENCIL8,
|
||||
"Apple silicon does not support GPU_DEPTH24_S8");
|
||||
|
||||
MTLPixelFormat mtl_format = gpu_texture_format_to_metal(this->format_);
|
||||
mtl_max_mips_ = 1;
|
||||
mipmaps_ = 0;
|
||||
this->mip_range_set(0, 0);
|
||||
|
||||
/* Create texture from GPUVertBuf's buffer. */
|
||||
MTLVertBuf *mtl_vbo = static_cast<MTLVertBuf *>(unwrap(vbo));
|
||||
mtl_vbo->bind();
|
||||
mtl_vbo->flag_used();
|
||||
|
||||
/* Get Metal Buffer. */
|
||||
id<MTLBuffer> source_buffer = mtl_vbo->get_metal_buffer();
|
||||
BLI_assert(source_buffer);
|
||||
|
||||
/* Verify size. */
|
||||
if (w_ <= 0) {
|
||||
MTL_LOG_WARNING("Allocating texture buffer of width 0!\n");
|
||||
w_ = 1;
|
||||
}
|
||||
|
||||
/* Verify Texture and vertex buffer alignment. */
|
||||
int bytes_per_pixel = get_mtl_format_bytesize(mtl_format);
|
||||
int bytes_per_row = bytes_per_pixel * w_;
|
||||
|
||||
MTLContext *mtl_ctx = MTLContext::get();
|
||||
uint align_requirement = static_cast<uint>(
|
||||
[mtl_ctx->device minimumLinearTextureAlignmentForPixelFormat:mtl_format]);
|
||||
|
||||
/* Verify per-vertex size aligns with texture size. */
|
||||
const GPUVertFormat *format = GPU_vertbuf_get_format(vbo);
|
||||
BLI_assert(bytes_per_pixel == format->stride &&
|
||||
"Pixel format stride MUST match the texture format stride -- These being different "
|
||||
"is likely caused by Metal's VBO padding to a minimum of 4-bytes per-vertex");
|
||||
UNUSED_VARS_NDEBUG(format);
|
||||
|
||||
/* Create texture descriptor. */
|
||||
BLI_assert(type_ == GPU_TEXTURE_BUFFER);
|
||||
texture_descriptor_ = [[MTLTextureDescriptor alloc] init];
|
||||
texture_descriptor_.pixelFormat = mtl_format;
|
||||
texture_descriptor_.textureType = MTLTextureTypeTextureBuffer;
|
||||
texture_descriptor_.width = w_;
|
||||
texture_descriptor_.height = 1;
|
||||
texture_descriptor_.depth = 1;
|
||||
texture_descriptor_.arrayLength = 1;
|
||||
texture_descriptor_.mipmapLevelCount = mtl_max_mips_;
|
||||
texture_descriptor_.usage =
|
||||
MTLTextureUsageShaderRead | MTLTextureUsageShaderWrite |
|
||||
MTLTextureUsagePixelFormatView; /* TODO(Metal): Optimize usage flags. */
|
||||
texture_descriptor_.storageMode = [source_buffer storageMode];
|
||||
texture_descriptor_.sampleCount = 1;
|
||||
texture_descriptor_.cpuCacheMode = [source_buffer cpuCacheMode];
|
||||
texture_descriptor_.hazardTrackingMode = [source_buffer hazardTrackingMode];
|
||||
|
||||
texture_ = [source_buffer
|
||||
newTextureWithDescriptor:texture_descriptor_
|
||||
offset:0
|
||||
bytesPerRow:ceil_to_multiple_u(bytes_per_row, align_requirement)];
|
||||
aligned_w_ = bytes_per_row / bytes_per_pixel;
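
  /* Alignment example (illustrative values): with a 4 byte-per-pixel format and w_ = 100,
   * bytes_per_row = 400; a minimum linear-texture alignment of 256 makes ceil_to_multiple_u()
   * pass bytesPerRow = 512 to the texture creation call above. The real alignment comes from
   * the device query. */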
|
||||
|
||||
BLI_assert(texture_);
|
||||
texture_.label = [NSString stringWithUTF8String:this->get_name()];
|
||||
is_baked_ = true;
|
||||
is_dirty_ = false;
|
||||
resource_mode_ = MTL_TEXTURE_MODE_VBO;
|
||||
|
||||
/* Track Status. */
|
||||
vert_buffer_ = mtl_vbo;
|
||||
vert_buffer_mtl_ = source_buffer;
|
||||
|
||||
/* Cleanup. */
|
||||
[texture_descriptor_ release];
|
||||
texture_descriptor_ = nullptr;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -22,13 +22,7 @@
|
||||
/* Utility file for secondary functionality which supports mtl_texture.mm. */
|
||||
|
||||
extern char datatoc_compute_texture_update_msl[];
|
||||
extern char datatoc_depth_2d_update_vert_glsl[];
|
||||
extern char datatoc_depth_2d_update_float_frag_glsl[];
|
||||
extern char datatoc_depth_2d_update_int24_frag_glsl[];
|
||||
extern char datatoc_depth_2d_update_int32_frag_glsl[];
|
||||
extern char datatoc_compute_texture_read_msl[];
|
||||
extern char datatoc_gpu_shader_fullscreen_blit_vert_glsl[];
|
||||
extern char datatoc_gpu_shader_fullscreen_blit_frag_glsl[];
|
||||
|
||||
namespace blender::gpu {
|
||||
|
||||
@@ -447,42 +441,34 @@ GPUShader *gpu::MTLTexture::depth_2d_update_sh_get(
|
||||
return *result;
|
||||
}
|
||||
|
||||
const char *fragment_source = nullptr;
|
||||
const char *depth_2d_info_variant = nullptr;
|
||||
switch (specialization.data_mode) {
|
||||
case MTL_DEPTH_UPDATE_MODE_FLOAT:
|
||||
fragment_source = datatoc_depth_2d_update_float_frag_glsl;
|
||||
depth_2d_info_variant = "depth_2d_update_float";
|
||||
break;
|
||||
case MTL_DEPTH_UPDATE_MODE_INT24:
|
||||
fragment_source = datatoc_depth_2d_update_int24_frag_glsl;
|
||||
depth_2d_info_variant = "depth_2d_update_int24";
|
||||
break;
|
||||
case MTL_DEPTH_UPDATE_MODE_INT32:
|
||||
fragment_source = datatoc_depth_2d_update_int32_frag_glsl;
|
||||
depth_2d_info_variant = "depth_2d_update_int32";
|
||||
break;
|
||||
default:
|
||||
BLI_assert(false && "Invalid format mode\n");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
GPUShader *shader = GPU_shader_create(datatoc_depth_2d_update_vert_glsl,
|
||||
fragment_source,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
"depth_2d_update_sh_get");
|
||||
GPUShader *shader = GPU_shader_create_from_info_name(depth_2d_info_variant);
|
||||
mtl_context->get_texture_utils().depth_2d_update_shaders.add_new(specialization, shader);
|
||||
return shader;
|
||||
}
|
||||
|
||||
GPUShader *gpu::MTLTexture::fullscreen_blit_sh_get()
|
||||
{
|
||||
|
||||
MTLContext *mtl_context = static_cast<MTLContext *>(unwrap(GPU_context_active_get()));
|
||||
BLI_assert(mtl_context != nullptr);
|
||||
if (mtl_context->get_texture_utils().fullscreen_blit_shader == nullptr) {
|
||||
const char *vertex_source = datatoc_gpu_shader_fullscreen_blit_vert_glsl;
|
||||
const char *fragment_source = datatoc_gpu_shader_fullscreen_blit_frag_glsl;
|
||||
GPUShader *shader = GPU_shader_create(
|
||||
vertex_source, fragment_source, nullptr, nullptr, nullptr, "fullscreen_blit");
|
||||
GPUShader *shader = GPU_shader_create_from_info_name("fullscreen_blit");
|
||||
|
||||
mtl_context->get_texture_utils().fullscreen_blit_shader = shader;
|
||||
}
|
||||
return mtl_context->get_texture_utils().fullscreen_blit_shader;
|
||||
@@ -614,7 +600,7 @@ id<MTLComputePipelineState> gpu::MTLTexture::mtl_texture_read_impl(
|
||||
stringWithUTF8String:datatoc_compute_texture_read_msl];
|
||||
|
||||
/* Defensive Debug Checks. */
|
||||
long long int depth_scale_factor = 1;
|
||||
int64_t depth_scale_factor = 1;
|
||||
if (specialization_params.depth_format_mode > 0) {
|
||||
BLI_assert(specialization_params.component_count_input == 1);
|
||||
BLI_assert(specialization_params.component_count_output == 1);
|
||||
|
||||
@@ -61,7 +61,7 @@ class GLBackend : public GPUBackend {
|
||||
GLTexture::samplers_update();
|
||||
};
|
||||
|
||||
Context *context_alloc(void *ghost_window) override
|
||||
Context *context_alloc(void *ghost_window, void *ghost_context) override
|
||||
{
|
||||
return new GLContext(ghost_window, shared_orphan_list_);
|
||||
};
|
||||
|
||||
@@ -19,7 +19,7 @@ void GPUTest::SetUp()
|
||||
ghost_system = GHOST_CreateSystem();
|
||||
ghost_context = GHOST_CreateOpenGLContext(ghost_system, glSettings);
|
||||
GHOST_ActivateOpenGLContext(ghost_context);
|
||||
context = GPU_context_create(nullptr);
|
||||
context = GPU_context_create(nullptr, ghost_context);
|
||||
GPU_init();
|
||||
}
|
||||
|
||||
|
||||
@@ -926,7 +926,7 @@ void *RE_gl_context_get(Render *re)
|
||||
void *RE_gpu_context_get(Render *re)
|
||||
{
|
||||
if (re->gpu_context == nullptr) {
|
||||
re->gpu_context = GPU_context_create(nullptr);
|
||||
re->gpu_context = GPU_context_create(NULL, re->gl_context);
|
||||
}
|
||||
return re->gpu_context;
|
||||
}
|
||||
|
||||
@@ -1549,7 +1549,7 @@ static char *wm_main_playanim_intern(int argc, const char **argv)
|
||||
// GHOST_ActivateWindowDrawingContext(g_WS.ghost_window);
|
||||
|
||||
/* initialize OpenGL immediate mode */
|
||||
g_WS.gpu_context = GPU_context_create(g_WS.ghost_window);
|
||||
g_WS.gpu_context = GPU_context_create(g_WS.ghost_window, NULL);
|
||||
GPU_init();
|
||||
|
||||
/* initialize the font */
|
||||
|
||||
@@ -579,7 +579,7 @@ static void wm_window_ghostwindow_add(wmWindowManager *wm,
|
||||
glSettings);
|
||||
|
||||
if (ghostwin) {
|
||||
win->gpuctx = GPU_context_create(ghostwin);
|
||||
win->gpuctx = GPU_context_create(ghostwin, NULL);
|
||||
|
||||
/* needed so we can detect the graphics card below */
|
||||
GPU_init();
|
||||
|
||||