Undo: support implicit-sharing in memfile undo step #106903

Merged
Jacques Lucke merged 78 commits from JacquesLucke/blender:implicit-sharing-undo into main 2024-02-29 17:15:09 +01:00
41 changed files with 1805 additions and 637 deletions
Showing only changes of commit 061e3d827a

View File

@ -1628,7 +1628,7 @@ DISTRO_IDS_INSTALLERS = {
def get_distro(settings):
if settings.distro_id is not ...:
settings.logger.info(f"Distribution identifier forced by user to {settings.distro_id}.")
return
return settings.distro_id
import platform
info = platform.freedesktop_os_release()
ids = [info["ID"]]

View File

@ -4520,7 +4520,8 @@ static void xdg_output_handle_logical_size(void *data,
* Until this is fixed, validate that _some_ kind of scaling is being
* done (we can't match exactly because fractional scaling can't be
* detected otherwise), then override if necessary. */
if ((output->size_logical[0] == width) && (output->scale_fractional == wl_fixed_from_int(1))) {
if ((output->size_logical[0] == width) &&
(output->scale_fractional == (1 * FRACTIONAL_DENOMINATOR))) {
GHOST_PRINT("xdg_output scale did not match, overriding with wl_output scale\n");
#ifdef USE_GNOME_CONFINE_HACK
@ -4667,7 +4668,7 @@ static void output_handle_done(void *data, struct wl_output * /*wl_output*/)
GHOST_ASSERT(size_native[0] && output->size_logical[0],
"Screen size values were not set when they were expected to be.");
output->scale_fractional = wl_fixed_from_int(size_native[0]) / output->size_logical[0];
output->scale_fractional = (size_native[0] * FRACTIONAL_DENOMINATOR) / output->size_logical[0];
output->has_scale_fractional = true;
}
}
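For reference, the arithmetic above can be checked with made-up monitor sizes (a sketch, not part of the patch): storing the scale in 120ths keeps the whole computation in integers.

/* Sketch only, assuming a hypothetical 3840 px native width shown at a 2560 px logical width. */
constexpr int kFractionalDenominator = 120;
constexpr int kSizeNativeX = 3840;
constexpr int kSizeLogicalX = 2560;
/* (3840 * 120) / 2560 == 180, i.e. a 1.5x scale expressed in 120ths, with no floating point. */
static_assert((kSizeNativeX * kFractionalDenominator) / kSizeLogicalX == 180);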
@ -7063,6 +7064,7 @@ bool GHOST_SystemWayland::output_unref(wl_output *wl_output)
for (GHOST_IWindow *iwin : window_manager->getWindows()) {
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(iwin);
if (win->outputs_leave(output)) {
win->outputs_changed_update_scale_tag();
changed = true;
}
}
@ -7087,7 +7089,7 @@ void GHOST_SystemWayland::output_scale_update(GWL_Output *output)
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(iwin);
const std::vector<GWL_Output *> &outputs = win->outputs();
if (!(std::find(outputs.begin(), outputs.end(), output) == outputs.cend())) {
win->outputs_changed_update_scale();
win->outputs_changed_update_scale_tag();
}
}
}

View File

@ -67,6 +67,8 @@ wl_fixed_t gwl_window_scale_wl_fixed_from(const GWL_WindowScaleParams &scale_par
int gwl_window_scale_int_to(const GWL_WindowScaleParams &scale_params, int value);
int gwl_window_scale_int_from(const GWL_WindowScaleParams &scale_params, int value);
#define FRACTIONAL_DENOMINATOR 120
#ifdef WITH_GHOST_WAYLAND_DYNLOAD
/**
* Return true when all required WAYLAND libraries are present,
@ -100,10 +102,10 @@ struct GWL_Output {
* as this is what is used for most API calls.
* Only use fractional scaling to calculate the DPI.
*
* \note Internally an #wl_fixed_t is used to store the scale of the display,
* so use the same value here (avoid floating point arithmetic in general).
* \note Use the same scale as #wp_fractional_scale_manager_v1
* (avoid floating point arithmetic in general).
*/
wl_fixed_t scale_fractional = wl_fixed_from_int(1);
int scale_fractional = (1 * FRACTIONAL_DENOMINATOR);
bool has_scale_fractional = false;
std::string make;

View File

@ -40,7 +40,6 @@
#include <xdg-activation-v1-client-protocol.h>
#include <xdg-decoration-unstable-v1-client-protocol.h>
#include <xdg-shell-client-protocol.h>
#define FRACTIONAL_DENOMINATOR 120
#include <atomic>
@ -150,6 +149,8 @@ enum eGWL_PendingWindowActions {
* this window is visible on may have changed. Recalculate the windows scale.
*/
PENDING_OUTPUT_SCALE_UPDATE,
PENDING_WINDOW_SURFACE_SCALE,
/**
* The surface needs a commit to run.
* Use this to avoid committing immediately which can cause flickering when other operations
@ -175,8 +176,16 @@ struct GWL_WindowFrame {
bool is_active = false;
/** Disable when the fractional scale is a whole number. */
int fractional_scale = 0;
/**
* Store the value of #wp_fractional_scale_v1_listener::preferred_scale
* before it's applied.
*/
int fractional_scale_preferred = 0;
/** The scale passed to #wl_surface_set_buffer_scale. */
int buffer_scale = 0;
/** Scale has been set (for the first time). */
bool is_scale_init = false;
};
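To illustrate how the new fields relate (a sketch with assumed values, not code from the patch): a 150% preferred scale keeps fractional scaling active with a buffer scale of 1, while an exact 200% scale disables fractional scaling and relies on the buffer scale alone.

/* Hypothetical values for the scale-related GWL_WindowFrame fields. */
struct ExampleFrameScale {
  int fractional_scale;           /* Zero when the scale is a whole number. */
  int fractional_scale_preferred; /* Raw value from wp_fractional_scale_v1, in 120ths. */
  int buffer_scale;               /* Value passed to wl_surface_set_buffer_scale. */
};
constexpr ExampleFrameScale at_150_percent = {180, 180, 1};
constexpr ExampleFrameScale at_200_percent = {0, 240, 2};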
struct GWL_Window {
@ -417,7 +426,9 @@ static int gwl_window_fractional_from_viewport_round(const GWL_WindowFrame &fram
return lroundf(double(value * FRACTIONAL_DENOMINATOR) / double(frame.fractional_scale));
}
static bool gwl_window_viewport_set(GWL_Window *win, bool *r_surface_needs_commit)
static bool gwl_window_viewport_set(GWL_Window *win,
bool *r_surface_needs_commit,
bool *r_surface_needs_buffer_scale)
{
if (win->viewport != nullptr) {
return false;
@ -434,7 +445,14 @@ static bool gwl_window_viewport_set(GWL_Window *win, bool *r_surface_needs_commi
/* Set the buffer scale to 1 since a viewport will be used. */
if (win->frame.buffer_scale != 1) {
win->frame.buffer_scale = 1;
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
if (r_surface_needs_buffer_scale) {
*r_surface_needs_buffer_scale = true;
}
else {
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
}
if (r_surface_needs_commit) {
*r_surface_needs_commit = true;
}
@ -446,7 +464,9 @@ static bool gwl_window_viewport_set(GWL_Window *win, bool *r_surface_needs_commi
return true;
}
static bool gwl_window_viewport_unset(GWL_Window *win, bool *r_surface_needs_commit)
static bool gwl_window_viewport_unset(GWL_Window *win,
bool *r_surface_needs_commit,
bool *r_surface_needs_buffer_scale)
{
if (win->viewport == nullptr) {
return false;
@ -458,7 +478,14 @@ static bool gwl_window_viewport_unset(GWL_Window *win, bool *r_surface_needs_com
GHOST_ASSERT(win->frame.buffer_scale == 1, "Unexpected scale!");
if (win->frame_pending.buffer_scale != win->frame.buffer_scale) {
win->frame.buffer_scale = win->frame_pending.buffer_scale;
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
if (r_surface_needs_buffer_scale) {
*r_surface_needs_buffer_scale = true;
}
else {
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
}
if (r_surface_needs_commit) {
*r_surface_needs_commit = true;
}
@ -548,7 +575,8 @@ static void gwl_window_activate(GWL_Window *win)
* \{ */
static void gwl_window_frame_pending_fractional_scale_set(GWL_Window *win,
bool *r_surface_needs_commit)
bool *r_surface_needs_commit,
bool *r_surface_needs_buffer_scale)
{
if (win->frame_pending.fractional_scale == win->frame.fractional_scale &&
win->frame_pending.buffer_scale == win->frame.buffer_scale) {
@ -557,16 +585,21 @@ static void gwl_window_frame_pending_fractional_scale_set(GWL_Window *win,
if (win->frame_pending.fractional_scale) {
win->frame.fractional_scale = win->frame_pending.fractional_scale;
gwl_window_viewport_set(win, r_surface_needs_commit);
gwl_window_viewport_set(win, r_surface_needs_commit, r_surface_needs_buffer_scale);
gwl_window_viewport_size_update(win);
}
else {
if (win->viewport) {
gwl_window_viewport_unset(win, r_surface_needs_commit);
gwl_window_viewport_unset(win, r_surface_needs_commit, r_surface_needs_buffer_scale);
}
else {
win->frame.buffer_scale = win->frame_pending.buffer_scale;
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
if (r_surface_needs_buffer_scale) {
*r_surface_needs_buffer_scale = true;
}
else {
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
}
if (r_surface_needs_commit) {
*r_surface_needs_commit = true;
}
@ -577,7 +610,10 @@ static void gwl_window_frame_pending_fractional_scale_set(GWL_Window *win,
}
}
static void gwl_window_frame_pending_size_set(GWL_Window *win, bool *r_surface_needs_commit)
static void gwl_window_frame_pending_size_set(GWL_Window *win,
bool *r_surface_needs_commit,
bool *r_surface_needs_egl_resize,
bool *r_surface_needs_buffer_scale)
{
if (win->frame_pending.size[0] == 0 || win->frame_pending.size[1] == 0) {
return;
@ -588,13 +624,19 @@ static void gwl_window_frame_pending_size_set(GWL_Window *win, bool *r_surface_n
if (win->frame_pending.fractional_scale != win->frame.fractional_scale ||
win->frame_pending.buffer_scale != win->frame.buffer_scale) {
gwl_window_frame_pending_fractional_scale_set(win, r_surface_needs_commit);
gwl_window_frame_pending_fractional_scale_set(
win, r_surface_needs_commit, r_surface_needs_buffer_scale);
}
else {
gwl_window_viewport_size_update(win);
}
wl_egl_window_resize(win->egl_window, UNPACK2(win->frame.size), 0, 0);
if (r_surface_needs_egl_resize) {
*r_surface_needs_egl_resize = true;
}
else {
wl_egl_window_resize(win->egl_window, UNPACK2(win->frame.size), 0, 0);
}
win->ghost_window->notify_size();
@ -628,7 +670,6 @@ static void gwl_window_pending_actions_handle(GWL_Window *win)
gwl_window_frame_update_from_pending(win);
}
if (actions[PENDING_EGL_WINDOW_RESIZE]) {
gwl_window_viewport_size_update(win);
wl_egl_window_resize(win->egl_window, UNPACK2(win->frame.size), 0, 0);
}
# ifdef GHOST_OPENGL_ALPHA
@ -639,6 +680,9 @@ static void gwl_window_pending_actions_handle(GWL_Window *win)
if (actions[PENDING_OUTPUT_SCALE_UPDATE]) {
win->ghost_window->outputs_changed_update_scale();
}
if (actions[PENDING_WINDOW_SURFACE_SCALE]) {
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
}
if (actions[PENDING_WINDOW_SURFACE_COMMIT]) {
wl_surface_commit(win->wl_surface);
}
@ -660,16 +704,42 @@ static void gwl_window_frame_update_from_pending_no_lock(GWL_Window *win)
const bool dpi_changed = win->frame_pending.fractional_scale != win->frame.fractional_scale;
bool surface_needs_commit = false;
bool surface_needs_egl_resize = false;
bool surface_needs_buffer_scale = false;
if (win->frame_pending.size[0] != 0 && win->frame_pending.size[1] != 0) {
if ((win->frame.size[0] != win->frame_pending.size[0]) ||
(win->frame.size[1] != win->frame_pending.size[1])) {
gwl_window_frame_pending_size_set(win, &surface_needs_commit);
gwl_window_frame_pending_size_set(
win, &surface_needs_commit, &surface_needs_egl_resize, &surface_needs_buffer_scale);
}
}
if (win->fractional_scale_handle) {
gwl_window_frame_pending_fractional_scale_set(win, &surface_needs_commit);
if (win->frame_pending.fractional_scale || win->frame.fractional_scale) {
gwl_window_frame_pending_fractional_scale_set(
win, &surface_needs_commit, &surface_needs_buffer_scale);
}
else {
if (win->frame_pending.buffer_scale != win->frame.buffer_scale) {
win->frame.buffer_scale = win->frame_pending.buffer_scale;
surface_needs_buffer_scale = true;
}
}
if (surface_needs_egl_resize) {
#ifdef USE_EVENT_BACKGROUND_THREAD
gwl_window_pending_actions_tag(win, PENDING_EGL_WINDOW_RESIZE);
#else
wl_egl_window_resize(win->egl_window, UNPACK2(win->frame.size), 0, 0);
#endif
}
if (surface_needs_buffer_scale) {
#ifdef USE_EVENT_BACKGROUND_THREAD
gwl_window_pending_actions_tag(win, PENDING_WINDOW_SURFACE_SCALE);
#else
wl_surface_set_buffer_scale(win->wl_surface, win->frame.buffer_scale);
#endif
}
if (surface_needs_commit) {
@ -729,11 +799,26 @@ static int output_scale_cmp(const GWL_Output *output_a, const GWL_Output *output
if (output_a->scale > output_b->scale) {
return 1;
}
if (output_a->has_scale_fractional || output_b->has_scale_fractional) {
const int scale_fractional_a = output_a->has_scale_fractional ?
output_a->scale_fractional :
(output_a->scale * FRACTIONAL_DENOMINATOR);
const int scale_fractional_b = output_b->has_scale_fractional ?
output_b->scale_fractional :
(output_b->scale * FRACTIONAL_DENOMINATOR);
if (scale_fractional_a < scale_fractional_b) {
return -1;
}
if (scale_fractional_a > scale_fractional_b) {
return 1;
}
}
return 0;
}
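A small worked case for the fallback above (illustrative values only): the fractional comparison only matters when the integer scales tie, and an output without a fractional scale is treated as its integer scale expressed in 120ths.

/* Two hypothetical outputs that both report an integer scale of 2. */
constexpr int scale_fractional_a = 270;     /* Output A advertises 2.25x. */
constexpr int scale_fractional_b = 2 * 120; /* Output B has no fractional scale: scale * FRACTIONAL_DENOMINATOR. */
static_assert(scale_fractional_a > scale_fractional_b); /* So output_scale_cmp() would return 1 for (A, B). */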
static int outputs_max_scale_or_default(const std::vector<GWL_Output *> &outputs,
const int32_t scale_default)
const int32_t scale_default,
int *r_scale_fractional)
{
const GWL_Output *output_max = nullptr;
for (const GWL_Output *reg_output : outputs) {
@ -743,9 +828,46 @@ static int outputs_max_scale_or_default(const std::vector<GWL_Output *> &outputs
}
if (output_max) {
if (r_scale_fractional) {
*r_scale_fractional = output_max->has_scale_fractional ?
output_max->scale_fractional :
(output_max->scale * FRACTIONAL_DENOMINATOR);
}
return output_max->scale;
}
if (r_scale_fractional) {
*r_scale_fractional = scale_default * FRACTIONAL_DENOMINATOR;
}
return scale_default;
}
static int outputs_uniform_scale_or_default(const std::vector<GWL_Output *> &outputs,
const int32_t scale_default,
int *r_scale_fractional)
{
const GWL_Output *output_uniform = nullptr;
for (const GWL_Output *reg_output : outputs) {
if (!output_uniform) {
output_uniform = reg_output;
}
else if (output_scale_cmp(output_uniform, reg_output) != 0) {
/* Non-uniform. */
output_uniform = nullptr;
break;
}
}
if (output_uniform) {
if (r_scale_fractional) {
*r_scale_fractional = output_uniform->has_scale_fractional ?
output_uniform->scale_fractional :
(output_uniform->scale * FRACTIONAL_DENOMINATOR);
}
return output_uniform->scale;
}
if (r_scale_fractional) {
*r_scale_fractional = scale_default * FRACTIONAL_DENOMINATOR;
}
return scale_default;
}
@ -876,35 +998,10 @@ static void wp_fractional_scale_handle_preferred_scale(
double(preferred_scale) / FRACTIONAL_DENOMINATOR);
GWL_Window *win = static_cast<GWL_Window *>(data);
const bool is_fractional = (preferred_scale % FRACTIONAL_DENOMINATOR) != 0;
/* When non-fractional, never use fractional scaling! */
win->frame_pending.fractional_scale = is_fractional ? preferred_scale : 0;
win->frame_pending.buffer_scale = is_fractional ? 1 : preferred_scale / FRACTIONAL_DENOMINATOR;
const int scale_prev = win->frame.fractional_scale ?
win->frame.fractional_scale :
win->frame.buffer_scale * FRACTIONAL_DENOMINATOR;
const int scale_next = preferred_scale;
if (scale_prev != scale_next) {
/* Resize the window failing to do so results in severe flickering with a
* multi-monitor setup when multiple monitors have different scales.
*
* NOTE: some flickering is still possible even when resizing this
* happens when dragging the right hand side of the title-bar in KDE
* as expanding changed the size on the RHS, this may be up to the compositor to fix. */
for (size_t i = 0; i < ARRAY_SIZE(win->frame_pending.size); i++) {
const int value = win->frame_pending.size[i] ? win->frame_pending.size[i] :
win->frame.size[i];
win->frame_pending.size[i] = lroundf(value * (double(scale_next) / double(scale_prev)));
}
#ifdef USE_EVENT_BACKGROUND_THREAD
gwl_window_pending_actions_tag(win, PENDING_WINDOW_FRAME_CONFIGURE);
#else
gwl_window_frame_update_from_pending(win);
#endif
if (win->frame_pending.fractional_scale_preferred != int(preferred_scale)) {
win->frame_pending.fractional_scale_preferred = preferred_scale;
win->ghost_window->outputs_changed_update_scale_tag();
}
}
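For context, wp_fractional_scale_v1 reports the preferred scale in 120ths; the decoding that later consumes fractional_scale_preferred follows the is_fractional logic shown further down. A small sketch with assumed values:

/* Sketch only: decoding a preferred_scale value received from the compositor. */
constexpr int kDenominator = 120;
constexpr int kPreferred150 = 180; /* 180 % 120 != 0: keep fractional scaling, buffer scale stays 1. */
constexpr int kPreferred200 = 240; /* 240 % 120 == 0: no fractional scaling, buffer scale becomes 2. */
static_assert(kPreferred150 % kDenominator != 0);
static_assert(kPreferred200 / kDenominator == 2);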
@ -1124,7 +1221,7 @@ static void surface_handle_enter(void *data,
GWL_Output *reg_output = ghost_wl_output_user_data(wl_output);
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(data);
if (win->outputs_enter(reg_output)) {
win->outputs_changed_update_scale();
win->outputs_changed_update_scale_tag();
}
}
@ -1141,7 +1238,7 @@ static void surface_handle_leave(void *data,
GWL_Output *reg_output = ghost_wl_output_user_data(wl_output);
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(data);
if (win->outputs_leave(reg_output)) {
win->outputs_changed_update_scale();
win->outputs_changed_update_scale_tag();
}
}
@ -1197,7 +1294,13 @@ GHOST_WindowWayland::GHOST_WindowWayland(GHOST_SystemWayland *system,
*
* Using the maximum scale is best as it results in the window first being smaller,
* avoiding a large window flashing before it's made smaller. */
window_->frame.buffer_scale = outputs_max_scale_or_default(system_->outputs(), 1);
int fractional_scale = 0;
window_->frame.buffer_scale = outputs_uniform_scale_or_default(
system_->outputs(), 1, &fractional_scale);
if (fractional_scale / FRACTIONAL_DENOMINATOR != window_->frame.buffer_scale) {
window_->frame.buffer_scale = 1;
}
window_->frame_pending.buffer_scale = window_->frame.buffer_scale;
window_->frame.size[0] = int32_t(width);
@ -1443,7 +1546,7 @@ GHOST_TSuccess GHOST_WindowWayland::setClientSize(const uint32_t width, const ui
window_->frame_pending.size[0] = width;
window_->frame_pending.size[1] = height;
gwl_window_frame_pending_size_set(window_, nullptr);
gwl_window_frame_pending_size_set(window_, nullptr, nullptr, nullptr);
return GHOST_kSuccess;
}
@ -1830,6 +1933,15 @@ GHOST_TSuccess GHOST_WindowWayland::notify_decor_redraw()
* Functionality only used for the WAYLAND implementation.
* \{ */
void GHOST_WindowWayland::outputs_changed_update_scale_tag()
{
#ifdef USE_EVENT_BACKGROUND_THREAD
gwl_window_pending_actions_tag(window_, PENDING_OUTPUT_SCALE_UPDATE);
#else
outputs_changed_update_scale();
#endif
}
bool GHOST_WindowWayland::outputs_changed_update_scale()
{
#ifdef USE_EVENT_BACKGROUND_THREAD
@ -1838,46 +1950,109 @@ bool GHOST_WindowWayland::outputs_changed_update_scale()
return false;
}
#endif
int fractional_scale_next = -1;
int fractional_scale_from_output = 0;
if (window_->fractional_scale_handle) {
/* Let the #wp_fractional_scale_v1_listener::preferred_scale callback handle
* changes to the windows scale. */
return false;
}
int scale_next = outputs_max_scale_or_default(outputs(), 0, &fractional_scale_from_output);
const int scale_next = outputs_max_scale_or_default(outputs(), 0);
if (UNLIKELY(scale_next == 0)) {
return false;
}
const int scale_curr = window_->frame.buffer_scale;
bool changed = false;
if (scale_next != scale_curr) {
window_->frame.buffer_scale = scale_next;
wl_surface_set_buffer_scale(window_->wl_surface, scale_next);
if (window_->fractional_scale_handle) {
#ifdef USE_EVENT_BACKGROUND_THREAD
std::lock_guard lock_frame_guard{window_->frame_pending_mutex};
#endif
/* Let the #wp_fractional_scale_v1_listener::preferred_scale callback handle
* changes to the window's scale. */
if (window_->frame_pending.fractional_scale_preferred != 0) {
fractional_scale_next = window_->frame_pending.fractional_scale_preferred;
scale_next = fractional_scale_next / FRACTIONAL_DENOMINATOR;
}
}
/* It's important to resize the window immediately, to avoid the window changing size
* and flickering in a constant feedback loop (in some cases). */
if (fractional_scale_next == -1) {
fractional_scale_next = fractional_scale_from_output;
scale_next = fractional_scale_next / FRACTIONAL_DENOMINATOR;
}
else {
/* NOTE(@ideasman42): This often overrides #wp_fractional_scale_v1_listener::preferred_scale
* in favor of using the greatest overlapping scale.
* This was requested by the studio to prevent a tablet's built-in display of 75%
* from causing the main display to be up-scaled (appearing pixelated).
if (fractional_scale_next < fractional_scale_from_output) {
fractional_scale_next = fractional_scale_from_output;
scale_next = fractional_scale_next / FRACTIONAL_DENOMINATOR;
}
}
if ((window_->frame_pending.size[0] != 0) && (window_->frame_pending.size[1] != 0)) {
/* Unlikely but possible that a pending size change is set. */
window_->frame.size[0] = window_->frame_pending.size[0];
window_->frame.size[1] = window_->frame_pending.size[1];
bool changed = false;
#ifdef USE_EVENT_BACKGROUND_THREAD
std::lock_guard lock_frame_guard{window_->frame_pending_mutex};
#endif
bool force_frame_update = false;
bool is_fractional_prev = window_->frame.fractional_scale != 0;
const bool is_fractional_next = (fractional_scale_next % FRACTIONAL_DENOMINATOR) != 0;
/* When non-fractional, never use fractional scaling! */
window_->frame_pending.fractional_scale = is_fractional_next ? fractional_scale_next : 0;
window_->frame_pending.buffer_scale = is_fractional_next ?
1 :
fractional_scale_next / FRACTIONAL_DENOMINATOR;
int fractional_scale_prev = window_->frame.fractional_scale ?
window_->frame.fractional_scale :
window_->frame.buffer_scale * FRACTIONAL_DENOMINATOR;
int scale_prev = fractional_scale_prev / FRACTIONAL_DENOMINATOR;
if (window_->frame_pending.is_scale_init == false) {
window_->frame_pending.is_scale_init = true;
/* NOTE(@ideasman42): Needed because new windows are created at their previous pixel-dimensions
* as the window doesn't save its DPI. Restore the window size under the assumption it's
* opening on the same monitor so a window keeps its previous size on a user's system.
*
* To support anything more sophisticated, windows would need to be created with a scale
* argument (representing the scale used when the window was stored, for example). */
is_fractional_prev = is_fractional_next;
scale_prev = scale_next;
fractional_scale_prev = fractional_scale_next;
/* Leave `window_->frame_pending` as-is, so changes are detected and updates are applied. */
force_frame_update = true;
}
if ((fractional_scale_prev != fractional_scale_next) ||
(window_->frame_pending.buffer_scale != window_->frame.buffer_scale) ||
(force_frame_update == true)) {
/* Resize the window, as failing to do so results in severe flickering with a
* multi-monitor setup when multiple monitors have different scales.
*
* NOTE: some flickering is still possible even when resizing; this happens when dragging
* the right hand side of the title-bar in KDE, as expanding changes the size on the RHS.
* This may be up to the compositor to fix. */
for (size_t i = 0; i < ARRAY_SIZE(window_->frame_pending.size); i++) {
const int value = window_->frame_pending.size[i] ? window_->frame_pending.size[i] :
window_->frame.size[i];
if (is_fractional_prev || is_fractional_next) {
window_->frame_pending.size[i] = lroundf(
value * (double(fractional_scale_next) / double(fractional_scale_prev)));
}
else {
window_->frame_pending.size[i] = (value * scale_next) / scale_prev;
}
if (window_->frame_pending.buffer_scale > 1) {
window_->frame_pending.size[i] = (window_->frame_pending.size[i] /
window_->frame_pending.buffer_scale) *
window_->frame_pending.buffer_scale;
}
}
/* Write to the pending values as these are what is applied. */
window_->frame_pending.size[0] = (window_->frame.size[0] / scale_curr) * scale_next;
window_->frame_pending.size[1] = (window_->frame.size[1] / scale_curr) * scale_next;
gwl_window_frame_pending_size_set(window_, nullptr);
GHOST_SystemWayland *system = window_->ghost_system;
system->pushEvent(
new GHOST_Event(system->getMilliSeconds(), GHOST_kEventWindowDPIHintChanged, this));
gwl_window_frame_update_from_pending_no_lock(window_);
changed = true;
}
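As a worked example of the resize arithmetic above (assumed sizes, not from the patch): moving a 1000 px wide window from a 1.0 scale (120) to a 1.5 scale (180) in the fractional branch gives lroundf(1000 * 180.0 / 120.0) = 1500, and since the buffer scale stays 1 no rounding to a buffer-scale multiple is needed. The integer-rounding sketch below reproduces the same result.

/* Sketch only: equivalent integer rounding for this particular example. */
constexpr int kFractionalPrev = 120; /* 1.0x */
constexpr int kFractionalNext = 180; /* 1.5x */
constexpr int kWidthPrev = 1000;
constexpr int kWidthNext = (kWidthPrev * kFractionalNext + kFractionalPrev / 2) / kFractionalPrev;
static_assert(kWidthNext == 1500);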

View File

@ -165,6 +165,7 @@ class GHOST_WindowWayland : public GHOST_Window {
* Return true when the window's scale or DPI changes.
*/
bool outputs_changed_update_scale();
void outputs_changed_update_scale_tag();
#ifdef USE_EVENT_BACKGROUND_THREAD
void pending_actions_handle();

View File

@ -111,17 +111,6 @@ static void editmesh_tessface_calc_intern(BMEditMesh *em,
void BKE_editmesh_looptri_calc_ex(BMEditMesh *em, const BMeshCalcTessellation_Params *params)
{
editmesh_tessface_calc_intern(em, params);
/* commented because editbmesh_build_data() ensures we get tessfaces */
#if 0
if (em->mesh_eval_final && em->mesh_eval_final == em->mesh_eval_cage) {
BKE_mesh_runtime_looptri_ensure(em->mesh_eval_final);
}
else if (em->mesh_eval_final) {
BKE_mesh_runtime_looptri_ensure(em->mesh_eval_final);
BKE_mesh_runtime_looptri_ensure(em->mesh_eval_cage);
}
#endif
}
void BKE_editmesh_looptri_calc(BMEditMesh *em)

View File

@ -101,10 +101,13 @@ set(GLSL_SRC
shaders/compositor_convert.glsl
shaders/compositor_despeckle.glsl
shaders/compositor_directional_blur.glsl
shaders/compositor_displace.glsl
shaders/compositor_edge_filter.glsl
shaders/compositor_ellipse_mask.glsl
shaders/compositor_filter.glsl
shaders/compositor_flip.glsl
shaders/compositor_glare_fog_glow_downsample.glsl
shaders/compositor_glare_fog_glow_upsample.glsl
shaders/compositor_glare_ghost_accumulate.glsl
shaders/compositor_glare_ghost_base.glsl
shaders/compositor_glare_highlights.glsl
@ -116,6 +119,7 @@ set(GLSL_SRC
shaders/compositor_glare_streaks_accumulate.glsl
shaders/compositor_glare_streaks_filter.glsl
shaders/compositor_image_crop.glsl
shaders/compositor_map_uv.glsl
shaders/compositor_morphological_distance.glsl
shaders/compositor_morphological_distance_feather.glsl
shaders/compositor_morphological_distance_threshold.glsl
@ -136,6 +140,9 @@ set(GLSL_SRC
shaders/compositor_tone_map_photoreceptor.glsl
shaders/compositor_tone_map_simple.glsl
shaders/compositor_write_output.glsl
shaders/compositor_z_combine_compute_mask.glsl
shaders/compositor_z_combine_from_mask.glsl
shaders/compositor_z_combine_simple.glsl
shaders/library/gpu_shader_compositor_alpha_over.glsl
shaders/library/gpu_shader_compositor_blur_common.glsl
@ -202,12 +209,14 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_convert_info.hh
shaders/infos/compositor_despeckle_info.hh
shaders/infos/compositor_directional_blur_info.hh
shaders/infos/compositor_displace_info.hh
shaders/infos/compositor_edge_filter_info.hh
shaders/infos/compositor_ellipse_mask_info.hh
shaders/infos/compositor_filter_info.hh
shaders/infos/compositor_flip_info.hh
shaders/infos/compositor_glare_info.hh
shaders/infos/compositor_image_crop_info.hh
shaders/infos/compositor_map_uv_info.hh
shaders/infos/compositor_morphological_distance_feather_info.hh
shaders/infos/compositor_morphological_distance_info.hh
shaders/infos/compositor_morphological_distance_threshold_info.hh
@ -226,6 +235,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_tone_map_photoreceptor_info.hh
shaders/infos/compositor_tone_map_simple_info.hh
shaders/infos/compositor_write_output_info.hh
shaders/infos/compositor_z_combine_info.hh
)
set(SHADER_CREATE_INFOS_CONTENT "")

View File

@ -170,6 +170,12 @@ class Operation {
* evaluation of the operation to declare that the results are no longer needed by this
* operation. */
void release_inputs();
/* Release the results that were allocated in the execute method but are not actually needed.
* This can be the case if the execute method allocated a dummy texture for an unneeded result,
* see the description of Result::allocate_texture() for more information. This is called after
* the evaluation of the operation. */
void release_unneeded_results();
};
} // namespace blender::realtime_compositor

View File

@ -112,7 +112,18 @@ class Result {
/* Declare the result to be a texture result, allocate a texture of an appropriate type with
* the size of the given domain from the result's texture pool, and set the domain of the result
* to the given domain. */
* to the given domain.
*
* If the result should not be computed, that is, should_compute() returns false, yet this method
* is called, that means the result is only being allocated because the shader that computes it
* also computes another result that is actually needed, and shaders need to have a texture
* bound to all their image units for a correct invocation, even if some of those textures are
* not needed and will eventually be discarded. In that case, since allocating the full texture
* is not needed, allocate_single_value() is called instead and the reference count is set to 1.
* This essentially allocates a dummy 1x1 texture, which works because out-of-bounds shader writes
* to images are safe. Since this result is not referenced by any other operation, it should be
* manually released after the operation is evaluated, which is implemented by calling the
* Operation::release_unneeded_results() method. */
void allocate_texture(Domain domain);
/* Declare the result to be a single value result, allocate a texture of an appropriate
@ -228,6 +239,9 @@ class Result {
/* Returns true if the result is a single value and false if it is a texture. */
bool is_single_value() const;
/* Returns true if the result is allocated. */
bool is_allocated() const;
/* Returns the allocated GPU texture of the result. */
GPUTexture *texture() const;

View File

@ -13,8 +13,8 @@ namespace blender::realtime_compositor {
void smaa(Context &context,
Result &input,
Result &output,
float threshold,
float local_contrast_adaptation_factor,
int corner_rounding);
float threshold = 0.1f,
float local_contrast_adaptation_factor = 2.0f,
int corner_rounding = 25);
} // namespace blender::realtime_compositor

View File

@ -6,6 +6,7 @@
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_smaa.hh"
@ -22,10 +23,25 @@ static Result detect_edges(Context &context,
GPUShader *shader = context.shader_manager().get("compositor_smaa_edge_detection");
GPU_shader_bind(shader);
float luminance_coefficients[3];
IMB_colormanagement_get_luminance_coefficients(luminance_coefficients);
switch (input.type()) {
case ResultType::Color: {
float luminance_coefficients[3];
IMB_colormanagement_get_luminance_coefficients(luminance_coefficients);
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
break;
}
case ResultType::Vector: {
float luminance_coefficients[3] = {1.0f, 1.0f, 1.0f};
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
break;
}
case ResultType::Float: {
float luminance_coefficients[3] = {1.0f, 0.0f, 0.0f};
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
break;
}
}
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
GPU_shader_uniform_1f(shader, "smaa_threshold", threshold);
GPU_shader_uniform_1f(
shader, "smaa_local_contrast_adaptation_factor", local_contrast_adaptation_factor);
@ -78,7 +94,9 @@ static Result calculate_blending_weights(Context &context, Result &edges, int co
static void blend_neighborhood(Context &context, Result &input, Result &weights, Result &output)
{
GPUShader *shader = context.shader_manager().get("compositor_smaa_neighborhood_blending");
GPUShader *shader = context.shader_manager().get(
input.type() == ResultType::Float ? "compositor_smaa_neighborhood_blending_float" :
"compositor_smaa_neighborhood_blending_color");
GPU_shader_bind(shader);
GPU_texture_filter_mode(input.texture(), true);

View File

@ -34,6 +34,8 @@ void Operation::evaluate()
execute();
release_inputs();
release_unneeded_results();
}
Result &Operation::get_result(StringRef identifier)
@ -201,4 +203,13 @@ void Operation::release_inputs()
}
}
void Operation::release_unneeded_results()
{
for (Result &result : results_.values()) {
if (!result.should_compute() && result.is_allocated()) {
result.release();
}
}
}
} // namespace blender::realtime_compositor

View File

@ -21,12 +21,21 @@ Result::Result(ResultType type, TexturePool &texture_pool)
Result Result::Temporary(ResultType type, TexturePool &texture_pool)
{
Result result = Result(type, texture_pool);
result.increment_reference_count();
result.set_initial_reference_count(1);
result.reset();
return result;
}
void Result::allocate_texture(Domain domain)
{
/* The result is not actually needed, so allocate a dummy single value texture instead. See the
* method description for more information. */
if (!should_compute()) {
allocate_single_value();
increment_reference_count();
return;
}
is_single_value_ = false;
switch (type_) {
case ResultType::Float:
@ -247,6 +256,11 @@ bool Result::is_single_value() const
return is_single_value_;
}
bool Result::is_allocated() const
{
return texture_ != nullptr;
}
GPUTexture *Result::texture() const
{
return texture_;

View File

@ -0,0 +1,55 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* A shared table that stores the displaced coordinates of all pixels in the work group. This is
* necessary to avoid recomputing displaced coordinates when computing the gradients necessary for
* anisotropic filtering, see the implementation for more information. */
shared vec2 displaced_coordinates_table[gl_WorkGroupSize.x][gl_WorkGroupSize.y];
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
ivec2 input_size = texture_size(input_tx);
/* Add 0.5 to evaluate the input sampler at the center of the pixel and divide by the image size
* to get the coordinates into the sampler's expected [0, 1] range. */
vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(input_size);
/* Note that the input displacement is in pixel space, so divide by the input size to transform
* it into the normalized sampler space. */
vec2 scale = vec2(texture_load(x_scale_tx, texel).x, texture_load(y_scale_tx, texel).x);
vec2 displacement = texture_load(displacement_tx, texel).xy * scale / vec2(input_size);
vec2 displaced_coordinates = coordinates - displacement;
/* Store the displaced coordinates into the shared table and issue a barrier to later compute the
* gradients from the table. */
ivec2 table_index = ivec2(gl_LocalInvocationID.xy);
displaced_coordinates_table[table_index.x][table_index.y] = displaced_coordinates;
barrier();
/* Compute the partial derivative of the displaced coordinates along the x direction using a
* finite difference approximation. Even invocations use a forward finite difference equation
* while odd invocations use a backward finite difference equation. This is done such that
* invocations at the edges of the work group wouldn't need access to pixels that are outside of
* the work group.
*
* The x_step value is 1 for even invocations and when added to the x table index and multiplied
* by the result yields a standard forward finite difference equation. The x_step value is -1 for
* odd invocations and when added to the x table index and multiplied by the result yields a
* standard backward finite difference equation, because multiplication by -1 flips the order of
* subtraction. */
int x_step = (table_index.x % 2) * -2 + 1;
vec2 x_neighbour = displaced_coordinates_table[table_index.x + x_step][table_index.y];
vec2 x_gradient = (x_neighbour - displaced_coordinates) * x_step;
/* Compute the partial derivative of the displaced coordinates along the y direction using a
* finite difference approximation. See the previous code section for more information. */
int y_step = (table_index.y % 2) * -2 + 1;
vec2 y_neighbour = displaced_coordinates_table[table_index.x][table_index.y + y_step];
vec2 y_gradient = (y_neighbour - displaced_coordinates) * y_step;
/* Sample the input using the displaced coordinates passing in the computed gradients in order to
* utilize the anisotropic filtering capabilities of the sampler. */
vec4 displaced_color = textureGrad(input_tx, displaced_coordinates, x_gradient, y_gradient);
imageStore(output_img, texel, displaced_color);
}
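The even/odd step trick used for the finite differences can be sanity-checked outside the shader (a sketch in C++ rather than GLSL, not part of the patch): even local indices step forward, odd ones step backward, and an even invocation and its odd neighbour end up differencing the same pair of table entries, so no invocation reads outside the work group.

/* Sketch only: mirrors `int x_step = (table_index.x % 2) * -2 + 1;` from the shader. */
constexpr int step_for(int local_index)
{
  return (local_index % 2) * -2 + 1;
}
static_assert(step_for(4) == 1);  /* Forward difference: table[5] - table[4]. */
static_assert(step_for(5) == -1); /* Backward difference: (table[4] - table[5]) * -1 == table[5] - table[4]. */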

View File

@ -0,0 +1,102 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
#if defined(KARIS_AVERAGE)
/* Computes the weighted average of the given four colors, which are assumed to be the colors of
* spatially neighbouring pixels. The weights are computed so as to reduce the contributions of
* fireflies on the result by applying a form of local tone mapping as described by Brian Karis in
* the article "Graphic Rants: Tone Mapping".
*
* https://graphicrants.blogspot.com/2013/12/tone-mapping.html */
vec4 karis_brightness_weighted_sum(vec4 color1, vec4 color2, vec4 color3, vec4 color4)
{
vec4 brightness = vec4(max_v3(color1), max_v3(color2), max_v3(color3), max_v3(color4));
vec4 weights = 1.0 / (brightness + 1.0);
return weighted_sum(color1, color2, color3, color4, weights);
}
#endif
void main()
{
/* Each invocation corresponds to one output pixel, where the output has half the size of the
* input. */
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
/* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to get
* the coordinates into the sampler's expected [0, 1] range. */
vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));
/* All the offsets in the following code section are in the normalized pixel space of the input
* texture, so compute its normalized pixel size. */
vec2 pixel_size = 1.0 / vec2(texture_size(input_tx));
/* Each invocation downsamples a 6x6 area of pixels around the center of the corresponding output
* pixel, but instead of sampling each of the 36 pixels in the area, we only sample 13 positions
* using bilinear fetches at the center of a number of overlapping square 4-pixel groups. This
* downsampling strategy is described in the talk:
*
* Next Generation Post Processing in Call of Duty: Advanced Warfare
* https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
*
* In particular, the downsampling strategy is described and illustrated in slide 153 titled
* "Downsampling - Our Solution". This is employed as it significantly improves the stability of
* the glare as can be seen in the videos in the talk. */
vec4 center = texture(input_tx, coordinates);
vec4 upper_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, 1.0));
vec4 upper_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, 1.0));
vec4 lower_left_near = texture(input_tx, coordinates + pixel_size * vec2(-1.0, -1.0));
vec4 lower_right_near = texture(input_tx, coordinates + pixel_size * vec2(1.0, -1.0));
vec4 left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 0.0));
vec4 right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 0.0));
vec4 upper_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, 2.0));
vec4 lower_far = texture(input_tx, coordinates + pixel_size * vec2(0.0, -2.0));
vec4 upper_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, 2.0));
vec4 upper_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, 2.0));
vec4 lower_left_far = texture(input_tx, coordinates + pixel_size * vec2(-2.0, -2.0));
vec4 lower_right_far = texture(input_tx, coordinates + pixel_size * vec2(2.0, -2.0));
#if defined(SIMPLE_AVERAGE)
/* The original weights equation mentioned in slide 153 is:
* 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
* The 0.5 corresponds to the center group of pixels and the 0.125 corresponds to the other
* groups of pixels. The center is sampled 4 times and the far non-corner pixels are sampled 2
* times. The near corner pixels are sampled only once, but their weight is quadruple the weights
* of other groups, so they count as sampled 4 times. Finally, the far corner pixels are sampled
* only once, essentially totalling 32 samples. So the weights are as used in the following code
* section. */
vec4 result = (4.0 / 32.0) * center +
(4.0 / 32.0) *
(upper_left_near + upper_right_near + lower_left_near + lower_right_near) +
(2.0 / 32.0) * (left_far + right_far + upper_far + lower_far) +
(1.0 / 32.0) *
(upper_left_far + upper_right_far + lower_left_far + lower_right_far);
#elif defined(KARIS_AVERAGE)
/* Reduce the contributions of fireflies on the result by reducing each group of pixels using a
* Karis brightness weighted sum. This is described in slide 168 titled "Fireflies - Partial
* Karis Average".
*
* This needn't be done on all downsampling passes, but only the first one, since fireflies
* will not survive the first pass; later passes can use the weighted average.
vec4 center_weighted_sum = karis_brightness_weighted_sum(
upper_left_near, upper_right_near, lower_right_near, lower_left_near);
vec4 upper_left_weighted_sum = karis_brightness_weighted_sum(
upper_left_far, upper_far, center, left_far);
vec4 upper_right_weighted_sum = karis_brightness_weighted_sum(
upper_far, upper_right_far, right_far, center);
vec4 lower_right_weighted_sum = karis_brightness_weighted_sum(
center, right_far, lower_right_far, lower_far);
vec4 lower_left_weighted_sum = karis_brightness_weighted_sum(
left_far, center, lower_far, lower_left_far);
/* The original weights equation mentioned in slide 153 is:
* 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
* Multiply both sides by 8 and you get:
* 4 + 1 + 1 + 1 + 1 = 8
* So the weights are as used in the following code section. */
vec4 result = (4.0 / 8.0) * center_weighted_sum +
(1.0 / 8.0) * (upper_left_weighted_sum + upper_right_weighted_sum +
lower_left_weighted_sum + lower_right_weighted_sum);
#endif
imageStore(output_img, texel, result);
}
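As a quick check of the weights used above (editorial note, not part of the shader): the center group contributes 4/32, the four near-corner groups contribute 4 * (4/32), the four far edge groups 4 * (2/32), and the four far corner groups 4 * (1/32), so the total is (4 + 16 + 8 + 4) / 32 = 1, confirming the filter is normalized.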

View File

@ -0,0 +1,37 @@
void main()
{
/* Each invocation corresponds to one output pixel, where the output has twice the size of the
* input. */
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
/* Add 0.5 to evaluate the sampler at the center of the pixel and divide by the image size to get
* the coordinates into the sampler's expected [0, 1] range. */
vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(output_img));
/* All the offsets in the following code section are in the normalized pixel space of the output
* image, so compute its normalized pixel size. */
vec2 pixel_size = 1.0 / vec2(imageSize(output_img));
/* Upsample by applying a 3x3 tent filter on the bi-linearly interpolated values evaluated at the
* center of neighbouring output pixels. As more tent filter upsampling passes are applied, the
* result approximates a large sized Gaussian filter. This upsampling strategy is described in
* the talk:
*
* Next Generation Post Processing in Call of Duty: Advanced Warfare
* https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
*
* In particular, the upsampling strategy is described and illustrated in slide 162 titled
* "Upsampling - Our Solution". */
vec4 upsampled = vec4(0.0);
upsampled += (4.0 / 16.0) * texture(input_tx, coordinates);
upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(-1.0, 0.0));
upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(0.0, 1.0));
upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(1.0, 0.0));
upsampled += (2.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(0.0, -1.0));
upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(-1.0, -1.0));
upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(-1.0, 1.0));
upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(1.0, -1.0));
upsampled += (1.0 / 16.0) * texture(input_tx, coordinates + pixel_size * vec2(1.0, 1.0));
imageStore(output_img, texel, imageLoad(output_img, texel) + upsampled);
}

View File

@ -0,0 +1,66 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* A shared table that stores the UV coordinates of all pixels in the work group. This is necessary
* to avoid recomputing UV coordinates when computing the gradients necessary for anisotropic
* filtering, see the implementation for more information. */
shared vec2 uv_coordinates_table[gl_WorkGroupSize.x][gl_WorkGroupSize.y];
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec2 uv_coordinates = texture_load(uv_tx, texel).xy;
/* Store the UV coordinates into the shared table and issue a barrier to later compute the
* gradients from the table. */
ivec2 table_index = ivec2(gl_LocalInvocationID.xy);
uv_coordinates_table[table_index.x][table_index.y] = uv_coordinates;
barrier();
/* Compute the partial derivative of the UV coordinates along the x direction using a finite
* difference approximation. Even invocations use a forward finite difference equation while odd
* invocations use a backward finite difference equation. This is done such that invocations at
* the edges of the work group wouldn't need access to pixels that are outside of the work group.
*
* The x_step value is 1 for even invocations and when added to the x table index and multiplied
* by the result yields a standard forward finite difference equation. The x_step value is -1 for
* odd invocations and when added to the x table index and multiplied by the result yields a
* standard backward finite difference equation, because multiplication by -1 flips the order of
* subtraction. */
int x_step = (table_index.x % 2) * -2 + 1;
vec2 x_neighbour = uv_coordinates_table[table_index.x + x_step][table_index.y];
vec2 x_gradient = (x_neighbour - uv_coordinates) * x_step;
/* Compute the partial derivative of the UV coordinates along the y direction using a
* finite difference approximation. See the previous code section for more information. */
int y_step = (table_index.y % 2) * -2 + 1;
vec2 y_neighbour = uv_coordinates_table[table_index.x][table_index.y + y_step];
vec2 y_gradient = (y_neighbour - uv_coordinates) * y_step;
/* Sample the input using the UV coordinates passing in the computed gradients in order to
* utilize the anisotropic filtering capabilities of the sampler. */
vec4 sampled_color = textureGrad(input_tx, uv_coordinates, x_gradient, y_gradient);
/* The UV coordinates might be defined in only a subset area of the UV texture, in which case,
* the gradients would be infinite at the boundary of that area, which would produce erroneous
* results due to anisotropic filtering. To work around this, we attenuate the result if its
* computed gradients are too high, such that the result tends to zero when the magnitude of the
* gradients tends to one, that is, when their sum tends to 2. One is chosen as the threshold
* because that's the maximum gradient magnitude when the boundary is the maximum sampler value
* of one and the out-of-bounds values are zero. Additionally, the user-supplied gradient
* attenuation factor can be used to control this attenuation or even disable it when it is zero,
* ranging between zero and one. */
float gradient_magnitude = (length(x_gradient) + length(y_gradient)) / 2.0;
float gradient_attenuation = max(0.0, 1.0 - gradient_attenuation_factor * gradient_magnitude);
/* The UV texture is assumed to contain an alpha channel as its third channel, since the UV
* coordinates might be defined in only a subset area of the UV texture as mentioned. In that
* case, the alpha is typically opaque at the subset area and transparent everywhere else, and
* alpha pre-multiplication is then performed. This format of having an alpha channel in the UV
* coordinates is the format used by UV passes in render engines, hence the mentioned logic. */
float alpha = texture_load(uv_tx, texel).z;
vec4 result = sampled_color * gradient_attenuation * alpha;
imageStore(output_img, texel, result);
}
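As a worked example of the attenuation above (assumed values): at a boundary where the sampled UV values drop from 1 to 0 over a single invocation, |x_gradient| + |y_gradient| approaches 2, so gradient_magnitude approaches 1 and the attenuation becomes max(0.0, 1.0 - gradient_attenuation_factor * 1.0); with a factor of 1 the boundary pixel is fully suppressed, and with a factor of 0 the attenuation is effectively disabled (always 1).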

View File

@ -0,0 +1,18 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 first_color = texture_load(first_tx, texel);
float first_z_value = texture_load(first_z_tx, texel).x;
float second_z_value = texture_load(second_z_tx, texel).x;
/* The same logic as in compositor_z_combine_simple.glsl but only computes the mask to be later
* anti-aliased and used for mixing, see the logic in that file for more information. */
float z_combine_factor = float(first_z_value < second_z_value);
float alpha_factor = use_alpha ? first_color.a : 1.0;
float mix_factor = z_combine_factor * alpha_factor;
imageStore(mask_img, texel, vec4(mix_factor));
}

View File

@ -0,0 +1,21 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 first_color = texture_load(first_tx, texel);
vec4 second_color = texture_load(second_tx, texel);
float first_z_value = texture_load(first_z_tx, texel).x;
float second_z_value = texture_load(second_z_tx, texel).x;
float mask_value = texture_load(mask_tx, texel).x;
vec4 combined_color = mix(second_color, first_color, mask_value);
/* Use the more opaque alpha from the two images. */
combined_color.a = use_alpha ? max(second_color.a, first_color.a) : combined_color.a;
float combined_z = mix(second_z_value, first_z_value, mask_value);
imageStore(combined_img, texel, combined_color);
imageStore(combined_z_img, texel, vec4(combined_z));
}

View File

@ -0,0 +1,29 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
vec4 first_color = texture_load(first_tx, texel);
vec4 second_color = texture_load(second_tx, texel);
float first_z_value = texture_load(first_z_tx, texel).x;
float second_z_value = texture_load(second_z_tx, texel).x;
/* Mix between the first and second images using a mask such that the image with the object
* closer to the camera is returned. The mask value is then 1, and thus returns the first image
* if its Z value is less than that of the second image. Otherwise, its value is 0, and thus
* returns the second image. Furthermore, if the object in the first image is closer but has a
* non-opaque alpha, then the alpha is used as a mask, but only if Use Alpha is enabled. */
float z_combine_factor = float(first_z_value < second_z_value);
float alpha_factor = use_alpha ? first_color.a : 1.0;
float mix_factor = z_combine_factor * alpha_factor;
vec4 combined_color = mix(second_color, first_color, mix_factor);
/* Use the more opaque alpha from the two images. */
combined_color.a = use_alpha ? max(second_color.a, first_color.a) : combined_color.a;
float combined_z = mix(second_z_value, first_z_value, mix_factor);
imageStore(combined_img, texel, combined_color);
imageStore(combined_z_img, texel, vec4(combined_z));
}
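For illustration with made-up values: first_z = 0.2, second_z = 0.7 and a first-image alpha of 0.6 with use_alpha enabled give z_combine_factor = 1 and mix_factor = 1 * 0.6 = 0.6, so combined_color = mix(second_color, first_color, 0.6), combined_z = mix(0.7, 0.2, 0.6) = 0.4, and the output alpha is the more opaque of the two input alphas.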

View File

@ -0,0 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_displace)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "displacement_tx")
.sampler(2, ImageType::FLOAT_2D, "x_scale_tx")
.sampler(3, ImageType::FLOAT_2D, "y_scale_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_displace.glsl")
.do_static_compilation(true);

View File

@ -104,3 +104,30 @@ GPU_SHADER_CREATE_INFO(compositor_glare_streaks_accumulate)
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "accumulated_streaks_img")
.compute_source("compositor_glare_streaks_accumulate.glsl")
.do_static_compilation(true);
/* --------
* Fog Glow
* -------- */
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_shared)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_glare_fog_glow_downsample.glsl");
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_simple_average)
.define("SIMPLE_AVERAGE")
.additional_info("compositor_glare_fog_glow_downsample_shared")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_downsample_karis_average)
.define("KARIS_AVERAGE")
.additional_info("compositor_glare_fog_glow_downsample_shared")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_glare_fog_glow_upsample)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_glare_fog_glow_upsample.glsl")
.do_static_compilation(true);

View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_map_uv)
.local_group_size(16, 16)
.push_constant(Type::FLOAT, "gradient_attenuation_factor")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "uv_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_map_uv.glsl")
.do_static_compilation(true);

View File

@ -32,13 +32,21 @@ GPU_SHADER_CREATE_INFO(compositor_smaa_blending_weight_calculation)
.compute_source("compositor_smaa_blending_weight_calculation.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending)
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending_shared)
.local_group_size(16, 16)
.define("SMAA_GLSL_3")
.define("SMAA_RT_METRICS",
"vec4(1.0 / vec2(textureSize(input_tx, 0)), vec2(textureSize(input_tx, 0)))")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "weights_tx")
.compute_source("compositor_smaa_neighborhood_blending.glsl");
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending_color)
.additional_info("compositor_smaa_neighborhood_blending_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_smaa_neighborhood_blending.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending_float)
.additional_info("compositor_smaa_neighborhood_blending_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);

View File

@ -0,0 +1,38 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(compositor_z_combine_simple)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "use_alpha")
.sampler(0, ImageType::FLOAT_2D, "first_tx")
.sampler(1, ImageType::FLOAT_2D, "first_z_tx")
.sampler(2, ImageType::FLOAT_2D, "second_tx")
.sampler(3, ImageType::FLOAT_2D, "second_z_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_z_img")
.compute_source("compositor_z_combine_simple.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_z_combine_compute_mask)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "use_alpha")
.sampler(0, ImageType::FLOAT_2D, "first_tx")
.sampler(1, ImageType::FLOAT_2D, "first_z_tx")
.sampler(2, ImageType::FLOAT_2D, "second_z_tx")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "mask_img")
.compute_source("compositor_z_combine_compute_mask.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_z_combine_from_mask)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "use_alpha")
.sampler(0, ImageType::FLOAT_2D, "first_tx")
.sampler(1, ImageType::FLOAT_2D, "first_z_tx")
.sampler(2, ImageType::FLOAT_2D, "second_tx")
.sampler(3, ImageType::FLOAT_2D, "second_z_tx")
.sampler(4, ImageType::FLOAT_2D, "mask_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_z_img")
.compute_source("compositor_z_combine_from_mask.glsl")
.do_static_compilation(true);

View File

@ -102,7 +102,9 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_no_geom)
#endif
/* EEVEE_shaders_volumes_scatter_with_lights_sh_get */
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_with_lights_common).define("VOLUME_LIGHTING");
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_with_lights_common)
.define("VOLUME_LIGHTING")
.define("IRRADIANCE_HL2");
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_with_lights)
.additional_info("eevee_legacy_volumes_scatter_with_lights_common")

View File

@ -9,6 +9,8 @@
#include "DNA_screen_types.h"
#include "ED_view3d.h"
#include "UI_resources.h"
#include "DRW_engine.h"
@ -138,29 +140,44 @@ static void select_cache_init(void *vedata)
DRWState state = DRW_STATE_DEFAULT;
state |= RV3D_CLIPPING_ENABLED(draw_ctx->v3d, draw_ctx->rv3d) ? DRW_STATE_CLIP_PLANES : 0;
bool retopology_occlusion = RETOPOLOGY_ENABLED(draw_ctx->v3d) && !XRAY_ENABLED(draw_ctx->v3d);
float retopology_offset = RETOPOLOGY_OFFSET(draw_ctx->v3d);
{
DRW_PASS_CREATE(psl->depth_only_pass, state);
pd->shgrp_depth_only = DRW_shgroup_create(sh->select_id_uniform, psl->depth_only_pass);
/* Not setting ID because this pass only draws to the depth buffer. */
DRW_shgroup_uniform_float_copy(pd->shgrp_depth_only, "retopologyOffset", retopology_offset);
if (retopology_occlusion) {
pd->shgrp_occlude = DRW_shgroup_create(sh->select_id_uniform, psl->depth_only_pass);
/* Not setting ID because this pass only draws to the depth buffer. */
DRW_shgroup_uniform_float_copy(pd->shgrp_occlude, "retopologyOffset", 0.0f);
}
DRW_PASS_CREATE(psl->select_id_face_pass, state);
if (e_data.context.select_mode & SCE_SELECT_FACE) {
pd->shgrp_face_flat = DRW_shgroup_create(sh->select_id_flat, psl->select_id_face_pass);
DRW_shgroup_uniform_float_copy(pd->shgrp_face_flat, "retopologyOffset", retopology_offset);
}
else {
pd->shgrp_face_unif = DRW_shgroup_create(sh->select_id_uniform, psl->select_id_face_pass);
DRW_shgroup_uniform_int_copy(pd->shgrp_face_unif, "id", 0);
DRW_shgroup_uniform_float_copy(pd->shgrp_face_unif, "retopologyOffset", retopology_offset);
}
if (e_data.context.select_mode & SCE_SELECT_EDGE) {
DRW_PASS_CREATE(psl->select_id_edge_pass, state | DRW_STATE_FIRST_VERTEX_CONVENTION);
pd->shgrp_edge = DRW_shgroup_create(sh->select_id_flat, psl->select_id_edge_pass);
DRW_shgroup_uniform_float_copy(pd->shgrp_edge, "retopologyOffset", retopology_offset);
}
if (e_data.context.select_mode & SCE_SELECT_VERTEX) {
DRW_PASS_CREATE(psl->select_id_vert_pass, state);
pd->shgrp_vert = DRW_shgroup_create(sh->select_id_flat, psl->select_id_vert_pass);
DRW_shgroup_uniform_float_copy(pd->shgrp_vert, "sizeVertex", 2 * G_draw.block.size_vertex);
DRW_shgroup_uniform_float_copy(pd->shgrp_vert, "retopologyOffset", retopology_offset);
}
}
@ -197,6 +214,16 @@ static void select_cache_populate(void *vedata, Object *ob)
SELECTID_StorageList *stl = ((SELECTID_Data *)vedata)->stl;
const DRWContextState *draw_ctx = DRW_context_state_get();
const bool retopology_occlusion = RETOPOLOGY_ENABLED(draw_ctx->v3d) &&
!XRAY_ENABLED(draw_ctx->v3d);
if (retopology_occlusion && !DRW_object_is_in_edit_mode(ob)) {
if (ob->dt >= OB_SOLID) {
struct GPUBatch *geom_faces = DRW_mesh_batch_cache_get_surface(ob->data);
DRW_shgroup_call_obmat(stl->g_data->shgrp_occlude, geom_faces, ob->object_to_world);
}
return;
}
SELECTID_ObjectData *sel_data = (SELECTID_ObjectData *)DRW_drawdata_get(
&ob->id, &draw_engine_select_type);

View File

@ -40,6 +40,7 @@ typedef struct SELECTID_Shaders {
typedef struct SELECTID_PrivateData {
DRWShadingGroup *shgrp_depth_only;
DRWShadingGroup *shgrp_occlude;
DRWShadingGroup *shgrp_face_unif;
DRWShadingGroup *shgrp_face_flat;
DRWShadingGroup *shgrp_edge;

View File

@ -11,6 +11,7 @@ GPU_SHADER_INTERFACE_INFO(select_id_iface, "").flat(Type::INT, "id");
GPU_SHADER_CREATE_INFO(select_id_flat)
.push_constant(Type::FLOAT, "sizeVertex")
.push_constant(Type::INT, "offset")
.push_constant(Type::FLOAT, "retopologyOffset")
.vertex_in(0, Type::VEC3, "pos")
.vertex_in(1, Type::INT, "index")
.vertex_out(select_id_iface)
@ -24,6 +25,7 @@ GPU_SHADER_CREATE_INFO(select_id_uniform)
.define("UNIFORM_ID")
.push_constant(Type::FLOAT, "sizeVertex")
.push_constant(Type::INT, "id")
.push_constant(Type::FLOAT, "retopologyOffset")
.vertex_in(0, Type::VEC3, "pos")
.fragment_out(0, Type::UINT, "fragColor")
.vertex_source("select_id_vert.glsl")

View File

@ -8,8 +8,12 @@ void main()
#endif
vec3 world_pos = point_object_to_world(pos);
gl_Position = point_world_to_ndc(world_pos);
vec3 view_pos = point_world_to_view(world_pos);
gl_Position = point_view_to_ndc(view_pos);
gl_PointSize = sizeVertex;
/* Offset Z position for retopology selection occlusion. */
gl_Position.z += get_homogenous_z_offset(view_pos.z, gl_Position.w, retopologyOffset);
view_clipping_distances(world_pos);
}

View File

@ -2770,6 +2770,25 @@ void DRW_draw_select_id(Depsgraph *depsgraph, ARegion *region, View3D *v3d, cons
drw_engines_cache_populate(obj_eval);
}
if (RETOPOLOGY_ENABLED(v3d) && !XRAY_ENABLED(v3d)) {
DEGObjectIterSettings deg_iter_settings = {0};
deg_iter_settings.depsgraph = depsgraph;
deg_iter_settings.flags = DEG_OBJECT_ITER_FOR_RENDER_ENGINE_FLAGS;
DEG_OBJECT_ITER_BEGIN (&deg_iter_settings, ob) {
if (ob->type != OB_MESH) {
/* The iterator has evaluated meshes for all solid objects.
* It also has non-mesh objects, however, which are not supported here. */
continue;
}
if (DRW_object_is_in_edit_mode(ob)) {
/* Only background (non-edit) objects are used for occlusion. */
continue;
}
drw_engines_cache_populate(ob);
}
DEG_OBJECT_ITER_END;
}
drw_engines_cache_finish();
drw_task_graph_deinit();

View File

@ -1918,6 +1918,7 @@ static void ui_selectcontext_apply(bContext *C,
bool b;
int i;
float f;
char *str;
PointerRNA p;
} delta, min, max;
@ -1950,6 +1951,10 @@ static void ui_selectcontext_apply(bContext *C,
/* Not a delta in fact. */
delta.p = RNA_property_pointer_get(&but->rnapoin, prop);
}
else if (rna_type == PROP_STRING) {
/* Not a delta in fact. */
delta.str = RNA_property_string_get_alloc(&but->rnapoin, prop, nullptr, 0, nullptr);
}
# ifdef USE_ALLSELECT_LAYER_HACK
/* make up for not having 'handle_layer_buttons' */
@ -2023,9 +2028,16 @@ static void ui_selectcontext_apply(bContext *C,
const PointerRNA other_value = delta.p;
RNA_property_pointer_set(&lptr, lprop, other_value, nullptr);
}
else if (rna_type == PROP_STRING) {
const char *other_value = delta.str;
RNA_property_string_set(&lptr, lprop, other_value);
}
RNA_property_update(C, &lptr, prop);
}
if (rna_type == PROP_STRING) {
MEM_freeN(delta.str);
}
}
}

View File

@ -72,6 +72,7 @@ class UVPackIsland_Params {
eUVPackIsland_ShapeMethod shape_method;
};
class uv_phi;
class PackIsland {
public:
/** Aspect ratio, required for rotation. */
@ -102,6 +103,8 @@ class PackIsland {
/** Half of the diagonal of the AABB. */
float2 half_diagonal_;
void place_(const float scale, const uv_phi phi);
private:
void calculate_pivot(); /* Calculate `pivot_` and `half_diagonal_` based on added triangles. */
blender::Vector<float2> triangle_vertices_;

View File

@ -106,6 +106,8 @@ void PackIsland::add_triangle(const float2 uv0, const float2 uv1, const float2 u
void PackIsland::add_polygon(const blender::Span<float2> uvs, MemArena *arena, Heap *heap)
{
/* Internally, PackIsland uses triangles as the primitive, so we have to triangulate. */
int vert_count = int(uvs.size());
BLI_assert(vert_count >= 3);
int nfilltri = vert_count - 2;
@ -118,13 +120,7 @@ void PackIsland::add_polygon(const blender::Span<float2> uvs, MemArena *arena, H
/* Storage. */
uint(*tris)[3] = static_cast<uint(*)[3]>(
BLI_memarena_alloc(arena, sizeof(*tris) * size_t(nfilltri)));
float(*source)[2] = static_cast<float(*)[2]>(
BLI_memarena_alloc(arena, sizeof(*source) * size_t(vert_count)));
/* Copy input. */
for (int i = 0; i < vert_count; i++) {
copy_v2_v2(source[i], uvs[i]);
}
const float(*source)[2] = reinterpret_cast<const float(*)[2]>(uvs.data());
/* Triangulate. */
BLI_polyfill_calc_arena(source, vert_count, 0, tris, arena);
@ -163,7 +159,7 @@ void PackIsland::finalize_geometry(const UVPackIsland_Params &params, MemArena *
BLI_memarena_alloc(arena, sizeof(*index_map) * vert_count));
/* Prepare input for convex hull. */
float(*source)[2] = reinterpret_cast<float(*)[2]>(triangle_vertices_.data());
const float(*source)[2] = reinterpret_cast<const float(*)[2]>(triangle_vertices_.data());
/* Compute convex hull. */
int convex_len = BLI_convexhull_2d(source, vert_count, index_map);
@ -183,14 +179,25 @@ void PackIsland::finalize_geometry(const UVPackIsland_Params &params, MemArena *
void PackIsland::calculate_pivot()
{
/* `pivot_` is calculated as the center of the AABB;
* however, `pivot_` cannot be outside of the convex hull. */
Bounds<float2> triangle_bounds = *bounds::min_max(triangle_vertices_.as_span());
pivot_ = (triangle_bounds.min + triangle_bounds.max) * 0.5f;
half_diagonal_ = (triangle_bounds.max - triangle_bounds.min) * 0.5f;
}
void PackIsland::place_(const float scale, const uv_phi phi)
{
angle = phi.rotation;
float matrix_inverse[2][2];
build_inverse_transformation(scale, phi.rotation, matrix_inverse);
mul_v2_m2v2(pre_translate, matrix_inverse, phi.translation);
pre_translate -= pivot_;
}
UVPackIsland_Params::UVPackIsland_Params()
{
/* TEMPORARY, set everything to "zero" for backwards compatibility. */
rotate = false;
only_selected_uvs = false;
only_selected_faces = false;
@ -235,7 +242,7 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
/* Exclude an initial AABB near the origin. */
float next_u1 = *r_max_u;
float next_v1 = *r_max_v;
bool zigzag = next_u1 / target_aspect_y < next_v1; /* Horizontal or Vertical strip? */
bool zigzag = next_u1 < next_v1 * target_aspect_y; /* Horizontal or Vertical strip? */
float u0 = zigzag ? next_u1 : 0.0f;
float v0 = zigzag ? 0.0f : next_v1;
@ -254,7 +261,7 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
}
if (restart) {
/* We're at the end of a strip. Restart from U axis or V axis. */
zigzag = next_u1 / target_aspect_y < next_v1;
zigzag = next_u1 < next_v1 * target_aspect_y;
u0 = zigzag ? next_u1 : 0.0f;
v0 = zigzag ? 0.0f : next_v1;
}
@ -281,420 +288,6 @@ static void pack_islands_alpaca_turbo(const Span<UVAABBIsland *> islands,
*r_max_v = next_v1;
}
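(Side note on the strip-orientation test above: for any positive `target_aspect_y`, `next_u1 < next_v1 * target_aspect_y` holds exactly when `next_u1 / target_aspect_y < next_v1`, so the multiplied form selects the same strip while avoiding the per-iteration division.)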
/* Wrapper around #BLI_box_pack_2d. */
static void pack_island_box_pack_2d(const Span<UVAABBIsland *> aabbs,
const Span<PackIsland *> islands,
const float scale,
const float margin,
const float target_aspect_y,
float *r_max_u,
float *r_max_v)
{
/* Allocate storage. */
BoxPack *box_array = static_cast<BoxPack *>(
MEM_mallocN(sizeof(*box_array) * islands.size(), __func__));
/* Prepare for box_pack_2d. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
box->w = (island->half_diagonal_.x * 2 * scale + 2 * margin) / target_aspect_y;
box->h = island->half_diagonal_.y * 2 * scale + 2 * margin;
}
const bool sort_boxes = false; /* Use existing ordering from `aabbs`. */
/* \note Writes to `*r_max_u` and `*r_max_v`. */
BLI_box_pack_2d(box_array, int(aabbs.size()), sort_boxes, r_max_u, r_max_v);
*r_max_u *= target_aspect_y;
/* Write back box_pack UVs. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
island->angle = 0.0f; /* #BLI_box_pack_2d never rotates. */
island->pre_translate.x = (box->x + box->w * 0.5f) * target_aspect_y / scale -
island->pivot_.x;
island->pre_translate.y = (box->y + box->h * 0.5f) / scale - island->pivot_.y;
}
/* Housekeeping. */
MEM_freeN(box_array);
}
/**
* Helper class for the `xatlas` strategy.
* Accelerates geometry queries by approximating exact queries with a bitmap.
* Includes some bookkeeping variables to simplify the algorithm.
*/
class Occupancy {
public:
Occupancy(const float initial_scale);
void increase_scale(); /* Resize the scale of the bitmap and clear it. */
/* Write or Query a triangle on the bitmap. */
float trace_triangle(const float2 &uv0,
const float2 &uv1,
const float2 &uv2,
const float margin,
const bool write) const;
/* Write or Query an island on the bitmap. */
float trace_island(const PackIsland *island,
const uv_phi phi,
const float scale,
const float margin,
const bool write) const;
int bitmap_radix; /* Width and Height of `bitmap`. */
float bitmap_scale_reciprocal; /* == 1.0f / `bitmap_scale`. */
private:
mutable blender::Array<float> bitmap_;
mutable float2 witness_; /* Witness to a previously known occupied pixel. */
mutable float witness_distance_; /* Signed distance to nearest placed island. */
mutable uint triangle_hint_; /* Hint to a previously suspected overlapping triangle. */
const float terminal = 1048576.0f; /* 4 * bitmap_radix < terminal < INT_MAX / 4. */
};
Occupancy::Occupancy(const float initial_scale)
: bitmap_radix(800), bitmap_(bitmap_radix * bitmap_radix, false)
{
increase_scale();
bitmap_scale_reciprocal = bitmap_radix / initial_scale;
}
void Occupancy::increase_scale()
{
bitmap_scale_reciprocal *= 0.5f;
for (int i = 0; i < bitmap_radix * bitmap_radix; i++) {
bitmap_[i] = terminal;
}
witness_.x = -1;
witness_.y = -1;
witness_distance_ = 0.0f;
triangle_hint_ = 0;
}
static float signed_distance_fat_triangle(const float2 probe,
const float2 uv0,
const float2 uv1,
const float2 uv2)
{
/* Be careful with ordering, uv0 <- uv1 <- uv2 <- uv0 <- uv1 etc. */
const float dist01_ssq = dist_signed_squared_to_edge(probe, uv0, uv1);
const float dist12_ssq = dist_signed_squared_to_edge(probe, uv1, uv2);
const float dist20_ssq = dist_signed_squared_to_edge(probe, uv2, uv0);
float result_ssq = max_fff(dist01_ssq, dist12_ssq, dist20_ssq);
if (result_ssq < 0.0f) {
return -sqrtf(-result_ssq);
}
BLI_assert(result_ssq >= 0.0f);
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv0));
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv1));
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv2));
BLI_assert(result_ssq >= 0.0f);
return sqrtf(result_ssq);
}
float Occupancy::trace_triangle(const float2 &uv0,
const float2 &uv1,
const float2 &uv2,
const float margin,
const bool write) const
{
const float x0 = min_fff(uv0.x, uv1.x, uv2.x);
const float y0 = min_fff(uv0.y, uv1.y, uv2.y);
const float x1 = max_fff(uv0.x, uv1.x, uv2.x);
const float y1 = max_fff(uv0.y, uv1.y, uv2.y);
float spread = write ? margin * 2 : 0.0f;
int ix0 = std::max(int(floorf((x0 - spread) * bitmap_scale_reciprocal)), 0);
int iy0 = std::max(int(floorf((y0 - spread) * bitmap_scale_reciprocal)), 0);
int ix1 = std::min(int(floorf((x1 + spread) * bitmap_scale_reciprocal + 2)), bitmap_radix);
int iy1 = std::min(int(floorf((y1 + spread) * bitmap_scale_reciprocal + 2)), bitmap_radix);
const float2 uv0s = uv0 * bitmap_scale_reciprocal;
const float2 uv1s = uv1 * bitmap_scale_reciprocal;
const float2 uv2s = uv2 * bitmap_scale_reciprocal;
/* TODO: Better epsilon handling here could reduce search size. */
float epsilon = 0.7071f; /* == sqrt(0.5f), rounded up by 0.00002f. */
epsilon = std::max(epsilon, 2 * margin * bitmap_scale_reciprocal);
if (!write) {
if (ix0 <= witness_.x && witness_.x < ix1) {
if (iy0 <= witness_.y && witness_.y < iy1) {
const float distance = signed_distance_fat_triangle(witness_, uv0s, uv1s, uv2s);
const float extent = epsilon - distance - witness_distance_;
const float pixel_round_off = -0.1f; /* Go faster on nearly axis-aligned edges. */
if (extent > pixel_round_off) {
return std::max(0.0f, extent); /* Witness observes occupied. */
}
}
}
}
/* Iterate in opposite direction to outer search to improve witness effectiveness. */
for (int y = iy1 - 1; y >= iy0; y--) {
for (int x = ix1 - 1; x >= ix0; x--) {
float *hotspot = &bitmap_[y * bitmap_radix + x];
if (!write && *hotspot > epsilon) {
continue;
}
const float2 probe(x, y);
const float distance = signed_distance_fat_triangle(probe, uv0s, uv1s, uv2s);
if (write) {
*hotspot = min_ff(distance, *hotspot);
continue;
}
const float extent = epsilon - distance - *hotspot;
if (extent > 0.0f) {
witness_ = probe;
witness_distance_ = *hotspot;
return extent; /* Occupied. */
}
}
}
return -1.0f; /* Available. */
}
float2 PackIsland::get_diagonal_support_d4(const float scale,
const float rotation,
const float margin) const
{
if (rotation == 0.0f) {
return half_diagonal_ * scale + margin; /* Fast path for common case. */
}
/* TODO: BLI_assert rotation is a "Dihedral Group D4" transform. */
float matrix[2][2];
build_transformation(scale, rotation, matrix);
float diagonal_rotated[2];
mul_v2_m2v2(diagonal_rotated, matrix, half_diagonal_);
return float2(fabsf(diagonal_rotated[0]) + margin, fabsf(diagonal_rotated[1]) + margin);
}
float2 PackIsland::get_diagonal_support(const float scale,
const float rotation,
const float margin) const
{
/* Only "D4" transforms are currently supported. */
return get_diagonal_support_d4(scale, rotation, margin);
}
float Occupancy::trace_island(const PackIsland *island,
const uv_phi phi,
const float scale,
const float margin,
const bool write) const
{
float2 diagonal_support = island->get_diagonal_support(scale, phi.rotation, margin);
if (!write) {
if (phi.translation.x < diagonal_support.x || phi.translation.y < diagonal_support.y) {
return terminal; /* Occupied. */
}
}
float matrix[2][2];
island->build_transformation(scale, phi.rotation, matrix);
float2 pivot_transformed;
mul_v2_m2v2(pivot_transformed, matrix, island->pivot_);
float2 delta = phi.translation - pivot_transformed;
uint vert_count = uint(island->triangle_vertices_.size()); /* `uint` is faster than `int`. */
for (uint i = 0; i < vert_count; i += 3) {
uint j = (i + triangle_hint_) % vert_count;
float2 uv0;
float2 uv1;
float2 uv2;
mul_v2_m2v2(uv0, matrix, island->triangle_vertices_[j]);
mul_v2_m2v2(uv1, matrix, island->triangle_vertices_[j + 1]);
mul_v2_m2v2(uv2, matrix, island->triangle_vertices_[j + 2]);
float extent = trace_triangle(uv0 + delta, uv1 + delta, uv2 + delta, margin, write);
if (!write && extent >= 0.0f) {
triangle_hint_ = j;
return extent; /* Occupied. */
}
}
return -1.0f; /* Available. */
}
static uv_phi find_best_fit_for_island(const PackIsland *island,
const int scan_line,
Occupancy &occupancy,
const float scale,
const int angle_90_multiple,
const float margin,
const float target_aspect_y)
{
const float bitmap_scale = 1.0f / occupancy.bitmap_scale_reciprocal;
const float sqrt_target_aspect_y = sqrtf(target_aspect_y);
const int scan_line_x = int(scan_line * sqrt_target_aspect_y);
const int scan_line_y = int(scan_line / sqrt_target_aspect_y);
uv_phi phi;
phi.rotation = DEG2RADF(angle_90_multiple * 90);
float matrix[2][2];
island->build_transformation(scale, phi.rotation, matrix);
/* Caution, margin is zero for support_diagonal as we're tracking the top-right corner. */
float2 support_diagonal = island->get_diagonal_support_d4(scale, phi.rotation, 0.0f);
/* Scan using an "Alpaca"-style search, first horizontally using "less-than". */
int t = int(ceilf((2 * support_diagonal.x + margin) * occupancy.bitmap_scale_reciprocal));
while (t < scan_line_x) {
phi.translation = float2(t * bitmap_scale, scan_line_y * bitmap_scale) - support_diagonal;
const float extent = occupancy.trace_island(island, phi, scale, margin, false);
if (extent < 0.0f) {
return phi; /* Success. */
}
t = t + std::max(1, int(extent));
}
/* Then scan vertically using "less-than-or-equal" */
t = int(ceilf((2 * support_diagonal.y + margin) * occupancy.bitmap_scale_reciprocal));
while (t <= scan_line_y) {
phi.translation = float2(scan_line_x * bitmap_scale, t * bitmap_scale) - support_diagonal;
const float extent = occupancy.trace_island(island, phi, scale, margin, false);
if (extent < 0.0f) {
return phi; /* Success. */
}
t = t + std::max(1, int(extent));
}
return uv_phi(); /* Unable to find a place to fit. */
}
static float guess_initial_scale(const Span<PackIsland *> islands,
const float scale,
const float margin)
{
float sum = 1e-40f;
for (int64_t i : islands.index_range()) {
PackIsland *island = islands[i];
sum += island->half_diagonal_.x * 2 * scale + 2 * margin;
sum += island->half_diagonal_.y * 2 * scale + 2 * margin;
}
return sqrtf(sum) / 6.0f;
}
/**
* Pack irregular islands using the `xatlas` strategy, with no rotation.
*
* Loosely based on the 'xatlas' code by Jonathan Young
* from https://github.com/jpcy/xatlas
*
* A brute force packer (BF-Packer) with accelerators:
* - Uses a Bitmap Occupancy class.
* - Uses a "Witness Pixel" and a "Triangle Hint".
* - Write with `margin * 2`, read with `margin == 0`.
* - Lazy resetting of BF search.
*
* Performance would normally be `O(n^4)`, however the occupancy
* bitmap_radix is fixed, which gives a reduced time complexity of `O(n^3)`.
*/
static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
const Span<PackIsland *> islands,
const float scale,
const float margin,
const UVPackIsland_Params &params,
float *r_max_u,
float *r_max_v)
{
Occupancy occupancy(guess_initial_scale(islands, scale, margin));
float max_u = 0.0f;
float max_v = 0.0f;
blender::Array<uv_phi> phis(island_indices.size());
int scan_line = 0;
int i = 0;
/* The following `while` loop is setting up a three-way race:
* `for (scan_line = 0; scan_line < bitmap_radix; scan_line++)`
* `for (i : island_indices.index_range())`
* `while (bitmap_scale_reciprocal > 0) { bitmap_scale_reciprocal *= 0.5f; }`
*/
while (i < island_indices.size()) {
PackIsland *island = islands[island_indices[i]->index];
uv_phi phi;
int max_90_multiple = params.rotate && (i < 50) ? 4 : 1;
for (int angle_90_multiple = 0; angle_90_multiple < max_90_multiple; angle_90_multiple++) {
phi = find_best_fit_for_island(
island, scan_line, occupancy, scale, angle_90_multiple, margin, params.target_aspect_y);
if (phi.is_valid()) {
break;
}
}
if (!phi.is_valid()) {
/* Unable to find a fit on this scan_line. */
island = nullptr; /* Just mark it as null, we won't use it further. */
if (i < 10) {
scan_line++;
}
else {
/* Increasing by 2 here has the effect of changing the sampling pattern.
* The parameter '2' is not "free" in the sense that changing it requires
* a change to `bitmap_radix` and then re-tuning `alpaca_cutoff`.
* Possible values here *could* be 1, 2 or 3, however the only *reasonable*
* choice is 2. */
scan_line += 2;
}
if (scan_line < occupancy.bitmap_radix *
sqrtf(std::min(params.target_aspect_y, 1.0f / params.target_aspect_y))) {
continue; /* Try again on next scan_line. */
}
/* Enlarge search parameters. */
scan_line = 0;
occupancy.increase_scale();
/* Redraw already placed islands. (Greedy.) */
for (int j = 0; j < i; j++) {
occupancy.trace_island(islands[island_indices[j]->index], phis[j], scale, margin, true);
}
continue;
}
phis[i] = phi; /* Place island. */
occupancy.trace_island(island, phi, scale, margin, true);
i++; /* Next island. */
island->angle = phi.rotation;
float matrix_inverse[2][2];
island->build_inverse_transformation(scale, phi.rotation, matrix_inverse);
mul_v2_m2v2(island->pre_translate, matrix_inverse, phi.translation);
island->pre_translate -= island->pivot_;
float2 support = island->get_diagonal_support(scale, phi.rotation, margin);
float2 top_right = phi.translation + support;
max_u = std::max(top_right.x, max_u);
max_v = std::max(top_right.y, max_v);
if (i < 128 || (i & 31) == 16) {
scan_line = 0; /* Restart completely. */
}
else {
scan_line = std::max(0, scan_line - 25); /* `-25` must be odd. */
}
}
*r_max_u = max_u;
*r_max_v = max_v;
}
/**
* Helper function for #pack_islands_alpaca_rotate
*
@ -852,6 +445,423 @@ static void pack_islands_alpaca_rotate(const Span<UVAABBIsland *> islands,
*r_max_v = next_v1;
}
/* Wrapper around #BLI_box_pack_2d. */
static void pack_island_box_pack_2d(const Span<UVAABBIsland *> aabbs,
const Span<PackIsland *> islands,
const float scale,
const float margin,
const float target_aspect_y,
float *r_max_u,
float *r_max_v)
{
/* Allocate storage. */
BoxPack *box_array = static_cast<BoxPack *>(
MEM_mallocN(sizeof(*box_array) * islands.size(), __func__));
/* Prepare for box_pack_2d. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
box->w = (island->half_diagonal_.x * 2 * scale + 2 * margin) / target_aspect_y;
box->h = island->half_diagonal_.y * 2 * scale + 2 * margin;
}
const bool sort_boxes = false; /* Use existing ordering from `aabbs`. */
/* \note Writes to `*r_max_u` and `*r_max_v`. */
BLI_box_pack_2d(box_array, int(aabbs.size()), sort_boxes, r_max_u, r_max_v);
*r_max_u *= target_aspect_y;
/* Write back box_pack UVs. */
for (const int64_t i : aabbs.index_range()) {
PackIsland *island = islands[aabbs[i]->index];
BoxPack *box = box_array + i;
uv_phi phi;
phi.rotation = 0.0f; /* #BLI_box_pack_2d never rotates. */
phi.translation.x = (box->x + box->w * 0.5f) * target_aspect_y;
phi.translation.y = (box->y + box->h * 0.5f);
island->place_(scale, phi);
}
/* Housekeeping. */
MEM_freeN(box_array);
}
/**
* Helper class for the `xatlas` strategy.
* Accelerates geometry queries by approximating exact queries with a bitmap.
* Includes some bookkeeping variables to simplify the algorithm.
*/
class Occupancy {
public:
Occupancy(const float initial_scale);
void increase_scale(); /* Resize the scale of the bitmap and clear it. */
/* Write or Query a triangle on the bitmap. */
float trace_triangle(const float2 &uv0,
const float2 &uv1,
const float2 &uv2,
const float margin,
const bool write) const;
/* Write or Query an island on the bitmap. */
float trace_island(const PackIsland *island,
const uv_phi phi,
const float scale,
const float margin,
const bool write) const;
int bitmap_radix; /* Width and Height of `bitmap`. */
float bitmap_scale_reciprocal; /* == 1.0f / `bitmap_scale`. */
private:
mutable blender::Array<float> bitmap_;
mutable float2 witness_; /* Witness to a previously known occupied pixel. */
mutable float witness_distance_; /* Signed distance to nearest placed island. */
mutable uint triangle_hint_; /* Hint to a previously suspected overlapping triangle. */
const float terminal = 1048576.0f; /* 4 * bitmap_radix < terminal < INT_MAX / 4. */
};
Occupancy::Occupancy(const float initial_scale)
: bitmap_radix(800), bitmap_(bitmap_radix * bitmap_radix, false)
{
bitmap_scale_reciprocal = 1.0f; /* lint, prevent uninitialized memory access. */
increase_scale();
bitmap_scale_reciprocal = bitmap_radix / initial_scale; /* Actually set the value. */
}
void Occupancy::increase_scale()
{
BLI_assert(bitmap_scale_reciprocal > 0.0f); /* TODO: Packing has failed, report error. */
bitmap_scale_reciprocal *= 0.5f;
for (int i = 0; i < bitmap_radix * bitmap_radix; i++) {
bitmap_[i] = terminal;
}
witness_.x = -1;
witness_.y = -1;
witness_distance_ = 0.0f;
triangle_hint_ = 0;
}
static float signed_distance_fat_triangle(const float2 probe,
const float2 uv0,
const float2 uv1,
const float2 uv2)
{
/* Be careful with ordering, uv0 <- uv1 <- uv2 <- uv0 <- uv1 etc. */
const float dist01_ssq = dist_signed_squared_to_edge(probe, uv0, uv1);
const float dist12_ssq = dist_signed_squared_to_edge(probe, uv1, uv2);
const float dist20_ssq = dist_signed_squared_to_edge(probe, uv2, uv0);
float result_ssq = max_fff(dist01_ssq, dist12_ssq, dist20_ssq);
if (result_ssq < 0.0f) {
return -sqrtf(-result_ssq);
}
BLI_assert(result_ssq >= 0.0f);
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv0));
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv1));
result_ssq = std::min(result_ssq, blender::math::length_squared(probe - uv2));
BLI_assert(result_ssq >= 0.0f);
return sqrtf(result_ssq);
}
float Occupancy::trace_triangle(const float2 &uv0,
const float2 &uv1,
const float2 &uv2,
const float margin,
const bool write) const
{
const float x0 = min_fff(uv0.x, uv1.x, uv2.x);
const float y0 = min_fff(uv0.y, uv1.y, uv2.y);
const float x1 = max_fff(uv0.x, uv1.x, uv2.x);
const float y1 = max_fff(uv0.y, uv1.y, uv2.y);
float spread = write ? margin * 2 : 0.0f;
int ix0 = std::max(int(floorf((x0 - spread) * bitmap_scale_reciprocal)), 0);
int iy0 = std::max(int(floorf((y0 - spread) * bitmap_scale_reciprocal)), 0);
int ix1 = std::min(int(floorf((x1 + spread) * bitmap_scale_reciprocal + 2)), bitmap_radix);
int iy1 = std::min(int(floorf((y1 + spread) * bitmap_scale_reciprocal + 2)), bitmap_radix);
const float2 uv0s = uv0 * bitmap_scale_reciprocal;
const float2 uv1s = uv1 * bitmap_scale_reciprocal;
const float2 uv2s = uv2 * bitmap_scale_reciprocal;
/* TODO: Better epsilon handling here could reduce search size. */
float epsilon = 0.7071f; /* == sqrt(0.5f), rounded up by 0.00002f. */
epsilon = std::max(epsilon, 2 * margin * bitmap_scale_reciprocal);
if (!write) {
if (ix0 <= witness_.x && witness_.x < ix1) {
if (iy0 <= witness_.y && witness_.y < iy1) {
const float distance = signed_distance_fat_triangle(witness_, uv0s, uv1s, uv2s);
const float extent = epsilon - distance - witness_distance_;
const float pixel_round_off = -0.1f; /* Go faster on nearly axis-aligned edges. */
if (extent > pixel_round_off) {
return std::max(0.0f, extent); /* Witness observes occupied. */
}
}
}
}
/* Iterate in opposite direction to outer search to improve witness effectiveness. */
for (int y = iy1 - 1; y >= iy0; y--) {
for (int x = ix1 - 1; x >= ix0; x--) {
float *hotspot = &bitmap_[y * bitmap_radix + x];
if (!write && *hotspot > epsilon) {
continue;
}
const float2 probe(x, y);
const float distance = signed_distance_fat_triangle(probe, uv0s, uv1s, uv2s);
if (write) {
*hotspot = min_ff(distance, *hotspot);
continue;
}
const float extent = epsilon - distance - *hotspot;
if (extent > 0.0f) {
witness_ = probe;
witness_distance_ = *hotspot;
return extent; /* Occupied. */
}
}
}
return -1.0f; /* Available. */
}
float2 PackIsland::get_diagonal_support_d4(const float scale,
const float rotation,
const float margin) const
{
if (rotation == 0.0f) {
return half_diagonal_ * scale + margin; /* Fast path for common case. */
}
if (rotation == DEG2RADF(180.0f)) {
return get_diagonal_support_d4(scale, 0.0f, margin); /* Same as 0.0f */
}
/* TODO: BLI_assert rotation is a "Dihedral Group D4" transform. */
float matrix[2][2];
build_transformation(scale, rotation, matrix);
float diagonal_rotated[2];
mul_v2_m2v2(diagonal_rotated, matrix, half_diagonal_);
return float2(fabsf(diagonal_rotated[0]) + margin, fabsf(diagonal_rotated[1]) + margin);
}
float2 PackIsland::get_diagonal_support(const float scale,
const float rotation,
const float margin) const
{
/* Only "D4" transforms are currently supported. */
return get_diagonal_support_d4(scale, rotation, margin);
}
float Occupancy::trace_island(const PackIsland *island,
const uv_phi phi,
const float scale,
const float margin,
const bool write) const
{
float2 diagonal_support = island->get_diagonal_support(scale, phi.rotation, margin);
if (!write) {
if (phi.translation.x < diagonal_support.x || phi.translation.y < diagonal_support.y) {
return terminal; /* Occupied. */
}
}
float matrix[2][2];
island->build_transformation(scale, phi.rotation, matrix);
float2 pivot_transformed;
mul_v2_m2v2(pivot_transformed, matrix, island->pivot_);
float2 delta = phi.translation - pivot_transformed;
uint vert_count = uint(island->triangle_vertices_.size()); /* `uint` is faster than `int`. */
for (uint i = 0; i < vert_count; i += 3) {
uint j = (i + triangle_hint_) % vert_count;
float2 uv0;
float2 uv1;
float2 uv2;
mul_v2_m2v2(uv0, matrix, island->triangle_vertices_[j]);
mul_v2_m2v2(uv1, matrix, island->triangle_vertices_[j + 1]);
mul_v2_m2v2(uv2, matrix, island->triangle_vertices_[j + 2]);
float extent = trace_triangle(uv0 + delta, uv1 + delta, uv2 + delta, margin, write);
if (!write && extent >= 0.0f) {
triangle_hint_ = j;
return extent; /* Occupied. */
}
}
return -1.0f; /* Available. */
}
static uv_phi find_best_fit_for_island(const PackIsland *island,
const int scan_line,
Occupancy &occupancy,
const float scale,
const int angle_90_multiple,
const float margin,
const float target_aspect_y)
{
const float bitmap_scale = 1.0f / occupancy.bitmap_scale_reciprocal;
const float sqrt_target_aspect_y = sqrtf(target_aspect_y);
const int scan_line_x = int(scan_line * sqrt_target_aspect_y);
const int scan_line_y = int(scan_line / sqrt_target_aspect_y);
uv_phi phi;
phi.rotation = DEG2RADF(angle_90_multiple * 90);
float matrix[2][2];
island->build_transformation(scale, phi.rotation, matrix);
/* Caution, margin is zero for support_diagonal as we're tracking the top-right corner. */
float2 support_diagonal = island->get_diagonal_support_d4(scale, phi.rotation, 0.0f);
/* Scan using an "Alpaca"-style search, first horizontally using "less-than". */
int t = int(ceilf((2 * support_diagonal.x + margin) * occupancy.bitmap_scale_reciprocal));
while (t < scan_line_x) {
phi.translation = float2(t * bitmap_scale, scan_line_y * bitmap_scale) - support_diagonal;
const float extent = occupancy.trace_island(island, phi, scale, margin, false);
if (extent < 0.0f) {
return phi; /* Success. */
}
t = t + std::max(1, int(extent));
}
/* Then scan vertically using "less-than-or-equal" */
t = int(ceilf((2 * support_diagonal.y + margin) * occupancy.bitmap_scale_reciprocal));
while (t <= scan_line_y) {
phi.translation = float2(scan_line_x * bitmap_scale, t * bitmap_scale) - support_diagonal;
const float extent = occupancy.trace_island(island, phi, scale, margin, false);
if (extent < 0.0f) {
return phi; /* Success. */
}
t = t + std::max(1, int(extent));
}
return uv_phi(); /* Unable to find a place to fit. */
}
static float guess_initial_scale(const Span<PackIsland *> islands,
const float scale,
const float margin)
{
float sum = 1e-40f;
for (int64_t i : islands.index_range()) {
PackIsland *island = islands[i];
sum += island->half_diagonal_.x * 2 * scale + 2 * margin;
sum += island->half_diagonal_.y * 2 * scale + 2 * margin;
}
return sqrtf(sum) / 6.0f;
}
/**
* Pack irregular islands using the `xatlas` strategy, and optional D4 transforms.
*
* Loosely based on the 'xatlas' code by Jonathan Young
* from https://github.com/jpcy/xatlas
*
* A brute force packer (BF-Packer) with accelerators:
* - Uses a Bitmap Occupancy class.
* - Uses a "Witness Pixel" and a "Triangle Hint".
* - Write with `margin * 2`, read with `margin == 0`.
* - Lazy resetting of BF search.
*
* Performance would normally be `O(n^4)`, however the occupancy
* bitmap_radix is fixed, which gives a reduced time complexity of `O(n^3)`.
*/
static void pack_island_xatlas(const Span<UVAABBIsland *> island_indices,
const Span<PackIsland *> islands,
const float scale,
const float margin,
const UVPackIsland_Params &params,
float *r_max_u,
float *r_max_v)
{
Occupancy occupancy(guess_initial_scale(islands, scale, margin));
float max_u = 0.0f;
float max_v = 0.0f;
blender::Array<uv_phi> phis(island_indices.size());
int scan_line = 0;
int i = 0;
/* The following `while` loop is setting up a three-way race:
* `for (scan_line = 0; scan_line < bitmap_radix; scan_line++)`
* `for (i : island_indices.index_range())`
* `while (bitmap_scale_reciprocal > 0) { bitmap_scale_reciprocal *= 0.5f; }`
*/
while (i < island_indices.size()) {
PackIsland *island = islands[island_indices[i]->index];
uv_phi phi;
int max_90_multiple = params.rotate && (i < 50) ? 4 : 1;
for (int angle_90_multiple = 0; angle_90_multiple < max_90_multiple; angle_90_multiple++) {
phi = find_best_fit_for_island(
island, scan_line, occupancy, scale, angle_90_multiple, margin, params.target_aspect_y);
if (phi.is_valid()) {
break;
}
}
if (!phi.is_valid()) {
/* Unable to find a fit on this scan_line. */
island = nullptr; /* Just mark it as null, we won't use it further. */
if (i < 10) {
scan_line++;
}
else {
/* Increasing by 2 here has the effect of changing the sampling pattern.
* The parameter '2' is not "free" in the sense that changing it requires
* a change to `bitmap_radix` and then re-tuning `alpaca_cutoff`.
* Possible values here *could* be 1, 2 or 3, however the only *reasonable*
* choice is 2. */
scan_line += 2;
}
if (scan_line < occupancy.bitmap_radix *
sqrtf(std::min(params.target_aspect_y, 1.0f / params.target_aspect_y))) {
continue; /* Try again on next scan_line. */
}
/* Enlarge search parameters. */
scan_line = 0;
occupancy.increase_scale();
/* Redraw already placed islands. (Greedy.) */
for (int j = 0; j < i; j++) {
occupancy.trace_island(islands[island_indices[j]->index], phis[j], scale, margin, true);
}
continue;
}
/* Place island. */
phis[i] = phi;
island->place_(scale, phi);
occupancy.trace_island(island, phi, scale, margin, true);
i++; /* Next island. */
/* Update top-right corner. */
float2 top_right = island->get_diagonal_support(scale, phi.rotation, margin) + phi.translation;
max_u = std::max(top_right.x, max_u);
max_v = std::max(top_right.y, max_v);
/* Heuristics to reduce size of brute-force search. */
if (i < 128 || (i & 31) == 16) {
scan_line = 0; /* Restart completely. */
}
else {
scan_line = std::max(0, scan_line - 25); /* `-25` must be odd. */
}
}
*r_max_u = max_u;
*r_max_v = max_v;
}
/**
* Pack islands using a mix of other strategies.
* \param islands: The islands to be packed. Will be modified with results.
@ -1171,15 +1181,31 @@ void PackIsland::build_transformation(const float scale,
r_matrix[0][1] = -sin_angle * scale * aspect_y;
r_matrix[1][0] = sin_angle * scale / aspect_y;
r_matrix[1][1] = cos_angle * scale;
/*
if (reflect) {
r_matrix[0][0] *= -1.0f;
r_matrix[0][1] *= -1.0f;
}
*/
}
void PackIsland::build_inverse_transformation(const float scale,
const float angle,
float (*r_matrix)[2]) const
{
/* TODO: Generate inverse transform directly. */
build_transformation(scale, angle, r_matrix);
invert_m2_m2(r_matrix, r_matrix);
const float cos_angle = cosf(angle);
const float sin_angle = sinf(angle);
r_matrix[0][0] = cos_angle / scale;
r_matrix[0][1] = sin_angle / scale * aspect_y;
r_matrix[1][0] = -sin_angle / scale / aspect_y;
r_matrix[1][1] = cos_angle / scale;
/*
if (reflect) {
r_matrix[0][0] *= -1.0f;
r_matrix[1][0] *= -1.0f;
}
*/
}
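A quick way to convince yourself the directly-generated inverse matches the forward transform is to multiply the two matrices and check that the product is the identity. A minimal standalone sketch, assuming example values for `scale`, `angle` and `aspect_y` (the `[0][0]` term is assumed to follow the same pattern as the lines above; this is an illustration, not part of the patch):

#include <cassert>
#include <cmath>
#include <cstdio>

int main()
{
  /* Assumed example values; `aspect_y` and the `[0][0]` term mirror the pattern above. */
  const float scale = 2.5f, angle = 0.7f, aspect_y = 1.25f;
  const float c = std::cos(angle), s = std::sin(angle);

  /* Forward transform, column-major as float[column][row], matching the code above. */
  const float fwd[2][2] = {{c * scale, -s * scale * aspect_y},
                           {s * scale / aspect_y, c * scale}};
  /* Inverse generated directly, matching build_inverse_transformation. */
  const float inv[2][2] = {{c / scale, s / scale * aspect_y},
                           {-s / scale / aspect_y, c / scale}};

  /* Their product should be the identity matrix. */
  for (int col = 0; col < 2; col++) {
    for (int row = 0; row < 2; row++) {
      float sum = 0.0f;
      for (int k = 0; k < 2; k++) {
        sum += fwd[k][row] * inv[col][k];
      }
      const float expected = (col == row) ? 1.0f : 0.0f;
      assert(std::fabs(sum - expected) < 1e-5f);
      printf("product[%d][%d] = %f\n", col, row, sum);
    }
  }
  return 0;
}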
} // namespace blender::geometry

View File

@ -92,6 +92,8 @@ struct GPUPass {
GPUCodegenCreateInfo *create_info = nullptr;
/** Orphaned GPUPasses gets freed by the garbage collector. */
uint refcount;
/** The last time the refcount was greater than 0. */
int gc_timestamp;
/** Identity hash generated from all GLSL code. */
uint32_t hash;
/** Did we already try to compile the attached GPUShader? */
@ -909,28 +911,23 @@ void GPU_pass_release(GPUPass *pass)
void GPU_pass_cache_garbage_collect(void)
{
static int lasttime = 0;
const int shadercollectrate = 60; /* hardcoded for now. */
int ctime = int(PIL_check_seconds_timer());
if (ctime < shadercollectrate + lasttime) {
return;
}
lasttime = ctime;
BLI_spin_lock(&pass_cache_spin);
GPUPass *next, **prev_pass = &pass_cache;
for (GPUPass *pass = pass_cache; pass; pass = next) {
next = pass->next;
if (pass->refcount == 0) {
if (pass->refcount > 0) {
pass->gc_timestamp = ctime;
}
else if (pass->gc_timestamp + shadercollectrate < ctime) {
/* Remove from list */
*prev_pass = next;
gpu_pass_free(pass);
continue;
}
else {
prev_pass = &pass->next;
}
prev_pass = &pass->next;
}
BLI_spin_unlock(&pass_cache_spin);
}

View File

@ -4551,7 +4551,10 @@ static void rna_def_space_view3d_overlay(BlenderRNA *brna)
prop = RNA_def_property(srna, "show_retopology", PROP_BOOLEAN, PROP_NONE);
RNA_def_property_boolean_sdna(prop, NULL, "overlay.edit_flag", V3D_OVERLAY_EDIT_RETOPOLOGY);
RNA_def_property_ui_text(prop, "Retopology", "Use retopology display");
RNA_def_property_ui_text(prop,
"Retopology",
"Hide the solid mesh and offset the overlay towards the view. "
"Selection is occluded by inactive geometry, unless X-Ray is enabled");
RNA_def_property_update(prop, NC_SPACE | ND_SPACE_VIEW3D | NS_VIEW3D_SHADING, NULL);
prop = RNA_def_property(srna, "retopology_offset", PROP_FLOAT, PROP_DISTANCE);

View File

@ -5,9 +5,15 @@
* \ingroup cmpnodes
*/
#include "BLI_math_vector.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "BLT_translation.h"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
#include "node_composite_util.hh"
@ -17,14 +23,25 @@ namespace blender::nodes::node_composite_displace_cc {
static void cmp_node_displace_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>(N_("Image")).default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Color>(N_("Image"))
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(0);
b.add_input<decl::Vector>(N_("Vector"))
.default_value({1.0f, 1.0f, 1.0f})
.min(0.0f)
.max(1.0f)
.subtype(PROP_TRANSLATION);
b.add_input<decl::Float>(N_("X Scale")).default_value(0.0f).min(-1000.0f).max(1000.0f);
b.add_input<decl::Float>(N_("Y Scale")).default_value(0.0f).min(-1000.0f).max(1000.0f);
.subtype(PROP_TRANSLATION)
.compositor_domain_priority(1);
b.add_input<decl::Float>(N_("X Scale"))
.default_value(0.0f)
.min(-1000.0f)
.max(1000.0f)
.compositor_domain_priority(2);
b.add_input<decl::Float>(N_("Y Scale"))
.default_value(0.0f)
.min(-1000.0f)
.max(1000.0f)
.compositor_domain_priority(3);
b.add_output<decl::Color>(N_("Image"));
}
@ -36,8 +53,63 @@ class DisplaceOperation : public NodeOperation {
void execute() override
{
get_input("Image").pass_through(get_result("Image"));
context().set_info_message("Viewport compositor setup not fully supported");
if (is_identity()) {
get_input("Image").pass_through(get_result("Image"));
return;
}
GPUShader *shader = shader_manager().get("compositor_displace");
GPU_shader_bind(shader);
const Result &input_image = get_input("Image");
GPU_texture_mipmap_mode(input_image.texture(), true, true);
GPU_texture_anisotropic_filter(input_image.texture(), true);
GPU_texture_extend_mode(input_image.texture(), GPU_SAMPLER_EXTEND_MODE_CLAMP_TO_BORDER);
input_image.bind_as_texture(shader, "input_tx");
const Result &input_displacement = get_input("Vector");
input_displacement.bind_as_texture(shader, "displacement_tx");
const Result &input_x_scale = get_input("X Scale");
input_x_scale.bind_as_texture(shader, "x_scale_tx");
const Result &input_y_scale = get_input("Y Scale");
input_y_scale.bind_as_texture(shader, "y_scale_tx");
const Domain domain = compute_domain();
Result &output_image = get_result("Image");
output_image.allocate_texture(domain);
output_image.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
input_image.unbind_as_texture();
input_displacement.unbind_as_texture();
input_x_scale.unbind_as_texture();
input_y_scale.unbind_as_texture();
output_image.unbind_as_image();
GPU_shader_unbind();
}
bool is_identity()
{
const Result &input_image = get_input("Image");
if (input_image.is_single_value()) {
return true;
}
const Result &input_displacement = get_input("Vector");
if (input_displacement.is_single_value() &&
math::is_zero(input_displacement.get_vector_value())) {
return true;
}
const Result &input_x_scale = get_input("X Scale");
const Result &input_y_scale = get_input("Y Scale");
if (input_x_scale.is_single_value() && input_x_scale.get_float_value() == 0.0f &&
input_y_scale.is_single_value() && input_y_scale.get_float_value() == 0.0f) {
return true;
}
return false;
}
};
@ -57,8 +129,6 @@ void register_node_type_cmp_displace()
cmp_node_type_base(&ntype, CMP_NODE_DISPLACE, "Displace", NODE_CLASS_DISTORT);
ntype.declare = file_ns::cmp_node_displace_declare;
ntype.get_compositor_operation = file_ns::get_compositor_operation;
ntype.realtime_compositor_unsupported_message = N_(
"Node not supported in the Viewport compositor");
nodeRegisterType(&ntype);
}

View File

@ -7,6 +7,7 @@
#include <array>
#include "BLI_array.hh"
#include "BLI_assert.h"
#include "BLI_index_range.hh"
#include "BLI_math_base.h"
@ -33,6 +34,7 @@
#include "node_composite_util.hh"
#define MAX_GLARE_ITERATIONS 5
#define MAX_GLARE_SIZE 9
namespace blender::nodes::node_composite_glare_cc {
@ -131,11 +133,6 @@ class GlareOperation : public NodeOperation {
return true;
}
/* The fog glow mode is currently unsupported. */
if (node_storage(bnode()).type == CMP_NODE_GLARE_FOG_GLOW) {
return true;
}
return false;
}
@ -693,11 +690,132 @@ class GlareOperation : public NodeOperation {
* Fog Glow Glare.
* --------------- */
/* Not yet implemented. Unreachable code due to the is_identity method. */
Result execute_fog_glow(Result & /*highlights_result*/)
/* Fog glow is computed by first progressively half-downsampling the highlights down to a certain
* size, then progressively double-upsampling the last downsampled result up to the original size
* of the highlights, adding the downsampled result of the same size in each upsampling step.
* This can be illustrated as follows:
*
* Highlights ---+---> Fog Glare
* | |
* Downsampled ---+---> Upsampled
* | |
* Downsampled ---+---> Upsampled
* | |
* Downsampled ---+---> Upsampled
* | ^
* ... |
* Downsampled ------------'
*
* The smooth downsampling followed by smooth upsampling can be thought of as a cheap way to
* approximate a large radius blur, and adding the corresponding downsampled result while
* upsampling is done to counter the attenuation that happens during downsampling.
*
* Smaller downsampled results contribute to larger glare size, so controlling the size can be
* done by stopping downsampling down to a certain size, where the maximum possible size is
* achieved when downsampling happens down to the smallest size of 2. */
Result execute_fog_glow(Result &highlights_result)
{
BLI_assert_unreachable();
return Result(ResultType::Color, texture_pool());
/* The maximum possible glare size is achieved when we downsample down to the smallest size of
* 2, which would result in a downsampling chain length of the binary logarithm of the smaller
* dimension of the size of the highlights.
*
* However, as users might want a smaller glare size, we reduce the chain length by the halving
* count supplied by the user. */
const int2 glare_size = get_glare_size();
const int smaller_glare_dimension = math::min(glare_size.x, glare_size.y);
const int chain_length = int(std::log2(smaller_glare_dimension)) -
compute_fog_glare_size_halving_count();
Array<Result> downsample_chain = compute_fog_glow_downsample_chain(highlights_result,
chain_length);
/* Notice that for a chain length of n, we need (n - 1) upsampling passes. */
const IndexRange upsample_passes_range(chain_length - 1);
GPUShader *shader = shader_manager().get("compositor_glare_fog_glow_upsample");
GPU_shader_bind(shader);
for (const int i : upsample_passes_range) {
Result &input = downsample_chain[upsample_passes_range.last() - i + 1];
input.bind_as_texture(shader, "input_tx");
GPU_texture_filter_mode(input.texture(), true);
const Result &output = downsample_chain[upsample_passes_range.last() - i];
output.bind_as_image(shader, "output_img", true);
compute_dispatch_threads_at_least(shader, output.domain().size);
input.unbind_as_texture();
output.unbind_as_image();
input.release();
}
GPU_shader_unbind();
return downsample_chain[0];
}
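To make the shape of this chain concrete, here is a minimal, self-contained 1D sketch, assuming simple pair-averaging and sample-repetition filters as stand-ins for the actual downsample and filtered-upsample shaders:

#include <cstdio>
#include <vector>

/* Halve the resolution by averaging pairs: stand-in for the downsample passes. */
static std::vector<float> downsample(const std::vector<float> &in)
{
  std::vector<float> out(in.size() / 2);
  for (size_t i = 0; i < out.size(); i++) {
    out[i] = 0.5f * (in[2 * i] + in[2 * i + 1]);
  }
  return out;
}

/* Double the resolution by repeating samples: stand-in for the filtered upsample pass. */
static std::vector<float> upsample(const std::vector<float> &in)
{
  std::vector<float> out(in.size() * 2);
  for (size_t i = 0; i < out.size(); i++) {
    out[i] = in[i / 2];
  }
  return out;
}

int main()
{
  /* A bright "highlight" spike in an otherwise dark signal. */
  std::vector<float> highlights = {0, 0, 0, 8, 8, 0, 0, 0};

  /* Downsample chain; as in the operation, chain[0] is the input itself. */
  const int chain_length = 3;
  std::vector<std::vector<float>> chain = {highlights};
  for (int i = 1; i < chain_length; i++) {
    chain.push_back(downsample(chain.back()));
  }

  /* Upsample from the smallest level, adding the same-sized downsampled result each step. */
  std::vector<float> accumulated = chain.back();
  for (int i = chain_length - 2; i >= 0; i--) {
    std::vector<float> up = upsample(accumulated);
    for (size_t j = 0; j < up.size(); j++) {
      up[j] += chain[i][j];
    }
    accumulated = up;
  }

  /* The spike has spread outwards: a cheap approximation of a large-radius blur. */
  for (const float v : accumulated) {
    printf("%g ", v);
  }
  printf("\n");
  return 0;
}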
/* Progressively downsample the given result into a result with half the size for the given chain
* length, returning an array containing the chain of downsampled results. The first result of
* the chain is the given result itself for easier handling. The chain length is expected not
* to exceed the binary logarithm of the smaller dimension of the given result, because that
* would result in downsampling passes that produce useless textures with just one pixel. */
Array<Result> compute_fog_glow_downsample_chain(Result &highlights_result, int chain_length)
{
const Result downsampled_result = Result::Temporary(ResultType::Color, texture_pool());
Array<Result> downsample_chain(chain_length, downsampled_result);
/* We assign the original highlights result to the first result of the chain to make the code
* easier. In turn, the number of passes is one less than the chain length, because the first
* result needn't be computed. */
downsample_chain[0] = highlights_result;
const IndexRange downsample_passes_range(chain_length - 1);
GPUShader *shader;
for (const int i : downsample_passes_range) {
/* For the first downsample pass, we use a special "Karis" downsample pass that applies a
* form of local tone mapping to reduce the contributions of fireflies, see the shader for
* more information. Later passes use a simple average downsampling filter because fireflies
* do not survive the first pass. */
if (i == downsample_passes_range.first()) {
shader = shader_manager().get("compositor_glare_fog_glow_downsample_karis_average");
GPU_shader_bind(shader);
}
else {
shader = shader_manager().get("compositor_glare_fog_glow_downsample_simple_average");
GPU_shader_bind(shader);
}
const Result &input = downsample_chain[i];
input.bind_as_texture(shader, "input_tx");
GPU_texture_filter_mode(input.texture(), true);
Result &output = downsample_chain[i + 1];
output.allocate_texture(input.domain().size / 2);
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, output.domain().size);
input.unbind_as_texture();
output.unbind_as_image();
GPU_shader_unbind();
}
return downsample_chain;
}
/* The fog glow has a maximum possible size when the fog glow size is equal to MAX_GLARE_SIZE and
* halves for every unit decrement of the fog glow size. This method computes the number of
* halvings that should take place, which is simply the difference from MAX_GLARE_SIZE. */
int compute_fog_glare_size_halving_count()
{
return MAX_GLARE_SIZE - get_fog_glow_size();
}
/* The size of the fog glow relative to its maximum possible size, see the
* compute_fog_glare_size_halving_count() method for more information. */
int get_fog_glow_size()
{
return node_storage(bnode()).size;
}
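As a worked example, assuming a 1920x1080 highlights buffer: with MAX_GLARE_SIZE = 9, a Size of 7 gives a halving count of 9 - 7 = 2, so the chain length computed in execute_fog_glow() is int(log2(1080)) - 2 = 10 - 2 = 8 levels.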
/* ----------

View File

@ -7,10 +7,14 @@
#include "BLT_translation.h"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "UI_interface.h"
#include "UI_resources.h"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
#include "node_composite_util.hh"
@ -20,8 +24,14 @@ namespace blender::nodes::node_composite_map_uv_cc {
static void cmp_node_map_uv_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>(N_("Image")).default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Vector>(N_("UV")).default_value({1.0f, 0.0f, 0.0f}).min(0.0f).max(1.0f);
b.add_input<decl::Color>(N_("Image"))
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_skip_realization();
b.add_input<decl::Vector>(N_("UV"))
.default_value({1.0f, 0.0f, 0.0f})
.min(0.0f)
.max(1.0f)
.compositor_domain_priority(0);
b.add_output<decl::Color>(N_("Image"));
}
@ -38,8 +48,45 @@ class MapUVOperation : public NodeOperation {
void execute() override
{
get_input("Image").pass_through(get_result("Image"));
context().set_info_message("Viewport compositor setup not fully supported");
if (get_input("Image").is_single_value()) {
get_input("Image").pass_through(get_result("Image"));
return;
}
GPUShader *shader = shader_manager().get("compositor_map_uv");
GPU_shader_bind(shader);
GPU_shader_uniform_1f(
shader, "gradient_attenuation_factor", get_gradient_attenuation_factor());
const Result &input_image = get_input("Image");
GPU_texture_mipmap_mode(input_image.texture(), true, true);
GPU_texture_anisotropic_filter(input_image.texture(), true);
GPU_texture_extend_mode(input_image.texture(), GPU_SAMPLER_EXTEND_MODE_CLAMP_TO_BORDER);
input_image.bind_as_texture(shader, "input_tx");
const Result &input_uv = get_input("UV");
input_uv.bind_as_texture(shader, "uv_tx");
const Domain domain = compute_domain();
Result &output_image = get_result("Image");
output_image.allocate_texture(domain);
output_image.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
input_image.unbind_as_texture();
input_uv.unbind_as_texture();
output_image.unbind_as_image();
GPU_shader_unbind();
}
/* A factor that controls the attenuation of the result at the pixels where the gradients of the
* UV texture are too high, see the shader for more information. The factor ranges between zero
* and one, where it has no effect when it is zero and performs full attenuation when it is 1. */
float get_gradient_attenuation_factor()
{
return bnode().custom1 / 100.0f;
}
};
@ -60,8 +107,6 @@ void register_node_type_cmp_mapuv()
ntype.declare = file_ns::cmp_node_map_uv_declare;
ntype.draw_buttons = file_ns::node_composit_buts_map_uv;
ntype.get_compositor_operation = file_ns::get_compositor_operation;
ntype.realtime_compositor_unsupported_message = N_(
"Node not supported in the Viewport compositor");
nodeRegisterType(&ntype);
}

View File

@ -5,12 +5,20 @@
* \ingroup cmpnodes
*/
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "BLT_translation.h"
#include "UI_interface.h"
#include "UI_resources.h"
#include "COM_algorithm_smaa.hh"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
#include "GPU_shader.h"
#include "node_composite_util.hh"
@ -20,10 +28,22 @@ namespace blender::nodes::node_composite_zcombine_cc {
static void cmp_node_zcombine_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>(N_("Image")).default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Float>(N_("Z")).default_value(1.0f).min(0.0f).max(10000.0f);
b.add_input<decl::Color>(N_("Image"), "Image_001").default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Float>(N_("Z"), "Z_001").default_value(1.0f).min(0.0f).max(10000.0f);
b.add_input<decl::Color>(N_("Image"))
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(0);
b.add_input<decl::Float>(N_("Z"))
.default_value(1.0f)
.min(0.0f)
.max(10000.0f)
.compositor_domain_priority(2);
b.add_input<decl::Color>(N_("Image"), "Image_001")
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(1);
b.add_input<decl::Float>(N_("Z"), "Z_001")
.default_value(1.0f)
.min(0.0f)
.max(10000.0f)
.compositor_domain_priority(3);
b.add_output<decl::Color>(N_("Image"));
b.add_output<decl::Float>(N_("Z"));
}
@ -45,9 +65,171 @@ class ZCombineOperation : public NodeOperation {
void execute() override
{
get_input("Image").pass_through(get_result("Image"));
get_result("Z").allocate_invalid();
context().set_info_message("Viewport compositor setup not fully supported");
if (compute_domain().size == int2(1)) {
execute_single_value();
}
else if (use_anti_aliasing()) {
execute_anti_aliased();
}
else {
execute_simple();
}
}
void execute_single_value()
{
const float4 first_color = get_input("Image").get_color_value();
const float4 second_color = get_input("Image_001").get_color_value();
const float first_z_value = get_input("Z").get_float_value();
const float second_z_value = get_input("Z_001").get_float_value();
/* Mix between the first and second images using a mask such that the image with the object
* closer to the camera is returned. The mask value is 1, and thus the first image is returned,
* if its Z value is less than that of the second image. Otherwise, the mask is 0 and the second
* image is returned. Furthermore, if the object in the first image is closer but has a
* non-opaque alpha, then the alpha is used as a mask, but only if Use Alpha is enabled. */
const float z_combine_factor = float(first_z_value < second_z_value);
const float alpha_factor = use_alpha() ? first_color.w : 1.0f;
const float mix_factor = z_combine_factor * alpha_factor;
Result &combined = get_result("Image");
if (combined.should_compute()) {
float4 combined_color = math::interpolate(second_color, first_color, mix_factor);
/* Use the more opaque alpha from the two images. */
combined_color.w = use_alpha() ? math::max(second_color.w, first_color.w) : combined_color.w;
combined.allocate_single_value();
combined.set_color_value(combined_color);
}
Result &combined_z = get_result("Z");
if (combined_z.should_compute()) {
const float combined_z_value = math::interpolate(second_z_value, first_z_value, mix_factor);
combined_z.allocate_single_value();
combined_z.set_float_value(combined_z_value);
}
}
void execute_simple()
{
GPUShader *shader = shader_manager().get("compositor_z_combine_simple");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "use_alpha", use_alpha());
const Result &first = get_input("Image");
first.bind_as_texture(shader, "first_tx");
const Result &first_z = get_input("Z");
first_z.bind_as_texture(shader, "first_z_tx");
const Result &second = get_input("Image_001");
second.bind_as_texture(shader, "second_tx");
const Result &second_z = get_input("Z_001");
second_z.bind_as_texture(shader, "second_z_tx");
Result &combined = get_result("Image");
const Domain domain = compute_domain();
combined.allocate_texture(domain);
combined.bind_as_image(shader, "combined_img");
Result &combined_z = get_result("Z");
combined_z.allocate_texture(domain);
combined_z.bind_as_image(shader, "combined_z_img");
compute_dispatch_threads_at_least(shader, domain.size);
first.unbind_as_texture();
first_z.unbind_as_texture();
second.unbind_as_texture();
second_z.unbind_as_texture();
combined.unbind_as_image();
combined_z.unbind_as_image();
GPU_shader_unbind();
}
void execute_anti_aliased()
{
Result mask = compute_mask();
GPUShader *shader = shader_manager().get("compositor_z_combine_from_mask");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "use_alpha", use_alpha());
const Result &first = get_input("Image");
first.bind_as_texture(shader, "first_tx");
const Result &first_z = get_input("Z");
first_z.bind_as_texture(shader, "first_z_tx");
const Result &second = get_input("Image_001");
second.bind_as_texture(shader, "second_tx");
const Result &second_z = get_input("Z_001");
second_z.bind_as_texture(shader, "second_z_tx");
mask.bind_as_texture(shader, "mask_tx");
Result &combined = get_result("Image");
const Domain domain = compute_domain();
combined.allocate_texture(domain);
combined.bind_as_image(shader, "combined_img");
Result &combined_z = get_result("Z");
combined_z.allocate_texture(domain);
combined_z.bind_as_image(shader, "combined_z_img");
compute_dispatch_threads_at_least(shader, domain.size);
first.unbind_as_texture();
first_z.unbind_as_texture();
second.unbind_as_texture();
second_z.unbind_as_texture();
mask.unbind_as_texture();
combined.unbind_as_image();
combined_z.unbind_as_image();
GPU_shader_unbind();
mask.release();
}
Result compute_mask()
{
GPUShader *shader = shader_manager().get("compositor_z_combine_compute_mask");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "use_alpha", use_alpha());
const Result &first = get_input("Image");
first.bind_as_texture(shader, "first_tx");
const Result &first_z = get_input("Z");
first_z.bind_as_texture(shader, "first_z_tx");
const Result &second_z = get_input("Z_001");
second_z.bind_as_texture(shader, "second_z_tx");
const Domain domain = compute_domain();
Result mask = Result::Temporary(ResultType::Float, texture_pool());
mask.allocate_texture(domain);
mask.bind_as_image(shader, "mask_img");
compute_dispatch_threads_at_least(shader, domain.size);
first.unbind_as_texture();
first_z.unbind_as_texture();
second_z.unbind_as_texture();
mask.unbind_as_image();
GPU_shader_unbind();
Result anti_aliased_mask = Result::Temporary(ResultType::Float, texture_pool());
smaa(context(), mask, anti_aliased_mask);
mask.release();
return anti_aliased_mask;
}
bool use_alpha()
{
return bnode().custom1 != 0;
}
bool use_anti_aliasing()
{
return bnode().custom2 == 0;
}
};
@ -68,8 +250,6 @@ void register_node_type_cmp_zcombine()
ntype.declare = file_ns::cmp_node_zcombine_declare;
ntype.draw_buttons = file_ns::node_composit_buts_zcombine;
ntype.get_compositor_operation = file_ns::get_compositor_operation;
ntype.realtime_compositor_unsupported_message = N_(
"Node not supported in the Viewport compositor");
nodeRegisterType(&ntype);
}

View File

@ -1110,12 +1110,6 @@ static PyObject *C_BVHTree_FromObject(PyObject * /*cls*/, PyObject *args, PyObje
bool use_cage = false;
bool free_mesh = false;
const MLoopTri *lt;
const int *corner_verts;
float(*coords)[3] = nullptr;
uint(*tris)[3] = nullptr;
uint coords_len, tris_len;
float epsilon = 0.0f;
if (!PyArg_ParseTupleAndKeywords(args,
@ -1142,69 +1136,66 @@ static PyObject *C_BVHTree_FromObject(PyObject * /*cls*/, PyObject *args, PyObje
return nullptr;
}
const blender::Span<int> corner_verts = mesh->corner_verts();
const blender::Span<MLoopTri> looptris = mesh->looptris();
/* Get data for tessellation */
{
lt = BKE_mesh_runtime_looptri_ensure(mesh);
tris_len = uint(BKE_mesh_runtime_looptri_len(mesh));
coords_len = uint(mesh->totvert);
const uint coords_len = uint(mesh->totvert);
coords = static_cast<float(*)[3]>(MEM_mallocN(sizeof(*coords) * size_t(coords_len), __func__));
tris = static_cast<uint(*)[3]>(MEM_mallocN(sizeof(*tris) * size_t(tris_len), __func__));
memcpy(coords, BKE_mesh_vert_positions(mesh), sizeof(float[3]) * size_t(mesh->totvert));
float(*coords)[3] = static_cast<float(*)[3]>(
MEM_mallocN(sizeof(*coords) * size_t(coords_len), __func__));
uint(*tris)[3] = static_cast<uint(*)[3]>(
MEM_mallocN(sizeof(*tris) * size_t(looptris.size()), __func__));
memcpy(coords, BKE_mesh_vert_positions(mesh), sizeof(float[3]) * size_t(mesh->totvert));
corner_verts = BKE_mesh_corner_verts(mesh);
}
BVHTree *tree;
{
BVHTree *tree;
uint i;
int *orig_index = nullptr;
blender::float3 *orig_normal = nullptr;
int *orig_index = nullptr;
blender::float3 *orig_normal = nullptr;
tree = BLI_bvhtree_new(int(tris_len), epsilon, PY_BVH_TREE_TYPE_DEFAULT, PY_BVH_AXIS_DEFAULT);
if (tree) {
orig_index = static_cast<int *>(
MEM_mallocN(sizeof(*orig_index) * size_t(tris_len), __func__));
if (!BKE_mesh_poly_normals_are_dirty(mesh)) {
const blender::Span<blender::float3> poly_normals = mesh->poly_normals();
orig_normal = static_cast<blender::float3 *>(
MEM_malloc_arrayN(size_t(mesh->totpoly), sizeof(blender::float3), __func__));
blender::MutableSpan(orig_normal, poly_normals.size()).copy_from(poly_normals);
}
for (i = 0; i < tris_len; i++, lt++) {
float co[3][3];
tris[i][0] = uint(corner_verts[lt->tri[0]]);
tris[i][1] = uint(corner_verts[lt->tri[1]]);
tris[i][2] = uint(corner_verts[lt->tri[2]]);
copy_v3_v3(co[0], coords[tris[i][0]]);
copy_v3_v3(co[1], coords[tris[i][1]]);
copy_v3_v3(co[2], coords[tris[i][2]]);
BLI_bvhtree_insert(tree, int(i), co[0], 3);
orig_index[i] = int(lt->poly);
}
BLI_bvhtree_balance(tree);
tree = BLI_bvhtree_new(
int(looptris.size()), epsilon, PY_BVH_TREE_TYPE_DEFAULT, PY_BVH_AXIS_DEFAULT);
if (tree) {
orig_index = static_cast<int *>(
MEM_mallocN(sizeof(*orig_index) * size_t(looptris.size()), __func__));
if (!BKE_mesh_poly_normals_are_dirty(mesh)) {
const blender::Span<blender::float3> poly_normals = mesh->poly_normals();
orig_normal = static_cast<blender::float3 *>(
MEM_malloc_arrayN(size_t(mesh->totpoly), sizeof(blender::float3), __func__));
blender::MutableSpan(orig_normal, poly_normals.size()).copy_from(poly_normals);
}
if (free_mesh) {
BKE_id_free(nullptr, mesh);
for (const int64_t i : looptris.index_range()) {
float co[3][3];
tris[i][0] = uint(corner_verts[looptris[i].tri[0]]);
tris[i][1] = uint(corner_verts[looptris[i].tri[1]]);
tris[i][2] = uint(corner_verts[looptris[i].tri[2]]);
copy_v3_v3(co[0], coords[tris[i][0]]);
copy_v3_v3(co[1], coords[tris[i][1]]);
copy_v3_v3(co[2], coords[tris[i][2]]);
BLI_bvhtree_insert(tree, int(i), co[0], 3);
orig_index[i] = int(looptris[i].poly);
}
return bvhtree_CreatePyObject(tree,
epsilon,
coords,
coords_len,
tris,
tris_len,
orig_index,
reinterpret_cast<float(*)[3]>(orig_normal));
BLI_bvhtree_balance(tree);
}
if (free_mesh) {
BKE_id_free(nullptr, mesh);
}
return bvhtree_CreatePyObject(tree,
epsilon,
coords,
coords_len,
tris,
uint(looptris.size()),
orig_index,
reinterpret_cast<float(*)[3]>(orig_normal));
}
#endif /* MATH_STANDALONE */