Geometry Nodes: Node group operators initial phase #108947

Merged
Hans Goudey merged 72 commits from node-group-operators into main 2023-06-29 13:58:01 +02:00
24 changed files with 642 additions and 564 deletions
Showing only changes of commit 1cbc17692d - Show all commits

View File

@ -1160,7 +1160,6 @@ classes = (
NODE_PT_node_color_presets,
NODE_PT_active_node_generic,
NODE_PT_active_node_color,
NODE_PT_active_node_properties,
NODE_PT_texture_mapping,
NODE_PT_active_tool,
NODE_PT_backdrop,
@ -1174,6 +1173,7 @@ classes = (
NODE_PT_panels,
NODE_UL_simulation_zone_items,
NODE_PT_simulation_zone_items,
NODE_PT_active_node_properties,
node_panel(EEVEE_MATERIAL_PT_settings),
node_panel(MATERIAL_PT_viewport),

View File

@ -80,11 +80,7 @@ void COM_execute(Render *render,
node_tree->execution_mode == NTREE_EXECUTION_MODE_REALTIME)
{
/* Realtime GPU compositor. */
/* TODO: add persistence and depsgraph updates for better performance. */
blender::render::RealtimeCompositor compositor(
*render, *scene, *render_data, *node_tree, rendering, view_name);
compositor.execute();
RE_compositor_execute(*render, *scene, *render_data, *node_tree, rendering, view_name);
}
else {
/* Tiled and Full Frame compositors. */

View File

@ -84,7 +84,9 @@ class Context {
/* Get the texture where the given render pass is stored. This should be called by the Render
* Layer node to populate its outputs. */
virtual GPUTexture *get_input_texture(int view_layer, const char *pass_name) = 0;
virtual GPUTexture *get_input_texture(const Scene *scene,
int view_layer,
const char *pass_name) = 0;
/* Get the name of the view currently being rendered. */
virtual StringRef get_view_name() = 0;

View File

@ -163,9 +163,12 @@ class Context : public realtime_compositor::Context {
return DRW_viewport_texture_list_get()->color;
}
GPUTexture *get_input_texture(int view_layer, const char *pass_name) override
GPUTexture *get_input_texture(const Scene *scene, int view_layer, const char *pass_name) override
{
if (view_layer == 0 && STREQ(pass_name, RE_PASSNAME_COMBINED)) {
if ((DEG_get_original_id(const_cast<ID *>(&scene->id)) ==
DEG_get_original_id(&DRW_context_state_get()->scene->id)) &&
view_layer == 0 && STREQ(pass_name, RE_PASSNAME_COMBINED))
{
return get_output_texture();
}
else {

View File

@ -3187,8 +3187,7 @@ void DRW_render_context_enable(Render *render)
if (re_system_gpu_context != NULL) {
DRW_system_gpu_render_context_enable(re_system_gpu_context);
/* We need to query gpu context after a gl context has been bound. */
void *re_blender_gpu_context = NULL;
re_blender_gpu_context = RE_blender_gpu_context_get(render);
void *re_blender_gpu_context = RE_blender_gpu_context_ensure(render);
DRW_blender_gpu_render_context_enable(re_blender_gpu_context);
}
else {
@ -3208,8 +3207,7 @@ void DRW_render_context_disable(Render *render)
void *re_system_gpu_context = RE_system_gpu_context_get(render);
if (re_system_gpu_context != NULL) {
void *re_blender_gpu_context = NULL;
re_blender_gpu_context = RE_blender_gpu_context_get(render);
void *re_blender_gpu_context = RE_blender_gpu_context_ensure(render);
/* GPU rendering may occur during context disable. */
DRW_blender_gpu_render_context_disable(re_blender_gpu_context);
GPU_render_end();

View File

@ -1088,7 +1088,7 @@ static int screen_render_invoke(bContext *C, wmOperator *op, const wmEvent *even
RE_current_scene_update_cb(re, rj, current_scene_update);
RE_stats_draw_cb(re, rj, image_renderinfo_cb);
RE_progress_cb(re, rj, render_progress_update);
RE_system_gpu_context_create(re);
RE_system_gpu_context_ensure(re);
rj->re = re;
G.is_break = false;

View File

@ -250,7 +250,7 @@ static void compo_initjob(void *cjv)
}
cj->re = RE_NewSceneRender(scene);
RE_system_gpu_context_create(cj->re);
RE_system_gpu_context_ensure(cj->re);
}
/* Called before redraw notifiers, it moves finished previews over. */
@ -309,8 +309,6 @@ static void compo_startjob(void *cjv,
}
}
RE_system_gpu_context_destroy(cj->re);
ntree->runtime->test_break = nullptr;
ntree->runtime->stats_draw = nullptr;
ntree->runtime->progress = nullptr;

View File

@ -824,13 +824,15 @@ class RenderLayerOperation : public NodeOperation {
void execute() override
{
const Scene *scene = reinterpret_cast<const Scene *>(bnode().id);
const int view_layer = bnode().custom1;
Result &image_result = get_result("Image");
Result &alpha_result = get_result("Alpha");
if (image_result.should_compute() || alpha_result.should_compute()) {
GPUTexture *combined_texture = context().get_input_texture(view_layer, RE_PASSNAME_COMBINED);
GPUTexture *combined_texture = context().get_input_texture(
scene, view_layer, RE_PASSNAME_COMBINED);
if (image_result.should_compute()) {
execute_pass(image_result, combined_texture, "compositor_read_pass_color");
}
@ -850,7 +852,8 @@ class RenderLayerOperation : public NodeOperation {
continue;
}
GPUTexture *pass_texture = context().get_input_texture(view_layer, output->identifier);
GPUTexture *pass_texture = context().get_input_texture(
scene, view_layer, output->identifier);
if (output->type == SOCK_FLOAT) {
execute_pass(result, pass_texture, "compositor_read_pass_float");
}

View File

@ -8,6 +8,7 @@
#include "UI_interface.h"
#include "UI_resources.h"
#include "BLI_array_utils.hh"
#include "BLI_math_base_safe.h"
#include "NOD_socket_search_link.hh"
@ -215,9 +216,7 @@ static void node_geo_exec(GeoNodeExecParams params)
data.resize(next_data_index + selection.size());
MutableSpan<float> selected_data = data.as_mutable_span().slice(next_data_index,
selection.size());
for (const int i : selection.index_range()) {
selected_data[i] = component_data[selection[i]];
}
array_utils::gather(component_data, selection, selected_data);
}
}
@ -295,9 +294,7 @@ static void node_geo_exec(GeoNodeExecParams params)
data.resize(data.size() + selection.size());
MutableSpan<float3> selected_data = data.as_mutable_span().slice(next_data_index,
selection.size());
for (const int i : selection.index_range()) {
selected_data[i] = component_data[selection[i]];
}
array_utils::gather(component_data, selection, selected_data);
}
}

View File

@ -78,10 +78,7 @@ static OffsetIndices<int> accumulate_counts_to_offsets(const IndexMask &selectio
r_offset_data.last() = count * selection.size();
}
else {
threading::parallel_for(selection.index_range(), 1024, [&](const IndexRange range) {
counts.materialize_compressed(selection.slice(range),
r_offset_data.as_mutable_span().slice(range));
});
array_utils::gather(counts, selection, r_offset_data.as_mutable_span(), 1024);
offset_indices::accumulate_counts_to_offsets(r_offset_data);
}
return OffsetIndices<int>(r_offset_data);
@ -95,10 +92,8 @@ static void threaded_slice_fill(const OffsetIndices<int> offsets,
MutableSpan<T> dst)
{
BLI_assert(offsets.total_size() == dst.size());
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i : range) {
dst.slice(offsets[i]).fill(src[selection[i]]);
}
selection.foreach_index(GrainSize(512), [&](const int64_t index, const int64_t i) {
dst.slice(offsets[i]).fill(src[index]);
});
}
@ -231,15 +226,13 @@ static void copy_curve_attributes_without_id(
using T = decltype(dummy);
const Span<T> src = attribute.src.typed<T>();
MutableSpan<T> dst = attribute.dst.span.typed<T>();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i_src_curve = selection[i_selection];
const Span<T> curve_src = src.slice(src_points_by_curve[i_src_curve]);
for (const int i_dst_curve : curve_offsets[i_selection]) {
dst.slice(dst_points_by_curve[i_dst_curve]).copy_from(curve_src);
}
}
});
selection.foreach_index(
GrainSize(512), [&](const int64_t index, const int64_t i_selection) {
const Span<T> curve_src = src.slice(src_points_by_curve[index]);
for (const int dst_curve_index : curve_offsets[i_selection]) {
dst.slice(dst_points_by_curve[dst_curve_index]).copy_from(curve_src);
}
});
});
break;
default:
@ -278,17 +271,16 @@ static void copy_stable_id_curves(const bke::CurvesGeometry &src_curves,
const OffsetIndices src_points_by_curve = src_curves.points_by_curve();
const OffsetIndices dst_points_by_curve = dst_curves.points_by_curve();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i_src_curve = selection[i_selection];
const Span<int> curve_src = src.slice(src_points_by_curve[i_src_curve]);
const IndexRange duplicates_range = offsets[i_selection];
for (const int i_duplicate : IndexRange(offsets[i_selection].size()).drop_front(1)) {
const int i_dst_curve = duplicates_range[i_duplicate];
copy_hashed_ids(curve_src, i_duplicate, dst.slice(dst_points_by_curve[i_dst_curve]));
}
}
});
selection.foreach_index(
GrainSize(512), [&](const int64_t i_src_curve, const int64_t i_selection) {
const Span<int> curve_src = src.slice(src_points_by_curve[i_src_curve]);
const IndexRange duplicates_range = offsets[i_selection];
for (const int i_duplicate : IndexRange(offsets[i_selection].size()).drop_front(1)) {
const int i_dst_curve = duplicates_range[i_duplicate];
copy_hashed_ids(curve_src, i_duplicate, dst.slice(dst_points_by_curve[i_dst_curve]));
}
});
dst_attribute.finish();
}
@ -324,13 +316,14 @@ static void duplicate_curves(GeometrySet &geometry_set,
int dst_curves_num = 0;
int dst_points_num = 0;
for (const int i_curve : selection.index_range()) {
const int count = counts[selection[i_curve]];
selection.foreach_index_optimized<int>([&](const int index, const int i_curve) {
const int count = counts[index];
curve_offset_data[i_curve] = dst_curves_num;
point_offset_data[i_curve] = dst_points_num;
dst_curves_num += count;
dst_points_num += count * points_by_curve[selection[i_curve]].size();
}
dst_points_num += count * points_by_curve[index].size();
});
if (dst_points_num == 0) {
geometry_set.remove_geometry_during_modify();
@ -348,18 +341,17 @@ static void duplicate_curves(GeometrySet &geometry_set,
bke::CurvesGeometry &new_curves = new_curves_id->geometry.wrap();
MutableSpan<int> all_dst_offsets = new_curves.offsets_for_write();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i_src_curve = selection[i_selection];
const IndexRange src_curve_range = points_by_curve[i_src_curve];
const IndexRange dst_curves_range = curve_offsets[i_selection];
MutableSpan<int> dst_offsets = all_dst_offsets.slice(dst_curves_range);
for (const int i_duplicate : IndexRange(dst_curves_range.size())) {
dst_offsets[i_duplicate] = point_offsets[i_selection].start() +
src_curve_range.size() * i_duplicate;
}
}
});
selection.foreach_index(GrainSize(512),
[&](const int64_t i_src_curve, const int64_t i_selection) {
const IndexRange src_curve_range = points_by_curve[i_src_curve];
const IndexRange dst_curves_range = curve_offsets[i_selection];
MutableSpan<int> dst_offsets = all_dst_offsets.slice(dst_curves_range);
for (const int i_duplicate : IndexRange(dst_curves_range.size())) {
dst_offsets[i_duplicate] = point_offsets[i_selection].start() +
src_curve_range.size() * i_duplicate;
}
});
all_dst_offsets.last() = dst_points_num;
copy_curve_attributes_without_id(curves, selection, curve_offsets, propagation_info, new_curves);
@ -505,12 +497,12 @@ static void duplicate_faces(GeometrySet &geometry_set,
int total_polys = 0;
int total_loops = 0;
Array<int> offset_data(selection.size() + 1);
for (const int i_selection : selection.index_range()) {
const int count = counts[selection[i_selection]];
selection.foreach_index_optimized<int>([&](const int index, const int i_selection) {
const int count = counts[index];
offset_data[i_selection] = total_polys;
total_polys += count;
total_loops += count * polys[selection[i_selection]].size();
}
total_loops += count * polys[index].size();
});
offset_data[selection.size()] = total_polys;
const OffsetIndices<int> duplicates(offset_data);
@ -527,19 +519,17 @@ static void duplicate_faces(GeometrySet &geometry_set,
int poly_index = 0;
int loop_index = 0;
for (const int i_selection : selection.index_range()) {
selection.foreach_index_optimized<int>([&](const int index, const int i_selection) {
const IndexRange poly_range = duplicates[i_selection];
const IndexRange source = polys[selection[i_selection]];
for ([[maybe_unused]] const int i_duplicate : IndexRange(poly_range.size())) {
const IndexRange source = polys[index];
for ([[maybe_unused]] const int i_duplicate : poly_range.index_range()) {
new_poly_offsets[poly_index] = loop_index;
for (const int i_loops : IndexRange(source.size())) {
const int src_corner = source[i_loops];
for (const int src_corner : source) {
loop_mapping[loop_index] = src_corner;
vert_mapping[loop_index] = corner_verts[src_corner];
edge_mapping[loop_index] = corner_edges[src_corner];
new_edges[loop_index][0] = loop_index;
if (i_loops + 1 != source.size()) {
if (src_corner != source.last()) {
new_edges[loop_index][1] = loop_index + 1;
}
else {
@ -549,7 +539,7 @@ static void duplicate_faces(GeometrySet &geometry_set,
}
poly_index++;
}
}
});
std::iota(new_corner_verts.begin(), new_corner_verts.end(), 0);
std::iota(new_corner_edges.begin(), new_corner_edges.end(), 0);
@ -647,21 +637,19 @@ static void copy_stable_id_edges(const Mesh &mesh,
VArraySpan<int> src{src_attribute.varray.typed<int>()};
MutableSpan<int> dst = dst_attribute.span.typed<int>();
threading::parallel_for(IndexRange(selection.size()), 1024, [&](IndexRange range) {
for (const int i_selection : range) {
const IndexRange edge_range = offsets[i_selection];
if (edge_range.size() == 0) {
continue;
}
const int2 &edge = edges[selection[i_selection]];
const IndexRange vert_range = {edge_range.start() * 2, edge_range.size() * 2};
selection.foreach_index(GrainSize(1024), [&](const int64_t index, const int64_t i_selection) {
const IndexRange edge_range = offsets[i_selection];
if (edge_range.is_empty()) {
return;
}
const int2 &edge = edges[index];
const IndexRange vert_range = {edge_range.start() * 2, edge_range.size() * 2};
dst[vert_range[0]] = src[edge[0]];
dst[vert_range[1]] = src[edge[1]];
for (const int i_duplicate : IndexRange(1, edge_range.size() - 1)) {
dst[vert_range[i_duplicate * 2]] = noise::hash(src[edge[0]], i_duplicate);
dst[vert_range[i_duplicate * 2 + 1]] = noise::hash(src[edge[1]], i_duplicate);
}
dst[vert_range[0]] = src[edge[0]];
dst[vert_range[1]] = src[edge[1]];
for (const int i_duplicate : IndexRange(1, edge_range.size() - 1)) {
dst[vert_range[i_duplicate * 2]] = noise::hash(src[edge[0]], i_duplicate);
dst[vert_range[i_duplicate * 2 + 1]] = noise::hash(src[edge[1]], i_duplicate);
}
});
dst_attribute.finish();
@ -697,16 +685,14 @@ static void duplicate_edges(GeometrySet &geometry_set,
MutableSpan<int2> new_edges = new_mesh->edges_for_write();
Array<int> vert_orig_indices(output_edges_num * 2);
threading::parallel_for(selection.index_range(), 1024, [&](IndexRange range) {
for (const int i_selection : range) {
const int2 &edge = edges[selection[i_selection]];
const IndexRange edge_range = duplicates[i_selection];
const IndexRange vert_range(edge_range.start() * 2, edge_range.size() * 2);
selection.foreach_index(GrainSize(1024), [&](const int64_t index, const int64_t i_selection) {
const int2 &edge = edges[index];
const IndexRange edge_range = duplicates[i_selection];
const IndexRange vert_range(edge_range.start() * 2, edge_range.size() * 2);
for (const int i_duplicate : IndexRange(edge_range.size())) {
vert_orig_indices[vert_range[i_duplicate * 2]] = edge[0];
vert_orig_indices[vert_range[i_duplicate * 2 + 1]] = edge[1];
}
for (const int i_duplicate : IndexRange(edge_range.size())) {
vert_orig_indices[vert_range[i_duplicate * 2]] = edge[0];
vert_orig_indices[vert_range[i_duplicate * 2 + 1]] = edge[1];
}
});
@ -794,12 +780,11 @@ static void duplicate_points_curve(GeometrySet &geometry_set,
using T = decltype(dummy);
const Span<T> src = attribute.src.typed<T>();
MutableSpan<T> dst = attribute.dst.span.typed<T>();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const T &src_value = src[point_to_curve_map[selection[i_selection]]];
dst.slice(duplicates[i_selection]).fill(src_value);
}
});
selection.foreach_index(GrainSize(512),
[&](const int64_t index, const int64_t i_selection) {
const T &src_value = src[point_to_curve_map[index]];
dst.slice(duplicates[i_selection]).fill(src_value);
});
});
break;
case ATTR_DOMAIN_POINT:

View File

@ -284,9 +284,9 @@ static void extrude_mesh_vertices(Mesh &mesh,
const IndexRange new_edge_range{orig_edge_size, selection.size()};
MutableSpan<int2> new_edges = mesh.edges_for_write().slice(new_edge_range);
for (const int i_selection : selection.index_range()) {
new_edges[i_selection] = int2(selection[i_selection], new_vert_range[i_selection]);
}
selection.foreach_index_optimized<int>([&](const int index, const int i_selection) {
new_edges[i_selection] = int2(index, new_vert_range[i_selection]);
});
MutableAttributeAccessor attributes = mesh.attributes_for_write();
@ -309,10 +309,8 @@ static void extrude_mesh_vertices(Mesh &mesh,
MutableSpan<float3> positions = mesh.vert_positions_for_write();
MutableSpan<float3> new_positions = positions.slice(new_vert_range);
threading::parallel_for(selection.index_range(), 1024, [&](const IndexRange range) {
for (const int i : range) {
new_positions[i] = positions[selection[i]] + offsets[selection[i]];
}
selection.foreach_index_optimized<int>(GrainSize(1024), [&](const int index, const int i) {
new_positions[i] = positions[index] + offsets[index];
});
MutableSpan<int> vert_orig_indices = get_orig_index_layer(mesh, ATTR_DOMAIN_POINT);
@ -504,9 +502,7 @@ static void extrude_mesh_edges(Mesh &mesh,
duplicate_edges[i] = int2(new_vert_range[i_new_vert_1], new_vert_range[i_new_vert_2]);
}
for (const int i : edge_selection.index_range()) {
const int orig_edge_index = edge_selection[i];
edge_selection.foreach_index([&](const int64_t orig_edge_index, const int64_t i) {
const int2 &duplicate_edge = duplicate_edges[i];
const int new_vert_1 = duplicate_edge[0];
const int new_vert_2 = duplicate_edge[1];
@ -537,7 +533,7 @@ static void extrude_mesh_edges(Mesh &mesh,
connect_edge_range[extrude_index_1],
duplicate_edge_range[i],
connect_edge_range[extrude_index_2]);
}
});
/* Create a map of indices in the extruded vertices array to all of the indices of edges
* in the duplicate edges array that connect to that vertex. This can be used to simplify the
@ -591,58 +587,55 @@ static void extrude_mesh_edges(Mesh &mesh,
using T = decltype(dummy);
MutableSpan<T> data = attribute.span.typed<T>();
MutableSpan<T> new_data = data.slice(new_loop_range);
threading::parallel_for(edge_selection.index_range(), 256, [&](const IndexRange range) {
for (const int i_edge_selection : range) {
const int orig_edge_index = edge_selection[i_edge_selection];
edge_selection.foreach_index(
GrainSize(256), [&](const int64_t orig_edge_index, const int64_t i_edge_selection) {
const Span<int> connected_polys = edge_to_poly_map[orig_edge_index];
if (connected_polys.is_empty()) {
/* If there are no connected polygons, there is no corner data to
* interpolate. */
new_data.slice(4 * i_edge_selection, 4).fill(T());
return;
}
const Span<int> connected_polys = edge_to_poly_map[orig_edge_index];
if (connected_polys.is_empty()) {
/* If there are no connected polygons, there is no corner data to
* interpolate. */
new_data.slice(4 * i_edge_selection, 4).fill(T());
continue;
}
/* Both corners on each vertical edge of the side polygon get the same value,
* so there are only two unique values to mix. */
Array<T> side_poly_corner_data(2);
bke::attribute_math::DefaultPropagationMixer<T> mixer{side_poly_corner_data};
/* Both corners on each vertical edge of the side polygon get the same value,
* so there are only two unique values to mix. */
Array<T> side_poly_corner_data(2);
bke::attribute_math::DefaultPropagationMixer<T> mixer{side_poly_corner_data};
const int2 &duplicate_edge = duplicate_edges[i_edge_selection];
const int new_vert_1 = duplicate_edge[0];
const int new_vert_2 = duplicate_edge[1];
const int orig_vert_1 = new_vert_indices[new_vert_1 - orig_vert_size];
const int orig_vert_2 = new_vert_indices[new_vert_2 - orig_vert_size];
const int2 &duplicate_edge = duplicate_edges[i_edge_selection];
const int new_vert_1 = duplicate_edge[0];
const int new_vert_2 = duplicate_edge[1];
const int orig_vert_1 = new_vert_indices[new_vert_1 - orig_vert_size];
const int orig_vert_2 = new_vert_indices[new_vert_2 - orig_vert_size];
/* Average the corner data from the corners that share a vertex from the
* polygons that share an edge with the extruded edge. */
for (const int i_connected_poly : connected_polys.index_range()) {
const IndexRange connected_poly = polys[connected_polys[i_connected_poly]];
for (const int i_loop : IndexRange(connected_poly)) {
if (corner_verts[i_loop] == orig_vert_1) {
mixer.mix_in(0, data[i_loop]);
}
if (corner_verts[i_loop] == orig_vert_2) {
mixer.mix_in(1, data[i_loop]);
/* Average the corner data from the corners that share a vertex from the
* polygons that share an edge with the extruded edge. */
for (const int i_connected_poly : connected_polys.index_range()) {
const IndexRange connected_poly = polys[connected_polys[i_connected_poly]];
for (const int i_loop : IndexRange(connected_poly)) {
if (corner_verts[i_loop] == orig_vert_1) {
mixer.mix_in(0, data[i_loop]);
}
if (corner_verts[i_loop] == orig_vert_2) {
mixer.mix_in(1, data[i_loop]);
}
}
}
}
mixer.finalize();
mixer.finalize();
/* Instead of replicating the order in #fill_quad_consistent_direction here, it's
* simpler (though probably slower) to just match the corner data based on the vertex
* indices. */
for (const int i : IndexRange(4 * i_edge_selection, 4)) {
if (ELEM(new_corner_verts[i], new_vert_1, orig_vert_1)) {
new_data[i] = side_poly_corner_data.first();
/* Instead of replicating the order in #fill_quad_consistent_direction here, it's
* simpler (though probably slower) to just match the corner data based on the
* vertex indices. */
for (const int i : IndexRange(4 * i_edge_selection, 4)) {
if (ELEM(new_corner_verts[i], new_vert_1, orig_vert_1)) {
new_data[i] = side_poly_corner_data.first();
}
else if (ELEM(new_corner_verts[i], new_vert_2, orig_vert_2)) {
new_data[i] = side_poly_corner_data.last();
}
}
else if (ELEM(new_corner_verts[i], new_vert_2, orig_vert_2)) {
new_data[i] = side_poly_corner_data.last();
}
}
}
});
});
});
break;
}
@ -1143,10 +1136,11 @@ static void extrude_individual_mesh_faces(
* all polygons. */
int extrude_corner_size = 0;
Array<int> group_per_face_data(poly_selection.size() + 1);
for (const int i_selection : poly_selection.index_range()) {
poly_selection.foreach_index_optimized<int>([&](const int index, const int i_selection) {
group_per_face_data[i_selection] = extrude_corner_size;
extrude_corner_size += orig_polys[poly_selection[i_selection]].size();
}
extrude_corner_size += orig_polys[index].size();
});
group_per_face_data.last() = extrude_corner_size;
const OffsetIndices<int> group_per_face(group_per_face_data);
@ -1187,54 +1181,53 @@ static void extrude_individual_mesh_faces(
* separate loops, which may or may not be faster, but would involve more duplication. */
Array<int> new_vert_indices(extrude_corner_size);
Array<int> duplicate_edge_indices(extrude_corner_size);
threading::parallel_for(poly_selection.index_range(), 256, [&](const IndexRange range) {
for (const int i_selection : range) {
const IndexRange extrude_range = group_per_face[i_selection];
poly_selection.foreach_index(
GrainSize(256), [&](const int64_t index, const int64_t i_selection) {
const IndexRange extrude_range = group_per_face[i_selection];
const IndexRange poly = polys[poly_selection[i_selection]];
MutableSpan<int> poly_verts = corner_verts.slice(poly);
MutableSpan<int> poly_edges = corner_edges.slice(poly);
const IndexRange poly = polys[index];
MutableSpan<int> poly_verts = corner_verts.slice(poly);
MutableSpan<int> poly_edges = corner_edges.slice(poly);
for (const int i : IndexRange(poly.size())) {
const int i_extrude = extrude_range[i];
new_vert_indices[i_extrude] = poly_verts[i];
duplicate_edge_indices[i_extrude] = poly_edges[i];
for (const int i : IndexRange(poly.size())) {
const int i_extrude = extrude_range[i];
new_vert_indices[i_extrude] = poly_verts[i];
duplicate_edge_indices[i_extrude] = poly_edges[i];
poly_verts[i] = new_vert_range[i_extrude];
poly_edges[i] = duplicate_edge_range[i_extrude];
}
poly_verts[i] = new_vert_range[i_extrude];
poly_edges[i] = duplicate_edge_range[i_extrude];
}
for (const int i : IndexRange(poly.size())) {
const int i_next = (i == poly.size() - 1) ? 0 : i + 1;
const int i_extrude = extrude_range[i];
const int i_extrude_next = extrude_range[i_next];
for (const int i : IndexRange(poly.size())) {
const int i_next = (i == poly.size() - 1) ? 0 : i + 1;
const int i_extrude = extrude_range[i];
const int i_extrude_next = extrude_range[i_next];
const int i_duplicate_edge = duplicate_edge_range[i_extrude];
const int new_vert = new_vert_range[i_extrude];
const int new_vert_next = new_vert_range[i_extrude_next];
const int i_duplicate_edge = duplicate_edge_range[i_extrude];
const int new_vert = new_vert_range[i_extrude];
const int new_vert_next = new_vert_range[i_extrude_next];
const int orig_edge = duplicate_edge_indices[i_extrude];
const int orig_edge = duplicate_edge_indices[i_extrude];
const int orig_vert = new_vert_indices[i_extrude];
const int orig_vert_next = new_vert_indices[i_extrude_next];
const int orig_vert = new_vert_indices[i_extrude];
const int orig_vert_next = new_vert_indices[i_extrude_next];
duplicate_edges[i_extrude] = int2(new_vert, new_vert_next);
duplicate_edges[i_extrude] = int2(new_vert, new_vert_next);
MutableSpan<int> side_poly_verts = corner_verts.slice(side_loop_range[i_extrude * 4], 4);
MutableSpan<int> side_poly_edges = corner_edges.slice(side_loop_range[i_extrude * 4], 4);
side_poly_verts[0] = new_vert_next;
side_poly_edges[0] = i_duplicate_edge;
side_poly_verts[1] = new_vert;
side_poly_edges[1] = connect_edge_range[i_extrude];
side_poly_verts[2] = orig_vert;
side_poly_edges[2] = orig_edge;
side_poly_verts[3] = orig_vert_next;
side_poly_edges[3] = connect_edge_range[i_extrude_next];
MutableSpan<int> side_poly_verts = corner_verts.slice(side_loop_range[i_extrude * 4], 4);
MutableSpan<int> side_poly_edges = corner_edges.slice(side_loop_range[i_extrude * 4], 4);
side_poly_verts[0] = new_vert_next;
side_poly_edges[0] = i_duplicate_edge;
side_poly_verts[1] = new_vert;
side_poly_edges[1] = connect_edge_range[i_extrude];
side_poly_verts[2] = orig_vert;
side_poly_edges[2] = orig_edge;
side_poly_verts[3] = orig_vert_next;
side_poly_edges[3] = connect_edge_range[i_extrude_next];
connect_edges[i_extrude] = int2(orig_vert, new_vert);
}
}
});
connect_edges[i_extrude] = int2(orig_vert, new_vert);
}
});
MutableAttributeAccessor attributes = mesh.attributes_for_write();
@ -1263,31 +1256,30 @@ static void extrude_individual_mesh_faces(
using T = decltype(dummy);
MutableSpan<T> data = attribute.span.typed<T>();
MutableSpan<T> connect_data = data.slice(connect_edge_range);
threading::parallel_for(poly_selection.index_range(), 512, [&](const IndexRange range) {
for (const int i_selection : range) {
const IndexRange poly = polys[poly_selection[i_selection]];
const IndexRange extrude_range = group_per_face[i_selection];
poly_selection.foreach_index(
GrainSize(512), [&](const int64_t index, const int64_t i_selection) {
const IndexRange poly = polys[index];
const IndexRange extrude_range = group_per_face[i_selection];
/* For the extruded edges, mix the data from the two neighboring original edges of
* the extruded polygon. */
for (const int i : IndexRange(poly.size())) {
const int i_prev = (i == 0) ? poly.size() - 1 : i - 1;
const int i_extrude = extrude_range[i];
const int i_extrude_prev = extrude_range[i_prev];
/* For the extruded edges, mix the data from the two neighboring original edges of
* the extruded polygon. */
for (const int i : IndexRange(poly.size())) {
const int i_prev = (i == 0) ? poly.size() - 1 : i - 1;
const int i_extrude = extrude_range[i];
const int i_extrude_prev = extrude_range[i_prev];
const int orig_edge = duplicate_edge_indices[i_extrude];
const int orig_edge_prev = duplicate_edge_indices[i_extrude_prev];
if constexpr (std::is_same_v<T, bool>) {
/* Propagate selections with "or" instead of "at least half". */
connect_data[i_extrude] = data[orig_edge] || data[orig_edge_prev];
const int orig_edge = duplicate_edge_indices[i_extrude];
const int orig_edge_prev = duplicate_edge_indices[i_extrude_prev];
if constexpr (std::is_same_v<T, bool>) {
/* Propagate selections with "or" instead of "at least half". */
connect_data[i_extrude] = data[orig_edge] || data[orig_edge_prev];
}
else {
connect_data[i_extrude] = bke::attribute_math::mix2(
0.5f, data[orig_edge], data[orig_edge_prev]);
}
}
else {
connect_data[i_extrude] = bke::attribute_math::mix2(
0.5f, data[orig_edge], data[orig_edge_prev]);
}
}
}
});
});
});
break;
}
@ -1297,13 +1289,11 @@ static void extrude_individual_mesh_faces(
using T = decltype(dummy);
MutableSpan<T> data = attribute.span.typed<T>();
MutableSpan<T> new_data = data.slice(side_poly_range);
threading::parallel_for(poly_selection.index_range(), 1024, [&](const IndexRange range) {
for (const int i_selection : range) {
const int poly_index = poly_selection[i_selection];
const IndexRange extrude_range = group_per_face[i_selection];
new_data.slice(extrude_range).fill(data[poly_index]);
}
});
poly_selection.foreach_index(
GrainSize(1024), [&](const int64_t poly_index, const int64_t i_selection) {
const IndexRange extrude_range = group_per_face[i_selection];
new_data.slice(extrude_range).fill(data[poly_index]);
});
});
break;
}
@ -1314,28 +1304,27 @@ static void extrude_individual_mesh_faces(
using T = decltype(dummy);
MutableSpan<T> data = attribute.span.typed<T>();
MutableSpan<T> new_data = data.slice(side_loop_range);
threading::parallel_for(poly_selection.index_range(), 256, [&](const IndexRange range) {
for (const int i_selection : range) {
const IndexRange poly = polys[poly_selection[i_selection]];
const Span<T> poly_loop_data = data.slice(poly);
const IndexRange extrude_range = group_per_face[i_selection];
poly_selection.foreach_index(
GrainSize(256), [&](const int64_t index, const int64_t i_selection) {
const IndexRange poly = polys[index];
const Span<T> poly_loop_data = data.slice(poly);
const IndexRange extrude_range = group_per_face[i_selection];
for (const int i : IndexRange(poly.size())) {
const int i_next = (i == poly.size() - 1) ? 0 : i + 1;
const int i_extrude = extrude_range[i];
for (const int i : IndexRange(poly.size())) {
const int i_next = (i == poly.size() - 1) ? 0 : i + 1;
const int i_extrude = extrude_range[i];
MutableSpan<T> side_loop_data = new_data.slice(i_extrude * 4, 4);
MutableSpan<T> side_loop_data = new_data.slice(i_extrude * 4, 4);
/* The two corners on each side of the side polygon get the data from the matching
* corners of the extruded polygon. This order depends on the loop filling the loop
* indices. */
side_loop_data[0] = poly_loop_data[i_next];
side_loop_data[1] = poly_loop_data[i];
side_loop_data[2] = poly_loop_data[i];
side_loop_data[3] = poly_loop_data[i_next];
}
}
});
/* The two corners on each side of the side polygon get the data from the
* matching corners of the extruded polygon. This order depends on the loop
* filling the loop indices. */
side_loop_data[0] = poly_loop_data[i_next];
side_loop_data[1] = poly_loop_data[i];
side_loop_data[2] = poly_loop_data[i];
side_loop_data[3] = poly_loop_data[i_next];
}
});
});
break;
}
@ -1348,14 +1337,13 @@ static void extrude_individual_mesh_faces(
});
/* Offset the new vertices. */
threading::parallel_for(poly_selection.index_range(), 1024, [&](const IndexRange range) {
for (const int i_selection : range) {
const IndexRange extrude_range = group_per_face[i_selection];
for (float3 &position : new_positions.slice(extrude_range)) {
position += poly_offset[poly_selection[i_selection]];
}
}
});
poly_selection.foreach_index(GrainSize(1025),
[&](const int64_t index, const int64_t i_selection) {
const IndexRange extrude_range = group_per_face[i_selection];
for (float3 &position : new_positions.slice(extrude_range)) {
position += poly_offset[index];
}
});
MutableSpan<int> vert_orig_indices = get_orig_index_layer(mesh, ATTR_DOMAIN_POINT);
if (!vert_orig_indices.is_empty()) {
@ -1375,13 +1363,11 @@ static void extrude_individual_mesh_faces(
MutableSpan<int> poly_orig_indices = get_orig_index_layer(mesh, ATTR_DOMAIN_FACE);
if (!poly_orig_indices.is_empty()) {
MutableSpan<int> new_poly_orig_indices = poly_orig_indices.slice(side_poly_range);
threading::parallel_for(poly_selection.index_range(), 1024, [&](const IndexRange range) {
for (const int selection_i : range) {
const int poly_i = poly_selection[selection_i];
const IndexRange extrude_range = group_per_face[selection_i];
new_poly_orig_indices.slice(extrude_range).fill(poly_orig_indices[poly_i]);
}
});
poly_selection.foreach_index(
GrainSize(1024), [&](const int64_t poly_i, const int64_t selection_i) {
const IndexRange extrude_range = group_per_face[selection_i];
new_poly_orig_indices.slice(extrude_range).fill(poly_orig_indices[poly_i]);
});
}
if (attribute_outputs.top_id) {

View File

@ -112,43 +112,39 @@ static void add_instances_from_component(
/* Add this reference last, because it is the most likely one to be removed later on. */
const int empty_reference_handle = dst_component.add_reference(bke::InstanceReference());
threading::parallel_for(selection.index_range(), 1024, [&](IndexRange selection_range) {
for (const int range_i : selection_range) {
const int64_t i = selection[range_i];
selection.foreach_index(GrainSize(1024), [&](const int64_t i, const int64_t range_i) {
/* Compute base transform for every instances. */
float4x4 &dst_transform = dst_transforms[range_i];
dst_transform = math::from_loc_rot_scale<float4x4>(
positions[i], math::EulerXYZ(rotations[i]), scales[i]);
/* Compute base transform for every instances. */
float4x4 &dst_transform = dst_transforms[range_i];
dst_transform = math::from_loc_rot_scale<float4x4>(
positions[i], math::EulerXYZ(rotations[i]), scales[i]);
/* Reference that will be used by this new instance. */
int dst_handle = empty_reference_handle;
/* Reference that will be used by this new instance. */
int dst_handle = empty_reference_handle;
const bool use_individual_instance = pick_instance[i];
if (use_individual_instance) {
if (src_instances != nullptr) {
const int src_instances_num = src_instances->instances_num();
const int original_index = indices[i];
/* Use #mod_i instead of `%` to get the desirable wrap around behavior where -1
* refers to the last element. */
const int index = mod_i(original_index, std::max(src_instances_num, 1));
if (index < src_instances_num) {
/* Get the reference to the source instance. */
const int src_handle = src_instances->reference_handles()[index];
dst_handle = handle_mapping[src_handle];
const bool use_individual_instance = pick_instance[i];
if (use_individual_instance) {
if (src_instances != nullptr) {
const int src_instances_num = src_instances->instances_num();
const int original_index = indices[i];
/* Use #mod_i instead of `%` to get the desirable wrap around behavior where -1
* refers to the last element. */
const int index = mod_i(original_index, std::max(src_instances_num, 1));
if (index < src_instances_num) {
/* Get the reference to the source instance. */
const int src_handle = src_instances->reference_handles()[index];
dst_handle = handle_mapping[src_handle];
/* Take transforms of the source instance into account. */
mul_m4_m4_post(dst_transform.ptr(), src_instances->transforms()[index].ptr());
}
/* Take transforms of the source instance into account. */
mul_m4_m4_post(dst_transform.ptr(), src_instances->transforms()[index].ptr());
}
}
else {
/* Use entire source geometry as instance. */
dst_handle = full_instance_handle;
}
/* Set properties of new instance. */
dst_handles[range_i] = dst_handle;
}
else {
/* Use entire source geometry as instance. */
dst_handle = full_instance_handle;
}
/* Set properties of new instance. */
dst_handles[range_i] = dst_handle;
});
if (pick_instance.is_single()) {

View File

@ -47,12 +47,9 @@ static VArray<bool> select_mesh_faces_by_material(const Mesh &mesh,
const VArraySpan<int> material_indices_span(material_indices);
Array<bool> face_selection(face_mask.min_array_size());
threading::parallel_for(face_mask.index_range(), 1024, [&](IndexRange range) {
for (const int i : range) {
const int face_index = face_mask[i];
const int slot_i = material_indices_span[face_index];
face_selection[face_index] = slots.contains(slot_i);
}
face_mask.foreach_index_optimized<int>(GrainSize(1024), [&](const int face_index) {
const int slot_i = material_indices_span[face_index];
face_selection[face_index] = slots.contains(slot_i);
});
return VArray<bool>::ForContainer(std::move(face_selection));

View File

@ -55,18 +55,16 @@ class OffsetCornerInFaceFieldInput final : public bke::MeshFieldInput {
Array<int> loop_to_poly_map = bke::mesh::build_loop_to_poly_map(polys);
Array<int> offset_corners(mask.min_array_size());
threading::parallel_for(mask.index_range(), 2048, [&](const IndexRange range) {
for (const int selection_i : range) {
const int corner_i = corner_indices[selection_i];
const int offset = offsets[selection_i];
if (!corner_range.contains(corner_i)) {
offset_corners[selection_i] = 0;
continue;
}
const IndexRange poly = polys[loop_to_poly_map[corner_i]];
offset_corners[selection_i] = apply_offset_in_cyclic_range(poly, corner_i, offset);
mask.foreach_index_optimized<int>(GrainSize(2048), [&](const int selection_i) {
const int corner_i = corner_indices[selection_i];
const int offset = offsets[selection_i];
if (!corner_range.contains(corner_i)) {
offset_corners[selection_i] = 0;
return;
}
const IndexRange poly = polys[loop_to_poly_map[corner_i]];
offset_corners[selection_i] = apply_offset_in_cyclic_range(poly, corner_i, offset);
});
return VArray<int>::ForContainer(std::move(offset_corners));

View File

@ -65,24 +65,21 @@ static bool calculate_mesh_proximity(const VArray<float3> &positions,
return false;
}
threading::parallel_for(mask.index_range(), 512, [&](IndexRange range) {
mask.foreach_index(GrainSize(512), [&](const int index) {
BVHTreeNearest nearest;
copy_v3_fl(nearest.co, FLT_MAX);
nearest.index = -1;
for (int i : range) {
const int index = mask[i];
/* Use the distance to the last found point as upper bound to speedup the bvh lookup. */
nearest.dist_sq = math::distance_squared(float3(nearest.co), positions[index]);
/* Use the distance to the last found point as upper bound to speedup the bvh lookup. */
nearest.dist_sq = math::distance_squared(float3(nearest.co), positions[index]);
BLI_bvhtree_find_nearest(
bvh_data.tree, positions[index], &nearest, bvh_data.nearest_callback, &bvh_data);
BLI_bvhtree_find_nearest(
bvh_data.tree, positions[index], &nearest, bvh_data.nearest_callback, &bvh_data);
if (nearest.dist_sq < r_distances[index]) {
r_distances[index] = nearest.dist_sq;
if (!r_locations.is_empty()) {
r_locations[index] = nearest.co;
}
if (nearest.dist_sq < r_distances[index]) {
r_distances[index] = nearest.dist_sq;
if (!r_locations.is_empty()) {
r_locations[index] = nearest.co;
}
}
});
@ -103,26 +100,23 @@ static bool calculate_pointcloud_proximity(const VArray<float3> &positions,
return false;
}
threading::parallel_for(mask.index_range(), 512, [&](IndexRange range) {
mask.foreach_index(GrainSize(512), [&](const int index) {
BVHTreeNearest nearest;
copy_v3_fl(nearest.co, FLT_MAX);
nearest.index = -1;
for (int i : range) {
const int index = mask[i];
/* Use the distance to the closest point in the mesh to speedup the pointcloud bvh lookup.
* This is ok because we only need to find the closest point in the pointcloud if it's
* closer than the mesh. */
nearest.dist_sq = r_distances[index];
/* Use the distance to the closest point in the mesh to speedup the pointcloud bvh lookup.
* This is ok because we only need to find the closest point in the pointcloud if it's
* closer than the mesh. */
nearest.dist_sq = r_distances[index];
BLI_bvhtree_find_nearest(
bvh_data.tree, positions[index], &nearest, bvh_data.nearest_callback, &bvh_data);
BLI_bvhtree_find_nearest(
bvh_data.tree, positions[index], &nearest, bvh_data.nearest_callback, &bvh_data);
if (nearest.dist_sq < r_distances[index]) {
r_distances[index] = nearest.dist_sq;
if (!r_locations.is_empty()) {
r_locations[index] = nearest.co;
}
if (nearest.dist_sq < r_distances[index]) {
r_distances[index] = nearest.dist_sq;
if (!r_locations.is_empty()) {
r_locations[index] = nearest.co;
}
}
});
@ -187,12 +181,8 @@ class ProximityFunction : public mf::MultiFunction {
}
if (params.single_output_is_required(2, "Distance")) {
threading::parallel_for(mask.index_range(), 2048, [&](IndexRange range) {
for (const int i : range) {
const int j = mask[i];
distances[j] = std::sqrt(distances[j]);
}
});
mask.foreach_index_optimized<int>(
GrainSize(2048), [&](const int j) { distances[j] = std::sqrt(distances[j]); });
}
}
};

View File

@ -41,44 +41,41 @@ static void rotate_instances(GeoNodeExecParams &params, bke::Instances &instance
MutableSpan<float4x4> transforms = instances.transforms();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i = selection[i_selection];
const float3 pivot = pivots[i];
const float3 euler = rotations[i];
float4x4 &instance_transform = transforms[i];
selection.foreach_index(GrainSize(512), [&](const int64_t i) {
const float3 pivot = pivots[i];
const float3 euler = rotations[i];
float4x4 &instance_transform = transforms[i];
float4x4 rotation_matrix;
float3 used_pivot;
float4x4 rotation_matrix;
float3 used_pivot;
if (local_spaces[i]) {
/* Find rotation axis from the matrix. This should work even if the instance is skewed. */
/* Create rotations around the individual axis. This could be optimized to skip some axis
* when the angle is zero. */
const float3x3 rotation_x = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.x_axis()), euler.x));
const float3x3 rotation_y = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.y_axis()), euler.y));
const float3x3 rotation_z = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.z_axis()), euler.z));
if (local_spaces[i]) {
/* Find rotation axis from the matrix. This should work even if the instance is skewed. */
/* Create rotations around the individual axis. This could be optimized to skip some axis
* when the angle is zero. */
const float3x3 rotation_x = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.x_axis()), euler.x));
const float3x3 rotation_y = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.y_axis()), euler.y));
const float3x3 rotation_z = from_rotation<float3x3>(
AxisAngle(normalize(instance_transform.z_axis()), euler.z));
/* Combine the previously computed rotations into the final rotation matrix. */
rotation_matrix = float4x4(rotation_z * rotation_y * rotation_x);
/* Combine the previously computed rotations into the final rotation matrix. */
rotation_matrix = float4x4(rotation_z * rotation_y * rotation_x);
/* Transform the passed in pivot into the local space of the instance. */
used_pivot = transform_point(instance_transform, pivot);
}
else {
used_pivot = pivot;
rotation_matrix = from_rotation<float4x4>(EulerXYZ(euler));
}
/* Move the pivot to the origin so that we can rotate around it. */
instance_transform.location() -= used_pivot;
/* Perform the actual rotation. */
instance_transform = rotation_matrix * instance_transform;
/* Undo the pivot shifting done before. */
instance_transform.location() += used_pivot;
/* Transform the passed in pivot into the local space of the instance. */
used_pivot = transform_point(instance_transform, pivot);
}
else {
used_pivot = pivot;
rotation_matrix = from_rotation<float4x4>(EulerXYZ(euler));
}
/* Move the pivot to the origin so that we can rotate around it. */
instance_transform.location() -= used_pivot;
/* Perform the actual rotation. */
instance_transform = rotation_matrix * instance_transform;
/* Undo the pivot shifting done before. */
instance_transform.location() += used_pivot;
});
}

View File

@ -40,24 +40,21 @@ static void scale_instances(GeoNodeExecParams &params, bke::Instances &instances
MutableSpan<float4x4> transforms = instances.transforms();
threading::parallel_for(selection.index_range(), 512, [&](IndexRange range) {
for (const int i_selection : range) {
const int i = selection[i_selection];
const float3 pivot = pivots[i];
float4x4 &instance_transform = transforms[i];
selection.foreach_index(GrainSize(512), [&](const int64_t i) {
const float3 pivot = pivots[i];
float4x4 &instance_transform = transforms[i];
if (local_spaces[i]) {
instance_transform *= math::from_location<float4x4>(pivot);
rescale_m4(instance_transform.ptr(), scales[i]);
instance_transform *= math::from_location<float4x4>(-pivot);
}
else {
const float4x4 original_transform = instance_transform;
instance_transform = math::from_location<float4x4>(pivot);
rescale_m4(instance_transform.ptr(), scales[i]);
instance_transform *= math::from_location<float4x4>(-pivot);
instance_transform *= original_transform;
}
if (local_spaces[i]) {
instance_transform *= math::from_location<float4x4>(pivot);
rescale_m4(instance_transform.ptr(), scales[i]);
instance_transform *= math::from_location<float4x4>(-pivot);
}
else {
const float4x4 original_transform = instance_transform;
instance_transform = math::from_location<float4x4>(pivot);
rescale_m4(instance_transform.ptr(), scales[i]);
instance_transform *= math::from_location<float4x4>(-pivot);
instance_transform *= original_transform;
}
});
}

View File

@ -36,15 +36,12 @@ static void translate_instances(GeoNodeExecParams &params, bke::Instances &insta
MutableSpan<float4x4> transforms = instances.transforms();
threading::parallel_for(selection.index_range(), 1024, [&](IndexRange range) {
for (const int i_selection : range) {
const int i = selection[i_selection];
if (local_spaces[i]) {
transforms[i] *= math::from_location<float4x4>(translations[i]);
}
else {
transforms[i].location() += translations[i];
}
selection.foreach_index(GrainSize(1024), [&](const int64_t i) {
if (local_spaces[i]) {
transforms[i] *= math::from_location<float4x4>(translations[i]);
}
else {
transforms[i].location() += translations[i];
}
});
}

View File

@ -12,16 +12,6 @@ struct Render;
struct RenderData;
struct Scene;
namespace blender {
namespace realtime_compositor {
class Evaluator;
}
namespace render {
class Context;
class TexturePool;
/* ------------------------------------------------------------------------------------------------
* Render Realtime Compositor
*
@ -29,31 +19,17 @@ class TexturePool;
* that is part of the draw manager. The input and output of this is pre-existing RenderResult
* buffers in scenes, that are uploaded to and read back from the GPU. */
class RealtimeCompositor {
private:
/* Render instance for GPU context to run compositor in. */
Render &render_;
namespace blender::render {
class RealtimeCompositor;
}
std::unique_ptr<TexturePool> texture_pool_;
std::unique_ptr<Context> context_;
std::unique_ptr<realtime_compositor::Evaluator> evaluator_;
/* Execute compositor. */
void RE_compositor_execute(Render &render,
const Scene &scene,
const RenderData &render_data,
const bNodeTree &node_tree,
const bool use_file_output,
const char *view_name);
public:
RealtimeCompositor(Render &render,
const Scene &scene,
const RenderData &render_data,
const bNodeTree &node_tree,
const bool use_file_output,
const char *view_name);
~RealtimeCompositor();
/* Evaluate the compositor and output to the scene render result. */
void execute();
/* If the compositor node tree changed, reset the evaluator. */
void update(const Depsgraph *depsgraph);
};
} // namespace render
} // namespace blender
/* Free compositor caches. */
void RE_compositor_free(Render &render);

View File

@ -223,10 +223,15 @@ void RE_FreeAllPersistentData(void);
void RE_FreePersistentData(const struct Scene *scene);
/**
* Free cached GPU textures to reduce memory usage. Before rendering all are cleared
* and on UI changes when detected they are no longer used.
* Free cached GPU textures to reduce memory usage.
*/
void RE_FreeGPUTextureCaches(const bool only_unused);
void RE_FreeGPUTextureCaches(void);
/**
* Free cached GPU textures, contexts and compositor to reduce memory usage,
* when nothing in the UI requires them anymore.
*/
void RE_FreeUnusedGPUResources(void);
/**
* Get results and statistics.
@ -429,10 +434,12 @@ void RE_current_scene_update_cb(struct Render *re,
void *handle,
void (*f)(void *handle, struct Scene *scene));
void RE_system_gpu_context_create(Render *re);
void RE_system_gpu_context_destroy(Render *re);
void RE_system_gpu_context_ensure(Render *re);
void RE_system_gpu_context_free(Render *re);
void *RE_system_gpu_context_get(Render *re);
void *RE_blender_gpu_context_get(Render *re);
void *RE_blender_gpu_context_ensure(Render *re);
void RE_blender_gpu_context_free(Render *re);
/**
* \param x: ranges from -1 to 1.

View File

@ -3,6 +3,7 @@
* SPDX-License-Identifier: GPL-2.0-or-later */
#include <cstring>
#include <string>
#include "BLI_threads.h"
#include "BLI_vector.hh"
@ -19,12 +20,16 @@
#include "IMB_colormanagement.h"
#include "IMB_imbuf.h"
#include "DEG_depsgraph_query.h"
#include "COM_context.hh"
#include "COM_evaluator.hh"
#include "RE_compositor.hh"
#include "RE_pipeline.h"
#include "render_types.h"
namespace blender::render {
/* Render Texture Pool */
@ -56,16 +61,39 @@ class TexturePool : public realtime_compositor::TexturePool {
}
};
/* Render Context */
/* Render Context Data
*
* Stored separately from the context so we can update it without losing any cached
* data from the context. */
class ContextInputData {
public:
const Scene *scene;
const RenderData *render_data;
const bNodeTree *node_tree;
bool use_file_output;
std::string view_name;
ContextInputData(const Scene &scene,
const RenderData &render_data,
const bNodeTree &node_tree,
const bool use_file_output,
const char *view_name)
: scene(&scene),
render_data(&render_data),
node_tree(&node_tree),
use_file_output(use_file_output),
view_name(view_name)
{
}
};
/* Render Context Data */
class Context : public realtime_compositor::Context {
private:
/* Input data. */
const Scene &scene_;
const RenderData &render_data_;
const bNodeTree &node_tree_;
const bool use_file_output_;
const char *view_name_;
ContextInputData input_data_;
/* Output combined texture. */
GPUTexture *output_texture_ = nullptr;
@ -77,18 +105,9 @@ class Context : public realtime_compositor::Context {
TexturePool &render_texture_pool_;
public:
Context(const Scene &scene,
const RenderData &render_data,
const bNodeTree &node_tree,
const bool use_file_output,
const char *view_name,
TexturePool &texture_pool)
Context(const ContextInputData &input_data, TexturePool &texture_pool)
: realtime_compositor::Context(texture_pool),
scene_(scene),
render_data_(render_data),
node_tree_(node_tree),
use_file_output_(use_file_output),
view_name_(view_name),
input_data_(input_data),
render_texture_pool_(texture_pool)
{
}
@ -99,19 +118,24 @@ class Context : public realtime_compositor::Context {
GPU_TEXTURE_FREE_SAFE(viewer_output_texture_);
}
void update_input_data(const ContextInputData &input_data)
{
input_data_ = input_data;
}
const Scene &get_scene() const override
{
return scene_;
return *input_data_.scene;
}
const bNodeTree &get_node_tree() const override
{
return node_tree_;
return *input_data_.node_tree;
}
bool use_file_output() const override
{
return use_file_output_;
return input_data_.use_file_output;
}
bool use_composite_output() const override
@ -121,18 +145,18 @@ class Context : public realtime_compositor::Context {
bool use_texture_color_management() const override
{
return BKE_scene_check_color_management_enabled(&scene_);
return BKE_scene_check_color_management_enabled(input_data_.scene);
}
const RenderData &get_render_data() const override
{
return render_data_;
return *(input_data_.render_data);
}
int2 get_render_size() const override
{
int width, height;
BKE_render_resolution(&render_data_, false, &width, &height);
BKE_render_resolution(input_data_.render_data, false, &width, &height);
return int2(width, height);
}
@ -182,9 +206,11 @@ class Context : public realtime_compositor::Context {
return viewer_output_texture_;
}
GPUTexture *get_input_texture(int view_layer_id, const char *pass_name) override
GPUTexture *get_input_texture(const Scene *scene,
int view_layer_id,
const char *pass_name) override
{
Render *re = RE_GetSceneRender(&scene_);
Render *re = RE_GetSceneRender(scene);
RenderResult *rr = nullptr;
GPUTexture *input_texture = nullptr;
@ -193,7 +219,7 @@ class Context : public realtime_compositor::Context {
}
if (rr) {
ViewLayer *view_layer = (ViewLayer *)BLI_findlink(&scene_.view_layers, view_layer_id);
ViewLayer *view_layer = (ViewLayer *)BLI_findlink(&scene->view_layers, view_layer_id);
if (view_layer) {
RenderLayer *rl = RE_GetRenderLayer(rr, view_layer->name);
if (rl) {
@ -223,7 +249,7 @@ class Context : public realtime_compositor::Context {
StringRef get_view_name() override
{
return view_name_;
return input_data_.view_name;
}
void set_info_message(StringRef /* message */) const override
@ -247,11 +273,11 @@ class Context : public realtime_compositor::Context {
return;
}
Render *re = RE_GetSceneRender(&scene_);
Render *re = RE_GetSceneRender(input_data_.scene);
RenderResult *rr = RE_AcquireResultWrite(re);
if (rr) {
RenderView *rv = RE_RenderViewGetByName(rr, view_name_);
RenderView *rv = RE_RenderViewGetByName(rr, input_data_.view_name.c_str());
GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
float *output_buffer = (float *)GPU_texture_read(output_texture_, GPU_DATA_FLOAT, 0);
@ -286,10 +312,13 @@ class Context : public realtime_compositor::Context {
Image *image = BKE_image_ensure_viewer(G.main, IMA_TYPE_COMPOSITE, "Viewer Node");
ImageUser image_user = {0};
image_user.multi_index = BKE_scene_multiview_view_id_get(&render_data_, view_name_);
image_user.multi_index = BKE_scene_multiview_view_id_get(input_data_.render_data,
input_data_.view_name.c_str());
if (BKE_scene_multiview_is_render_view_first(&render_data_, view_name_)) {
BKE_image_ensure_viewer_views(&render_data_, image, &image_user);
if (BKE_scene_multiview_is_render_view_first(input_data_.render_data,
input_data_.view_name.c_str()))
{
BKE_image_ensure_viewer_views(input_data_.render_data, image, &image_user);
}
BLI_thread_lock(LOCK_DRAW_IMAGE);
@ -321,53 +350,106 @@ class Context : public realtime_compositor::Context {
MEM_freeN(output_buffer);
BKE_image_partial_update_mark_full_update(image);
if (node_tree_.runtime->update_draw) {
node_tree_.runtime->update_draw(node_tree_.runtime->udh);
if (input_data_.node_tree->runtime->update_draw) {
input_data_.node_tree->runtime->update_draw(input_data_.node_tree->runtime->udh);
}
}
};
/* Render Realtime Compositor */
RealtimeCompositor::RealtimeCompositor(Render &render,
const Scene &scene,
const RenderData &render_data,
const bNodeTree &node_tree,
const bool use_file_output,
const char *view_name)
: render_(render)
{
/* Create resources with GPU context enabled. */
DRW_render_context_enable(&render_);
texture_pool_ = std::make_unique<TexturePool>();
context_ = std::make_unique<Context>(
scene, render_data, node_tree, use_file_output, view_name, *texture_pool_);
evaluator_ = std::make_unique<realtime_compositor::Evaluator>(*context_);
DRW_render_context_disable(&render_);
}
class RealtimeCompositor {
private:
/* Render instance for GPU context to run compositor in. */
Render &render_;
RealtimeCompositor::~RealtimeCompositor()
{
/* Free resources with GPU context enabled. */
DRW_render_context_enable(&render_);
evaluator_.reset();
context_.reset();
texture_pool_.reset();
DRW_render_context_disable(&render_);
}
std::unique_ptr<TexturePool> texture_pool_;
std::unique_ptr<Context> context_;
void RealtimeCompositor::execute()
{
DRW_render_context_enable(&render_);
evaluator_->evaluate();
context_->output_to_render_result();
context_->viewer_output_to_viewer_image();
DRW_render_context_disable(&render_);
}
public:
RealtimeCompositor(Render &render, const ContextInputData &input_data) : render_(render)
{
BLI_assert(!BLI_thread_is_main());
void RealtimeCompositor::update(const Depsgraph * /* depsgraph */)
{
/* TODO: implement */
}
/* Create resources with GPU context enabled. */
DRW_render_context_enable(&render_);
texture_pool_ = std::make_unique<TexturePool>();
context_ = std::make_unique<Context>(input_data, *texture_pool_);
DRW_render_context_disable(&render_);
}
~RealtimeCompositor()
{
/* Free resources with GPU context enabled. Cleanup may happen from the
* main thread, and we must use the main context there. */
if (BLI_thread_is_main()) {
DRW_gpu_context_enable();
}
else {
DRW_render_context_enable(&render_);
}
context_.reset();
texture_pool_.reset();
if (BLI_thread_is_main()) {
DRW_gpu_context_disable();
}
else {
DRW_render_context_disable(&render_);
}
}
/* Evaluate the compositor and output to the scene render result. */
void execute(const ContextInputData &input_data)
{
BLI_assert(!BLI_thread_is_main());
DRW_render_context_enable(&render_);
context_->update_input_data(input_data);
/* Always recreate the evaluator, as this only runs on compositing node changes and
* there is no reason to cache this. Unlike the viewport where it helps for navigation. */
{
realtime_compositor::Evaluator evaluator(*context_);
evaluator.evaluate();
}
context_->output_to_render_result();
context_->viewer_output_to_viewer_image();
DRW_render_context_disable(&render_);
}
};
} // namespace blender::render
void RE_compositor_execute(Render &render,
const Scene &scene,
const RenderData &render_data,
const bNodeTree &node_tree,
const bool use_file_output,
const char *view_name)
{
BLI_mutex_lock(&render.gpu_compositor_mutex);
blender::render::ContextInputData input_data(
scene, render_data, node_tree, use_file_output, view_name);
if (render.gpu_compositor == nullptr) {
render.gpu_compositor = new blender::render::RealtimeCompositor(render, input_data);
}
render.gpu_compositor->execute(input_data);
BLI_mutex_unlock(&render.gpu_compositor_mutex);
}
void RE_compositor_free(Render &render)
{
BLI_mutex_lock(&render.gpu_compositor_mutex);
if (render.gpu_compositor) {
delete render.gpu_compositor;
render.gpu_compositor = nullptr;
}
BLI_mutex_unlock(&render.gpu_compositor_mutex);
}

View File

@ -527,6 +527,7 @@ Render *RE_NewRender(const char *name)
BLI_rw_mutex_init(&re->resultmutex);
BLI_mutex_init(&re->engine_draw_mutex);
BLI_mutex_init(&re->highlighted_tiles_mutex);
BLI_mutex_init(&re->gpu_compositor_mutex);
}
RE_InitRenderCB(re);
@ -586,9 +587,15 @@ void RE_FreeRender(Render *re)
RE_engine_free(re->engine);
}
RE_compositor_free(*re);
RE_blender_gpu_context_free(re);
RE_system_gpu_context_free(re);
BLI_rw_mutex_end(&re->resultmutex);
BLI_mutex_end(&re->engine_draw_mutex);
BLI_mutex_end(&re->highlighted_tiles_mutex);
BLI_mutex_end(&re->gpu_compositor_mutex);
BKE_curvemapping_free_data(&re->r.mblur_shutter_curve);
@ -642,50 +649,86 @@ void RE_FreeAllPersistentData(void)
}
}
void RE_FreeGPUTextureCaches(const bool only_unused)
static void re_gpu_texture_caches_free(Render *re)
{
/* Free persistent compositor that may be using these textures. */
if (re->gpu_compositor) {
RE_compositor_free(*re);
}
/* Free textures. */
if (re->result_has_gpu_texture_caches) {
RenderResult *result = RE_AcquireResultWrite(re);
if (result != nullptr) {
render_result_free_gpu_texture_caches(result);
}
re->result_has_gpu_texture_caches = false;
RE_ReleaseResult(re);
}
}
void RE_FreeGPUTextureCaches()
{
LISTBASE_FOREACH (Render *, re, &RenderGlobal.renderlist) {
if (!re->result_has_gpu_texture_caches) {
continue;
}
re_gpu_texture_caches_free(re);
}
}
Scene *scene = re->scene;
void RE_FreeUnusedGPUResources()
{
BLI_assert(BLI_thread_is_main());
wmWindowManager *wm = static_cast<wmWindowManager *>(G_MAIN->wm.first);
LISTBASE_FOREACH (Render *, re, &RenderGlobal.renderlist) {
bool do_free = true;
/* Detect if scene is using realtime compositing, and if either a node editor is
* showing the nodes, or an image editor is showing the render result or viewer. */
if (only_unused && scene && scene->use_nodes && scene->nodetree &&
scene->nodetree->execution_mode == NTREE_EXECUTION_MODE_REALTIME)
{
wmWindowManager *wm = static_cast<wmWindowManager *>(G_MAIN->wm.first);
LISTBASE_FOREACH (const wmWindow *, win, &wm->windows) {
const bScreen *screen = WM_window_get_active_screen(win);
LISTBASE_FOREACH (const ScrArea *, area, &screen->areabase) {
const SpaceLink &space = *static_cast<const SpaceLink *>(area->spacedata.first);
LISTBASE_FOREACH (const wmWindow *, win, &wm->windows) {
const Scene *scene = WM_window_get_active_scene(win);
if (re != RE_GetSceneRender(scene)) {
continue;
}
if (space.spacetype == SPACE_NODE) {
const SpaceNode &snode = reinterpret_cast<const SpaceNode &>(space);
if (snode.nodetree == scene->nodetree) {
do_free = false;
}
/* Don't free if this scene is being rendered or composited. Note there is no
* race condition here because we are on the main thread and new jobs can only
* be started from the main thread. */
if (WM_jobs_test(wm, scene, WM_JOB_TYPE_RENDER) ||
WM_jobs_test(wm, scene, WM_JOB_TYPE_COMPOSITE)) {
do_free = false;
break;
}
/* Detect if scene is using realtime compositing, and if either a node editor is
* showing the nodes, or an image editor is showing the render result or viewer. */
if (!(scene->use_nodes && scene->nodetree &&
scene->nodetree->execution_mode == NTREE_EXECUTION_MODE_REALTIME))
{
continue;
}
const bScreen *screen = WM_window_get_active_screen(win);
LISTBASE_FOREACH (const ScrArea *, area, &screen->areabase) {
const SpaceLink &space = *static_cast<const SpaceLink *>(area->spacedata.first);
if (space.spacetype == SPACE_NODE) {
const SpaceNode &snode = reinterpret_cast<const SpaceNode &>(space);
if (snode.nodetree == scene->nodetree) {
do_free = false;
}
else if (space.spacetype == SPACE_IMAGE) {
const SpaceImage &sima = reinterpret_cast<const SpaceImage &>(space);
if (sima.image && sima.image->source == IMA_SRC_VIEWER) {
do_free = false;
}
}
else if (space.spacetype == SPACE_IMAGE) {
const SpaceImage &sima = reinterpret_cast<const SpaceImage &>(space);
if (sima.image && sima.image->source == IMA_SRC_VIEWER) {
do_free = false;
}
}
}
}
if (do_free) {
RenderResult *result = RE_AcquireResultWrite(re);
if (result != nullptr) {
render_result_free_gpu_texture_caches(result);
}
re->result_has_gpu_texture_caches = false;
RE_ReleaseResult(re);
re_gpu_texture_caches_free(re);
RE_blender_gpu_context_free(re);
RE_system_gpu_context_free(re);
}
}
}
@ -921,17 +964,20 @@ void RE_test_break_cb(Render *re, void *handle, bool (*f)(void *handle))
/** \name GPU Context
* \{ */
void RE_system_gpu_context_create(Render *re)
void RE_system_gpu_context_ensure(Render *re)
{
/* Needs to be created in the main thread. */
re->system_gpu_context = WM_system_gpu_context_create();
/* So we activate the window's one afterwards. */
wm_window_reset_drawable();
BLI_assert(BLI_thread_is_main());
if (re->system_gpu_context == nullptr) {
/* Needs to be created in the main thread. */
re->system_gpu_context = WM_system_gpu_context_create();
/* So we activate the window's one afterwards. */
wm_window_reset_drawable();
}
}
void RE_system_gpu_context_destroy(Render *re)
void RE_system_gpu_context_free(Render *re)
{
/* Needs to be called from the thread which used the GPU context for rendering. */
if (re->system_gpu_context) {
if (re->blender_gpu_context) {
WM_system_gpu_context_activate(re->system_gpu_context);
@ -942,6 +988,11 @@ void RE_system_gpu_context_destroy(Render *re)
WM_system_gpu_context_dispose(re->system_gpu_context);
re->system_gpu_context = nullptr;
/* If in main thread, reset window context. */
if (BLI_thread_is_main()) {
wm_window_reset_drawable();
}
}
}
@ -950,7 +1001,7 @@ void *RE_system_gpu_context_get(Render *re)
return re->system_gpu_context;
}
void *RE_blender_gpu_context_get(Render *re)
void *RE_blender_gpu_context_ensure(Render *re)
{
if (re->blender_gpu_context == nullptr) {
re->blender_gpu_context = GPU_context_create(nullptr, re->system_gpu_context);
@ -958,6 +1009,16 @@ void *RE_blender_gpu_context_get(Render *re)
return re->blender_gpu_context;
}
/* Release the render's Blender GPU context, if one was created.
 *
 * The render's system GPU context is activated first so the Blender GPU
 * context is discarded on the context it belongs to; the stored pointer is
 * reset so a later RE_blender_gpu_context_ensure() can lazily recreate it. */
void RE_blender_gpu_context_free(Render *re)
{
  /* Nothing to do when no Blender GPU context was ever created. */
  if (re->blender_gpu_context == nullptr) {
    return;
  }

  WM_system_gpu_context_activate(re->system_gpu_context);

  GPUContext *gpu_context = static_cast<GPUContext *>(re->blender_gpu_context);
  GPU_context_active_set(gpu_context);
  GPU_context_discard(gpu_context);

  re->blender_gpu_context = nullptr;
}
/** \} */
/* -------------------------------------------------------------------- */
@ -1787,13 +1848,20 @@ static void render_pipeline_free(Render *re)
RE_engine_free(re->engine);
re->engine = nullptr;
}
/* Destroy compositor that was using pipeline depsgraph. */
RE_compositor_free(*re);
/* Destroy pipeline depsgraph. */
if (re->pipeline_depsgraph != nullptr) {
DEG_graph_free(re->pipeline_depsgraph);
re->pipeline_depsgraph = nullptr;
re->pipeline_scene_eval = nullptr;
}
/* Destroy the opengl context in the correct thread. */
RE_system_gpu_context_destroy(re);
RE_blender_gpu_context_free(re);
RE_system_gpu_context_free(re);
/* In the case the engine did not mark tiles as finished (un-highlight, which could happen in
* the case of cancelled render) ensure the storage is empty. */
@ -1839,7 +1907,7 @@ void RE_RenderFrame(Render *re,
render_callback_exec_id(re, re->main, &scene->id, BKE_CB_EVT_RENDER_PRE);
/* Reduce GPU memory usage so renderer has more space. */
RE_FreeGPUTextureCaches(false);
RE_FreeGPUTextureCaches();
render_init_depsgraph(re);
@ -2244,7 +2312,7 @@ void RE_RenderAnim(Render *re,
char filepath[FILE_MAX];
/* Reduce GPU memory usage so renderer has more space. */
RE_FreeGPUTextureCaches(false);
RE_FreeGPUTextureCaches();
/* A feedback loop exists here -- render initialization requires updated
* render layers settings which could be animated, but scene evaluation for

View File

@ -16,6 +16,7 @@
#include "BLI_threads.h"
#include "RE_compositor.hh"
#include "RE_pipeline.h"
struct Depsgraph;
@ -94,6 +95,10 @@ struct Render {
struct Depsgraph *pipeline_depsgraph;
Scene *pipeline_scene_eval;
/* Realtime GPU Compositor. */
blender::render::RealtimeCompositor *gpu_compositor;
ThreadMutex gpu_compositor_mutex;
/* callbacks */
void (*display_init)(void *handle, RenderResult *rr);
void *dih;

View File

@ -718,7 +718,7 @@ void wm_event_do_notifiers(bContext *C)
wm_event_do_refresh_wm_and_depsgraph(C);
RE_FreeGPUTextureCaches(true);
RE_FreeUnusedGPUResources();
/* Status bar. */
if (wm->winactive) {