Compare commits

...

147 Commits

Author SHA1 Message Date
192719bdca Don't make a local variable to load the WorldClipPlanes. 2022-12-12 12:21:05 +01:00
275bbb8a4c Use GPU_SHADER, not GPU_VULKAN. 2022-12-12 12:20:23 +01:00
3a4d2df7b1 Vulkan: add cube map array. 2022-12-12 11:06:04 +01:00
c14af6b4d7 Prefix push constants to work around double macro expansion. 2022-12-12 09:47:04 +01:00
f676dbebf5 Merge branch 'master' into temp-vulkan-shader 2022-12-12 09:29:27 +01:00
40bc5aa7e5 Cleanup: Comment formatting in normal calculation
Put it below the `else` case to make the control flow clearer, since
in the end that is more important. Also clarify the wording and fix
grammar slightly.
2022-12-11 23:28:01 -06:00
8c11c04448 Cleanup: Rename adjacent mesh loop accessors
Put the common, more important part of the function names at the
beginning, to make them easier to find and more consistent.
2022-12-11 23:14:01 -06:00
2bb47e03f7 Cleanup: Use Span instead of MutableSpan for normals array 2022-12-11 23:09:03 -06:00
9e9ebcdd72 Mesh: Reduce memory consumption when calculating corner normals
Similar to previous commits, avoid using pointers that are redundant
to their corresponding index. Also avoid storing the edge vectors stack
in the data-per-loop when it can just be a function argument.

Face corner normals are calculated when auto smooth or custom
per-corner normals are used.

This brings the per-face-corner data from 64 to 24 bytes, as measured by
`sizeof`. In a large test file I observed a 20% performance improvement,
from 285 to 239 ms.
2022-12-11 22:47:46 -06:00
f06d7c98db Cleanup: Remove unnecessary MLoop argument
The loop was also retrievable with the index. This needed some care
though, because previously the index became "detached" from the
corresponding MLoop pointer for a short time.
2022-12-11 21:48:52 -06:00
0485baaf98 Cleanup: Reduce use of redundant local variables in normal calculation
Since face corner indices are available, using pointers to MLoop and
other data is redundant. Using fewer local variables means there is less
state to keep track of when reading the algorithm, even if it requires
more characters in some cases.
2022-12-11 21:33:29 -06:00
9338ab1d62 Mesh: Avoid storing redundant pointer in corner normal calculation
The `lnor` pointer was only used for fans containing a single face,
and can be retrieved for a specific loop from the common data anyway.
Also saves 8 bytes per corner during the calculation.
2022-12-11 20:30:05 -06:00
178eb5bac5 Cleanup: Add accessor for node index
Add `bNode::index()` to allow accessing node indices directly without
manually de-referencing the runtime struct. Also adds some asserts to
make sure the access is valid and to check the node's runtime vector.

Eagerly maintain the node's index in the tree so it can be accessed
without relying on the topology cache.

Differential Revision: https://developer.blender.org/D16683
2022-12-11 20:23:18 -06:00
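As a hedged illustration of the accessor added above (not code from the patch), usage looks roughly like this; the `all_nodes()` iteration and the header name are assumptions based on the topology-cache API:

```cpp
/* Sketch only: use the new bNode::index() accessor instead of reaching into
 * the runtime struct. all_nodes() and the header name are assumptions. */
#include <cstdio>

#include "BKE_node_runtime.hh"

static void print_node_indices(const bNodeTree &tree)
{
  for (const bNode *node : tree.all_nodes()) {
    /* Previously this required node->runtime->index_in_tree. */
    printf("%s -> %d\n", node->name, node->index());
  }
}
```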
05bf5c4e0e Nodes: simplify handling of function nodes in declaration
This adds an explicit post processing step to node declarations.
The purpose of this is to keep the actual node declaration functions
concise by avoiding the need to specify redundant information. It also
improves the separation between creating the declaration and using it.
2022-12-11 19:38:26 +01:00
19491e5fc0 Nodes: extract function that builds the node declaration
This also makes it easier to add some post processing on top of
the node-defined declaration.
2022-12-11 19:12:19 +01:00
b2bee8416b Fix oslc precompiled binary not working on macOS due to wrong rpath 2022-12-11 15:08:34 +01:00
c2ca30ea24 Cleanup: avoid calling generic node update functions from versioning code
This made it harder to change these functions in the future.

No functional changes are expected and the versioning worked correctly
in my test with a file created in Blender 2.69.

Adding the sockets in the function was not necessary in my test, because
those were already added before as part of `node_verify_sockets` in
`ntreeBlendReadLib`. I kept it in just to be on the safe side.
2022-12-11 14:15:46 +01:00
f6b67f92a1 Cleanup: unused parameter 2022-12-11 12:39:59 +01:00
fe30856d83 Fix T102992: GPencil Array doesn't respect restriction in Offset
The problem was that the bounding box was calculated using
all strokes, but if a filter is added, the bounding box must
include only the selected strokes.

Fix by @frogstomp
2022-12-10 13:43:02 +01:00
eac8e820f2 Cleanup: move node tree field inferencing to separate file 2022-12-10 11:32:04 +01:00
Damien Picard
19d90c7a33 UI: fix several labels and tooltips
See the differential revision for details about each change.

Differential Revision: https://developer.blender.org/D15608
2022-12-09 16:10:14 -06:00
Iliya Katueshenock
a9cc10b5bb Geometry Node: Make collection info socket tooltips more consistent
Make hints and descriptions consistent with the new socket name.

Differential Revision: https://developer.blender.org/D16631
2022-12-09 16:10:14 -06:00
Iliya Katueshenock
a3251e66a7 Geometry Nodes: Image Input Node
Add a simple node to choose an image data-block.

Ref T102854

Differential Revision: https://developer.blender.org/D16644
2022-12-09 16:10:14 -06:00
Damien Picard
538d4cc998 UI: Fix and improve various labels and tooltips
Improve a few messages, but mostly fix typos in many areas of the UI.
See inline comments in the differential revision for the rationale
behind the various changes.

Differential Revision: https://developer.blender.org/D16716
2022-12-09 16:10:14 -06:00
Iliya Katueshenock
fc5f7a1e2d Cleanup: Use topology cache of group output node
Using a cache greatly simplifies access to the output node.
I touched the most common cases that were clear to me. The texture
nodes were also touched because the code there looked fairly generic.

Differential Revision: https://developer.blender.org/D16699
2022-12-09 16:10:14 -06:00
Iliya Katueshenock
ad05b78d09 Geometry Nodes: Improve viewer node domain choice for blur node
Add preferred domain based on the "Value" input field. Most often,
the domain must match the original domain for the value.

Differential Revision: https://developer.blender.org/D16730
2022-12-09 16:10:14 -06:00
07b67894e8 Fix macOS precompiled libraries built on Xcode 14 not working in Xcode 13
Disable some symbols and linking features not available in Xcode 13.
2022-12-09 22:49:31 +01:00
3552f5d83c Build: don't bundle versioned dylib symbolic links on macOS
Apple notarization rejects packages containing them.

Similar to rpaths, we handle this generically as part of the harvest step
rather than patching individual library build systems.

Ref T99618
2022-12-09 22:49:31 +01:00
019b930d6b Fix build error with new USD libraries in debug mode
Solve conflict between TBB and Boost Python.
2022-12-09 22:49:31 +01:00
d5bcc1ef16 Cleanup: Tweak variable name in extrude node
These ranges aren't only used to index corners, but also other arrays.
Also use const for some variables, mostly for aesthetic reasons.
2022-12-09 15:22:28 -06:00
d17db1d11a Geometry Nodes: Support original indices in extrude node
This allows using the "On Cage" feature in edit mode to interact with
original mesh elements via the newly created geometry. The original
indices are only set for new elements that copy attribute values
from original elements directly, so it can also be a helpful way
to visualize attribute propagation.

The change was simplified by refactoring the individual mode slightly
to create separate index maps for the new edges and vertices. That
simplified attribute copying a bit too.
2022-12-09 14:26:15 -06:00
46993958fd Geometry Nodes: Support original index layer in split edges node
This is partially a bug fix, since the original index layer wasn't
handled at all before, and could be left uninitialized. That was caused
by my recent change to modify a mesh in place rather than create a
new one. Also copy over any orco data, which was another unhandled
layer type.

It's also a new feature though, since it allows using the "On Cage"
feature of edit mode to adjust original mesh elements by selecting
the new ones. This brings the functionality in line with the Edge Split
modifier.
2022-12-09 14:26:15 -06:00
b000fc308b Cleanup: Resolve unused variable warnings in modifier 2022-12-09 14:26:15 -06:00
bcbd13201a deps_builder: python updates
- Added a comment for additional tasks to do when bumping Python
- Updated sqlite to 3.39.4
- Downgraded setuptools to 63.2.0 to avoid numpy build issues
- Updated numpy to 1.23.5
2022-12-09 12:27:44 -07:00
874319a344 Fix T103031: ViewLayer: Crash in indirect_only_get due to missing null check
Previous fix (rBe00f76c6a8cca) accidentally lost a null check.
Fixes T103031.
2022-12-09 20:20:21 +02:00
c85ca5e365 Fix T103015: Line art crash after recent refactor
Caused by 0940719b5a which neglected to assign the
mesh loops data.
2022-12-09 09:24:57 -06:00
0a56198139 Cleanup: Split sharp edge tagging from face corner normal calculation
Don't use the same "context" struct for tagging sharp edges from auto-
smooth / poly flags and actually calculating face corner normals. That
required more arguments, and it required breaking const slightly to
reuse the code. Also split apart pre-populating corner normals
with vertex normals, since it isn't related at all and is only used
in one code path.
2022-12-09 09:13:51 -06:00
9a7fa1b3e9 Cleanup: Use array in face corner normals calculation 2022-12-09 09:13:51 -06:00
34722fa8a0 Cleanup: Reduce indentation in mesh operator
Also use local variable for BMesh.
2022-12-09 09:13:51 -06:00
c20e456ee0 ImageEngine: Reduce memory size by dividing the region in smaller parts.
Image engine uses 4 gpu textures that are as large as the area being
drawn. The amount of needed GPU memory can be reduced by dividing the
region in smaller parts. Reducing the GPU memory also reduces the stalls
when updating the textures, improving the performance as well.

This optimization works, but is disabled for now due to some rounding
errors that draw lines on the screen where the screen is divided.
2022-12-09 16:10:48 +01:00
bdd196661e Cleanup: ImageEngine-Move Responsibility of Texture Creation.
The image engine used to be responsible for a number of fixed full-screen
images. The responsibility has now moved to the drawing mode, so that
non-full-region-size textures can be allocated in a future refactoring.
2022-12-09 16:10:48 +01:00
595b302231 Cleanup: Remove unused method BatchUpdater::discard_batch.
Batch discarding is done by the owner of the batch.
2022-12-09 16:10:48 +01:00
255c7f26af Cleanup: Use enum class for image drawing flags. 2022-12-09 16:10:48 +01:00
83b78b1976 Cleanup: Equalize ImageEngine method names with Eevee-next.
- `cache_init` -> `begin_sync`
- `cache_populate` -> `image_sync`
- `draw_scene` -> `draw_viewport`
2022-12-09 16:10:48 +01:00
50ef008c61 Cleanup: Remove unused method from image_engine.
`get_gpu_textures` was created when the image engine didn't support
texture streaming and used the gpu textures that were stored in the
image buffer itself.
2022-12-09 16:10:47 +01:00
5ee116d448 Realtime Compositor: Implement Simple Star Glare node
This patch implements the Simple Star Glare node. This is only an approximation
of the existing implementation in the CPU compositor, an approximation that
removes the row-column dependency in the original algorithm, yielding an order
of magnitude faster computations. The difference due to the approximation is
readily visible in artificial test cases, but is less visible in actual use
cases, so it was agreed that this approximation is worthwhile.

For the future, we can look into approximating this further using a closed-form
IIR recursive filter with parallel interconnection and block-based parallelism,
which is expected to yield another order of magnitude faster computation.

The different passes can potentially be combined into a single shader with some
preprocessor tricks, but doing that complicates the code in a way that makes
it difficult to experiment with future optimizations, so I decided to leave it
as is for now.

Differential Revision: https://developer.blender.org/D16724

Reviewed By: Clement Foucault
2022-12-09 17:05:30 +02:00
0d21b9e2da Cleanup: fix compile error on macos 2022-12-09 15:58:13 +01:00
fa27a5d066 Realtime Compositor: Implement Ghost Glare node
This patch implements the Ghost Glare node. It is implemented using
direct convolution as opposed to a recursive one, which produces
slightly different, more accurate results. However, since the ghosts
are attenuated where it matters, the difference is barely visible and
is acceptable as far as I can tell.

A possible performance improvement is to implement all passes in a
single shader dispatch, where an array of all scales and color
modulators is computed recursively on the host then used in the shader
to add all ghosts, avoiding usage of global memory and unnecessary
copies. This optimization will be implemented separately.

Differential Revision: https://developer.blender.org/D16641

Reviewed By: Clement Foucault
2022-12-09 16:50:52 +02:00
a45284b855 BLI: remove deduplicated memory utility functions
These functions were originally implemented because:
- Not all of them existed pre C++17, but now we are using C++17.
- The call stack depth is quite a bit deeper with the std functions, making
  debugging slower and more annoying. I haven't found this to be a problem
  recently.

No functional changes are expected.
2022-12-09 14:15:41 +01:00
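The commit above does not list the removed helpers; as a purely illustrative sketch of the kind of C++17 standard replacements it implies (the names of the removed BLI wrappers are not given, so nothing here is taken from the patch):

```cpp
/* Sketch: C++17 facilities in <memory> that make hand-rolled BLI memory
 * helpers unnecessary. Illustrative only. */
#include <cstddef>
#include <memory>

static void relocate_ints(int *src, int *dst, std::size_t n)
{
  std::uninitialized_copy_n(src, n, dst); /* Construct into uninitialized dst. */
  std::destroy_n(src, n);                 /* Destroy the source range. */
}
```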
7c5b7713f1 Fix T103001: Width of some compositor nodes is incorrect
Nodes which are common to multiple editors (RGB, Value, Switch, RGB to BW)
have less width in the compositor editor. The patch changes the compositor
node width to 140 for consistency.

Reviewed by: HooglyBoogly

Differential Revision: https://developer.blender.org/D16719
2022-12-09 16:51:34 +05:30
a55c356a1c BLO: shrink old-new-map when it is cleared
This was accidentally changed in {rB57613630c7faa41aa20ae9}.
Not shrinking the map can degrade performance.
2022-12-09 12:07:40 +01:00
bf1791ba92 BLI: add clear-and-shrink method to more data structures
Also renames the existing `clear_and_make_inline` to `clear_and_shrink`
which is more concise.
2022-12-09 12:00:37 +01:00
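A minimal usage sketch for the renamed method above, assuming the post-change `BLI_vector.hh` API (not code from the patch):

```cpp
/* Sketch: clear_and_shrink() (formerly clear_and_make_inline()) empties the
 * container and also releases its heap allocation, unlike clear(). */
#include "BLI_vector.hh"

static void release_cache(blender::Vector<float> &cache)
{
  cache.clear_and_shrink();
}
```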
122d6d67e6 Fix: Points to Volume node crashes with too small radius
OpenVDB likes to crash even in release builds when volumes become too small.
To fix this, I used the same function that we already use in other places to
determine whether the resulting volume will be too small.
2022-12-09 11:42:10 +01:00
f06a1368bb Cleanup: sanitize namespaces in image engine.
Some files were missing namespaces, and sometimes the closing comments of
namespaces were incorrectly placed. No functional changes.
2022-12-09 10:29:34 +01:00
6fb4ca1eec Image Engine: Add assert to check for correct float colorspace.
Float images loaded in Blender are converted to scene linear and don't
require additional conversion. Image engine can reuse the rect_float of
those images. An assert statement is added to make this more clear and
to test for missing code paths or future developments.
2022-12-09 10:08:51 +01:00
9785f84dd2 Cleanup: remove incorrect TODO in image engine.
Float rect is always in scene linear space, so we can always reuse
rect_float when available.
2022-12-09 09:57:52 +01:00
9b57a2ac6a Cleanup: helper function for MeshSequenceCache ORCO evaluation
This splits the logic to detect if the MeshSequenceCache modifier
evaluation is for the ORCO mesh into its own function. This will allow
reusing the logic for when GeometrySet support is added to the modifier
(D11592).

No functional changes.

Differential Revision: https://developer.blender.org/D16611
2022-12-09 04:25:29 +01:00
f898190362 GPU: Fix static compilation errors
- Missing explicit cast to `int` for bitwise operator.
- UBO struct member macro collision. Rename fixes it.
2022-12-09 00:10:14 +01:00
c50e25c5f0 BLI: Math: Rename BLI_math_rotation.hh in preparation for new rotation lib
Incoming with the new matrix API (D16625) are the new rotation types.
There is a typename collision if we simply reuse the same header.
2022-12-08 23:41:38 +01:00
237fd48d01 Metal: Add back static compilation for no_geom shaders
These are Metal-specific shaders and needed to be tagged as such before
enabling static compilation.
2022-12-08 23:32:17 +01:00
Jason Fielder
2e61c446ac GPU: Explicit Texture Usage Flags for enabling GPU Backend optimizations.
Texture usage flags can now be provided during texture creation specifying
the ways in which a texture can be used. This allows the GPU backends to
perform contextual optimizations which were not previously possible. This
includes enablement of hardware lossless compression which can result in
a 15%+ performance uplift for bandwidth-limited scenes on hardware such
as Apple-Silicon using Metal.

GPU_TEXTURE_USAGE_GENERAL can be used by default if usage is not known
ahead of time. Patch will also be relevant for the Vulkan backend.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem
Differential Revision: https://developer.blender.org/D15967
2022-12-08 23:31:05 +01:00
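A short sketch of the new call pattern introduced above, mirroring the `*_ex` calls visible in the diffs further down; the wrapper function itself is hypothetical:

```cpp
/* Sketch: pass an explicit usage flag when creating a texture. The helper
 * is made up; the GPU_* calls mirror the OCIO/BLF diffs below. */
#include "GPU_texture.h"

static GPUTexture *create_lut_texture(const char *name, int width, const float *values)
{
  /* GPU_TEXTURE_USAGE_SHADER_READ lets the backend optimize for sampling;
   * GPU_TEXTURE_USAGE_GENERAL stays the safe default when usage is unknown. */
  return GPU_texture_create_1d_ex(
      name, width, 1, GPU_RGBA16F, GPU_TEXTURE_USAGE_SHADER_READ, values);
}
```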
Jason Fielder
359d98423e Metal: Fix memory leaks.
Fix a number of small memory leaks in the Metal backend. Unreleased blit
shader objects and temporary textures addressed. Static memory manager
modified to defer creation until use. Added reference count tracker to
shared memory manager across contexts, such that cached memory allocations
will be released if all contexts are destroyed and re-initialized.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem
Differential Revision: https://developer.blender.org/D16415
2022-12-08 23:08:57 +01:00
Jason Fielder
9ec20f2ede Metal: Add support for Workbench Shadows.
Implementing non-geometry-shader path for rendering stencil shadows,
used by the workbench engine.
Patch also contains a few small modifications to Create-info to ensure
usage of gl_FragDepth is explicitly specified.
This is required for testing of the patch.

Authored by Apple: Michael Parkin-White

Ref T96261

Reviewed By: fclem
Differential Revision: https://developer.blender.org/D16436
2022-12-08 23:02:59 +01:00
320659068a Work around shaderc issue by converting view macros to functions. 2022-12-06 10:45:34 +01:00
1cdcfa3a89 Enable performance optimizations. 2022-12-06 10:09:26 +01:00
248299fc95 Nicer comments describing the issue. 2022-12-06 10:08:59 +01:00
a8a7b84659 Converted world_clip_planes macros to functions. 2022-12-06 08:38:13 +01:00
f1b4dce33e Remove const keyword from shader global. 2022-12-06 08:15:20 +01:00
0ae912a28a Merge branch 'master' into temp-vulkan-shader 2022-12-06 08:14:12 +01:00
f5f1ac9f03 Revert whitespace change. 2022-12-02 13:44:35 +01:00
5033453735 Merge branch 'master' into temp-vulkan-shader 2022-12-02 13:44:10 +01:00
da4b09ac6f Remove code that hid a missing feature, as it is now fixed in master. 2022-12-02 13:35:32 +01:00
df3c418866 Merge branch 'master' into temp-vulkan-shader 2022-12-02 12:52:51 +01:00
f23b870def Removed debug code. 2022-12-02 12:46:53 +01:00
94e1e694bc Merge branch 'temp-vulkan-shader' of git.blender.org:blender into temp-vulkan-shader 2022-12-02 12:28:28 +01:00
78abdcf31e Tweaked gpu_InstanceIndex. 2022-12-02 12:27:42 +01:00
99e4bfd5e4 Tweaked gpu_InstanceIndex. 2022-12-02 12:24:30 +01:00
794ce2a08f Removed debug code. 2022-12-02 12:12:21 +01:00
67b3796723 Revert change to device restriction. 2022-12-02 12:09:20 +01:00
f1d9fe95e8 Fixed lens distortion compilation. 2022-12-02 12:04:44 +01:00
bbbe5a3894 Fix Eevee-next depth of field compilation. 2022-12-02 12:04:18 +01:00
b1ba82ba97 Fix: sampler is a keyword, do not use it as a parameter name. 2022-12-02 11:26:21 +01:00
db6db0b754 Remove warning (pragma once not implemented). 2022-12-02 11:12:16 +01:00
642bba24a9 Added support for compute and shader storage. 2022-12-02 11:10:16 +01:00
244f61e9e9 Fix motion path shaders. 2022-12-02 10:59:41 +01:00
12b9ebc690 Moved debug code. 2022-12-02 10:29:38 +01:00
49723cca42 Enable OpenGL/Metal shader compilation. (was disabled for debugging). 2022-12-02 10:29:18 +01:00
8781886cf7 Fix compilation of line dashed shader. 2022-12-02 10:17:20 +01:00
37ee9595a0 Fix gpencil shaders. 2022-12-02 10:16:23 +01:00
be1dce8dfb Fix compilation issues in workbench shadow. 2022-12-02 09:58:31 +01:00
2d6dfbf038 Fix vulkan compilation of common_smaa_lib.glsl 2022-12-02 09:31:04 +01:00
7214298cd1 Merge branch 'master' into temp-vulkan-shader 2022-12-02 08:37:58 +01:00
63084dc7dd Merge branch 'master' into temp-vulkan-shader 2022-12-02 08:05:34 +01:00
42645b33d6 Use correct layout location. 2022-11-29 15:54:43 +01:00
cf52e4a07f Add definition of gl_InstanceID. 2022-11-29 15:28:53 +01:00
4a3cbfd90e Add glsl shader defines. 2022-11-29 15:25:22 +01:00
dc973dfa28 Fixed geometry layout. 2022-11-29 15:16:12 +01:00
bf3eea036d Remove debug code. 2022-11-29 14:43:14 +01:00
8244f55530 regular stage interfaces working. 2022-11-29 14:37:42 +01:00
cfb44574d9 Fixed stage interface declaration. 2022-11-29 12:19:11 +01:00
c836b7e603 Merge branch 'master' into temp-vulkan-shader 2022-11-29 11:30:14 +01:00
a246ad9559 Remove unused cmake includes/libs. 2022-11-25 14:50:10 +01:00
36127e042b Improve shader log to add correct filenames. 2022-11-25 14:16:46 +01:00
94a98b9365 Copied over GLSL code gen functions from GLShader. 2022-11-25 14:16:46 +01:00
5ca4e9e545 Added other stages to VKShader. 2022-11-25 10:03:10 +01:00
8a3329e72b Initialize GCaps from VKContext. 2022-11-25 10:00:31 +01:00
7d7a39d00b Make sure that shader compiler doesn't crash when using Vulkan. 2022-11-25 09:24:03 +01:00
74bfeec1a5 Merge branch 'master' into temp-vulkan-shader 2022-11-25 09:00:38 +01:00
1273af7a01 Vulkan: create shader module. 2022-11-22 17:01:33 +01:00
e2d18eda75 Merge branch 'temp-vulkan-memory-allocator' into temp-vulkan-shader 2022-11-22 16:38:41 +01:00
bfa1f2d351 Add destructor and accessor for VKContext::mem_allocator_. 2022-11-22 14:22:46 +01:00
1effef805e Merge branch 'master' into temp-vulkan-shader 2022-11-22 14:19:26 +01:00
6f1197c6b9 Some tweaks in make file to reduce branching. 2022-11-22 14:11:32 +01:00
23503dec99 Merge branch 'master' into temp-vulkan-memory-allocator 2022-11-22 14:08:14 +01:00
b43e11deda Don't use curly brackets to group code. 2022-11-22 13:17:08 +01:00
d45bbff511 Change vulkan version to 1.2 2022-11-22 13:14:29 +01:00
9b467c591d Change license header. 2022-11-22 13:14:17 +01:00
0bc4eb987c GPU: add vulkan memory allocator to VKContext. 2022-11-22 13:06:52 +01:00
648158dfbd Merge branch 'master' into temp-vulkan-memory-allocator 2022-11-22 12:48:03 +01:00
fc0b8cb085 Fix compilation of vk_mem_alloc on Apple. 2022-11-22 12:32:06 +01:00
b3254da333 Merge branch 'master' into temp-vulkan-shader 2022-11-22 12:26:43 +01:00
72e5cbf0c7 Vulkan: Add VK memory allocator 3.0.1 to extern.
Vulkan doesn't have a built-in memory allocator. The application should
provide the memory allocator at runtime. Vulkan Memory Allocator is a
widely used implementation.

Vulkan Memory Allocator is a header-only implementation, but the using
application should compile a part of it in a C++ compile unit. The file
`vk_mem_alloc_impl.cc` and the `extern_vulkan_memory_allocator` library
are therefore introduced.

Before continuing with this patch, the GHOST Vulkan branch should be added.

Differential Revision: https://developer.blender.org/D16572
2022-11-22 12:03:08 +01:00
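A hypothetical sketch of the allocator setup VMA expects (not taken from the patch); defining `VMA_IMPLEMENTATION` in exactly one translation unit is the role `vk_mem_alloc_impl.cc` plays, and the Vulkan 1.2 target is taken from a later commit in this branch:

```cpp
/* Sketch only: creating a VmaAllocator at context startup.
 * The function name is an assumption. */
#define VMA_IMPLEMENTATION /* In Blender this lives in vk_mem_alloc_impl.cc. */
#include "vk_mem_alloc.h"

static VmaAllocator create_allocator(VkInstance instance,
                                     VkPhysicalDevice physical_device,
                                     VkDevice device)
{
  VmaAllocatorCreateInfo info = {};
  info.vulkanApiVersion = VK_API_VERSION_1_2;
  info.instance = instance;
  info.physicalDevice = physical_device;
  info.device = device;

  VmaAllocator allocator = nullptr;
  vmaCreateAllocator(&info, &allocator);
  return allocator;
}
```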
44ad59592b Vulkan: compile shader to spirv. 2022-11-21 14:01:47 +01:00
72b395a7e6 Find includes and libs for shaderc.
Don't use the default find_package(Vulkan) as that won't find the SDK.
ShaderC is only part of the SDK, not of the distributed driver part.
2022-11-18 15:49:42 +01:00
041900ae95 GHOST: Command pool should be able to reset. 2022-11-18 11:45:15 +01:00
b271ed8ac9 Code style struct initialization. 2022-11-18 11:35:44 +01:00
6ccd38ea90 GHOST: Vulkan swapbuffer should wait for graphics queue idling. 2022-11-18 11:35:12 +01:00
7d691969e6 Remove (void) parameters from GHOST_ContextVK. 2022-11-18 11:28:14 +01:00
1a47f3ae17 CMAKE: Move WITH_VULKAN_BACKEND from global to specific modules. 2022-11-18 11:24:23 +01:00
7211f3ab5b Merge branch 'master' into temp-ghost-vulkan 2022-11-18 11:04:32 +01:00
fad06751a6 Merge branch 'master' into temp-ghost-vulkan 2022-11-15 11:12:27 +01:00
0fae43efb2 Apply formatting. 2022-11-08 14:32:27 +01:00
31ecc30283 Added support for Linux. Thanks to Qiang Yu for the patch! 2022-11-08 14:29:25 +01:00
d64d789174 Initialize vk backend. 2022-11-07 08:03:50 +01:00
975e9020cb Create VKBackend when selecting vulkan from the command line. 2022-11-01 14:52:33 +01:00
d2c6a27f58 Finding MOLTENVK. 2022-11-01 13:51:16 +01:00
6ca82bbf34 Fix missing import in GHOST_SystemWin32.cpp 2022-11-01 12:02:27 +01:00
86868a4bcc Changes to cmake to select vulkan from libs. 2022-11-01 12:00:28 +01:00
5db147c5be Merge branch 'master' into temp-ghost-vulkan 2022-11-01 09:26:54 +01:00
39db9b836b Removed debug code. 2022-11-01 08:42:49 +01:00
b0800197e6 Removed debug code. 2022-11-01 08:42:49 +01:00
16f5cda14a Removed obsolete comments. 2022-11-01 08:42:49 +01:00
50e0d346f1 Implemented newDrawingContext. 2022-11-01 08:42:49 +01:00
7cd24fb70a Implemented createOffscreenContext. 2022-11-01 08:42:49 +01:00
1b04b5cf08 Find MoltenVK (WIP). 2022-11-01 08:42:49 +01:00
18ba57ddb6 Copied from tmp-vulkan branch. 2022-11-01 08:42:49 +01:00
ed2b382490 Add vulkan changes to cmake files. 2022-11-01 08:42:49 +01:00
237 changed files with 5638 additions and 2349 deletions

View File

@@ -23,13 +23,11 @@ elseif(APPLE)
set(BOOST_BUILD_COMMAND ./b2)
set(BOOST_BUILD_OPTIONS toolset=clang-darwin cxxflags=${PLATFORM_CXXFLAGS} linkflags=${PLATFORM_LDFLAGS} visibility=global --disable-icu boost.locale.icu=off)
set(BOOST_HARVEST_CMD echo .)
set(BOOST_PATCH_COMMAND echo .)
else()
set(BOOST_HARVEST_CMD echo .)
set(BOOST_CONFIGURE_COMMAND ./bootstrap.sh)
set(BOOST_BUILD_COMMAND ./b2)
set(BOOST_BUILD_OPTIONS cxxflags=${PLATFORM_CXXFLAGS} --disable-icu boost.locale.icu=off)
set(BOOST_PATCH_COMMAND echo .)
endif()
set(JAM_FILE ${BUILD_DIR}/boost.user-config.jam)
@@ -72,7 +70,7 @@ ExternalProject_Add(external_boost
URL_HASH ${BOOST_HASH_TYPE}=${BOOST_HASH}
PREFIX ${BUILD_DIR}/boost
UPDATE_COMMAND ""
PATCH_COMMAND ${BOOST_PATCH_COMMAND}
PATCH_COMMAND ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/boost/src/external_boost < ${PATCH_DIR}/boost.diff
CONFIGURE_COMMAND ${BOOST_CONFIGURE_COMMAND}
BUILD_COMMAND ${BOOST_BUILD_COMMAND} ${BOOST_BUILD_OPTIONS} -j${MAKE_THREADS} architecture=${BOOST_ARCHITECTURE} address-model=${BOOST_ADDRESS_MODEL} link=shared threading=multi ${BOOST_OPTIONS} --prefix=${LIBDIR}/boost install
BUILD_IN_SOURCE 1

View File

@@ -63,6 +63,8 @@ endfunction()
# Ideally this would be done as part of the Blender build since it makes assumptions
# about where the files will be installed. However it would add patchelf as a new
# dependency for building.
#
# Also removes versioned symlinks, which give errors with macOS notarization.
if(APPLE)
set(set_rpath_cmd python3 ${CMAKE_CURRENT_SOURCE_DIR}/darwin/set_rpath.py @loader_path)
else()
@@ -76,7 +78,11 @@ function(harvest_rpath_lib from to pattern)
cmake_policy(SET CMP0009 NEW)\n
file(GLOB_RECURSE shared_libs ${HARVEST_TARGET}/${to}/${pattern}) \n
foreach(f \${shared_libs}) \n
if(NOT IS_SYMLINK \${f})\n
if(IS_SYMLINK \${f})\n
if(APPLE)\n
file(REMOVE_RECURSE \${f})
endif()\n
else()\n
execute_process(COMMAND ${set_rpath_cmd} \${f}) \n
endif()\n
endforeach()")
@@ -101,15 +107,21 @@ function(harvest_rpath_python from to pattern)
install(CODE "\
file(GLOB_RECURSE shared_libs ${HARVEST_TARGET}/${to}/${pattern}\.so*) \n
foreach(f \${shared_libs}) \n
get_filename_component(f_dir \${f} DIRECTORY) \n
file(RELATIVE_PATH relative_dir \${f_dir} ${HARVEST_TARGET}) \n
execute_process(COMMAND ${set_rpath_cmd}/\${relative_dir}../lib \${f}) \n
if(IS_SYMLINK \${f})\n
if(APPLE)\n
file(REMOVE_RECURSE \${f})
endif()\n
else()\n
get_filename_component(f_dir \${f} DIRECTORY) \n
file(RELATIVE_PATH relative_dir \${f_dir} ${HARVEST_TARGET}) \n
execute_process(COMMAND ${set_rpath_cmd}/\${relative_dir}../lib \${f}) \n
endif()\n
endforeach()")
endfunction()
harvest(alembic/include alembic/include "*.h")
harvest(alembic/lib/libAlembic.a alembic/lib/libAlembic.a)
harvest(alembic/bin alembic/bin "*")
harvest_rpath_bin(alembic/bin alembic/bin "*")
harvest(brotli/include brotli/include "*.h")
harvest(brotli/lib brotli/lib "*.a")
harvest(boost/include boost/include "*")
@@ -151,7 +163,7 @@ harvest(llvm/lib llvm/lib "libLLVM*.a")
harvest(llvm/lib llvm/lib "libclang*.a")
harvest(llvm/lib/clang llvm/lib/clang "*.h")
if(APPLE)
harvest(openmp/lib openmp/lib "*")
harvest(openmp/lib openmp/lib "libomp.dylib")
harvest(openmp/include openmp/include "*.h")
endif()
if(BLENDER_PLATFORM_ARM)
@@ -206,7 +218,7 @@ harvest_rpath_lib(openvdb/lib openvdb/lib "*${SHAREDLIBEXT}*")
harvest_rpath_python(openvdb/lib/python${PYTHON_SHORT_VERSION} python/lib/python${PYTHON_SHORT_VERSION} "*pyopenvdb*")
harvest(xr_openxr_sdk/include/openxr xr_openxr_sdk/include/openxr "*.h")
harvest(xr_openxr_sdk/lib xr_openxr_sdk/lib "*.a")
harvest(osl/bin osl/bin "oslc")
harvest_rpath_bin(osl/bin osl/bin "oslc")
harvest(osl/include osl/include "*.h")
harvest(osl/lib osl/lib "*.a")
harvest(osl/share/OSL/shaders osl/share/OSL/shaders "*.h")
@@ -242,9 +254,8 @@ harvest(usd/lib/usd usd/lib/usd "*")
harvest_rpath_python(usd/lib/python/pxr python/lib/python${PYTHON_SHORT_VERSION}/site-packages/pxr "*")
harvest(usd/plugin usd/plugin "*")
harvest(materialx/include materialx/include "*.h")
harvest(materialx/lib materialx/lib "*")
harvest_rpath_lib(materialx/lib materialx/lib "*${SHAREDLIBEXT}*")
harvest(materialx/libraries materialx/libraries "*")
harvest(materialx/python materialx/python "*")
harvest(materialx/lib/cmake/MaterialX materialx/lib/cmake/MaterialX "*.cmake")
harvest_rpath_python(materialx/python/MaterialX python/lib/python${PYTHON_SHORT_VERSION}/site-packages/MaterialX "*")
# We do not need anything from the resources folder, but the MaterialX config

View File

@@ -32,13 +32,11 @@ if(WIN32)
# Python will download its own deps and there's very little we can do about
# that beyond placing some code in their externals dir before it tries.
# the foldernames *HAVE* to match the ones inside pythons get_externals.cmd.
# python 3.10.8 still ships zlib 1.2.12, replace it with our 1.2.13
# copy until they update. Same rules apply to openssl foldernames HAVE to match
# regardless of the version actually in there.
PATCH_COMMAND mkdir ${PYTHON_EXTERNALS_FOLDER_DOS} &&
mklink /J ${PYTHON_EXTERNALS_FOLDER_DOS}\\zlib-1.2.12 ${ZLIB_SOURCE_FOLDER_DOS} &&
mklink /J ${PYTHON_EXTERNALS_FOLDER_DOS}\\zlib-1.2.13 ${ZLIB_SOURCE_FOLDER_DOS} &&
mklink /J ${PYTHON_EXTERNALS_FOLDER_DOS}\\openssl-1.1.1q ${SSL_SOURCE_FOLDER_DOS} &&
${CMAKE_COMMAND} -E copy ${ZLIB_SOURCE_FOLDER}/../external_zlib-build/zconf.h ${PYTHON_EXTERNALS_FOLDER}/zlib-1.2.12/zconf.h &&
${CMAKE_COMMAND} -E copy ${ZLIB_SOURCE_FOLDER}/../external_zlib-build/zconf.h ${PYTHON_EXTERNALS_FOLDER}/zlib-1.2.13/zconf.h &&
${PATCH_CMD} --verbose -p1 -d ${BUILD_DIR}/python/src/external_python < ${PATCH_DIR}/python_windows.diff
CONFIGURE_COMMAND echo "."
BUILD_COMMAND ${CONFIGURE_ENV_MSVC} && cd ${BUILD_DIR}/python/src/external_python/pcbuild/ && set IncludeTkinter=false && set LDFLAGS=/DEBUG && call prepare_ssl.bat && call build.bat -e -p x64 -c ${BUILD_MODE}

View File

@@ -15,7 +15,9 @@ ExternalProject_Add(external_python_site_packages
CONFIGURE_COMMAND ${PIP_CONFIGURE_COMMAND}
BUILD_COMMAND ""
PREFIX ${BUILD_DIR}/site_packages
INSTALL_COMMAND ${PYTHON_BINARY} -m pip install --no-cache-dir ${SITE_PACKAGES_EXTRA} cython==${CYTHON_VERSION} idna==${IDNA_VERSION} charset-normalizer==${CHARSET_NORMALIZER_VERSION} urllib3==${URLLIB3_VERSION} certifi==${CERTIFI_VERSION} requests==${REQUESTS_VERSION} zstandard==${ZSTANDARD_VERSION} autopep8==${AUTOPEP8_VERSION} pycodestyle==${PYCODESTYLE_VERSION} toml==${TOML_VERSION} meson==${MESON_VERSION} --no-binary :all:
# setuptools is downgraded to 63.2.0 (same as python 3.10.8) since numpy 1.23.x seemingly has
# issues building on windows with the newer versions that ship with python 3.10.9+
INSTALL_COMMAND ${PYTHON_BINARY} -m pip install --no-cache-dir ${SITE_PACKAGES_EXTRA} setuptools==63.2.0 cython==${CYTHON_VERSION} idna==${IDNA_VERSION} charset-normalizer==${CHARSET_NORMALIZER_VERSION} urllib3==${URLLIB3_VERSION} certifi==${CERTIFI_VERSION} requests==${REQUESTS_VERSION} zstandard==${ZSTANDARD_VERSION} autopep8==${AUTOPEP8_VERSION} pycodestyle==${PYCODESTYLE_VERSION} toml==${TOML_VERSION} meson==${MESON_VERSION} --no-binary :all:
)
if(USE_PIP_NUMPY)

View File

@@ -201,6 +201,11 @@ set(OSL_HASH 53211da86c34ba6e0344998c1a6d219c)
set(OSL_HASH_TYPE MD5)
set(OSL_FILE OpenShadingLanguage-${OSL_VERSION}.tar.gz)
# NOTE: When updating the python version, it's required to check the versions
# it wants to use in PCbuild/get_externals.bat for the following dependencies:
# BZIP2, FFI, SQLITE and change the versions in this file as well. For compliance
# reasons there can be no exceptions to this.
set(PYTHON_VERSION 3.10.9)
set(PYTHON_SHORT_VERSION 3.10)
set(PYTHON_SHORT_VERSION_NO_DOTS 310)
@@ -240,10 +245,10 @@ set(PYCODESTYLE_VERSION 2.8.0)
set(TOML_VERSION 0.10.2)
set(MESON_VERSION 0.63.0)
set(NUMPY_VERSION 1.23.2)
set(NUMPY_VERSION 1.23.5)
set(NUMPY_SHORT_VERSION 1.23)
set(NUMPY_URI https://github.com/numpy/numpy/releases/download/v${NUMPY_VERSION}/numpy-${NUMPY_VERSION}.tar.gz)
set(NUMPY_HASH 9bf2a361509797de14ceee607387fe0f)
set(NUMPY_HASH 8b2692a511a3795f3af8af2cd7566a15)
set(NUMPY_HASH_TYPE MD5)
set(NUMPY_FILE numpy-${NUMPY_VERSION}.tar.gz)
set(NUMPY_CPE "cpe:2.3:a:numpy:numpy:${NUMPY_VERSION}:*:*:*:*:*:*:*")
@@ -437,9 +442,7 @@ set(LZMA_HASH 5117f930900b341493827d63aa910ff5e011e0b994197c3b71c08a20228a42df)
set(LZMA_HASH_TYPE SHA256)
set(LZMA_FILE xz-${LZMA_VERSION}.tar.bz2)
# NOTE: This will *HAVE* to match the version python ships on windows which
# is hardcoded in pythons PCbuild/get_externals.bat. For compliance reasons there
# can be no exceptions to this.
# NOTE: Python's build has been modified to use our ssl version.
set(SSL_VERSION 1.1.1q)
set(SSL_URI https://www.openssl.org/source/openssl-${SSL_VERSION}.tar.gz)
set(SSL_HASH d7939ce614029cdff0b6c20f0e2e5703158a489a72b2507b8bd51bf8c8fd10ca)
@@ -450,10 +453,10 @@ set(SSL_CPE "cpe:2.3:a:openssl:openssl:${SSL_VERSION}:*:*:*:*:*:*:*")
# Note: This will *HAVE* to match the version python ships on windows which
# is hardcoded in pythons PCbuild/get_externals.bat for compliance reasons there
# can be no exceptions to this.
set(SQLITE_VERSION 3.37.2)
set(SQLLITE_LONG_VERSION 3370200)
set(SQLITE_VERSION 3.39.4)
set(SQLLITE_LONG_VERSION 3390400)
set(SQLITE_URI https://www.sqlite.org/2022/sqlite-autoconf-${SQLLITE_LONG_VERSION}.tar.gz)
set(SQLITE_HASH e56faacadfb4154f8fbd0f2a3f827d13706b70a1)
set(SQLITE_HASH c4c5c39269d1b9bb1487cff580c1f583608229b2)
set(SQLITE_HASH_TYPE SHA1)
set(SQLITE_FILE sqlite-autoconf-${SQLLITE_LONG_VERSION}.tar.gz)
set(SQLITE_CPE "cpe:2.3:a:sqlite:sqlite:${SQLITE_VERSION}:*:*:*:*:*:*:*")

View File

@@ -1,9 +1,19 @@
#!/usr/bin/env python3
# macOS utility to remove all rpaths and add a new one.
import os
import re
import subprocess
import sys
# Strip version numbers from dependencies, macOS notarization fails
# with version symlinks.
def strip_lib_version(name):
name = re.sub(r'(\.[0-9]+)+.dylib', '.dylib', name)
name = re.sub(r'(\.[0-9]+)+.so', '.so', name)
name = re.sub(r'(\.[0-9]+)+.cpython', '.cpython', name)
return name
rpath = sys.argv[1]
file = sys.argv[2]
@@ -17,3 +27,18 @@ for i, token in enumerate(tokens):
subprocess.run(['install_name_tool', '-delete_rpath', old_rpath, file])
subprocess.run(['install_name_tool', '-add_rpath', rpath, file])
# Strip version from dependencies.
p = subprocess.run(['otool', '-L', file], capture_output=True)
tokens = p.stdout.split()
for i, token in enumerate(tokens):
token = token.decode("utf-8")
if token.startswith("@rpath"):
new_token = strip_lib_version(token)
subprocess.run(['install_name_tool', '-change', token, new_token, file])
# Strip version from library itself.
new_file = strip_lib_version(file)
new_id = '@rpath/' + os.path.basename(new_file)
os.rename(file, new_file)
subprocess.run(['install_name_tool', '-id', new_id, new_file])

View File

@@ -0,0 +1,12 @@
--- a/boost/python//detail/wrap_python.hpp 2022-12-09 19:16:17
+++ b/boost/python//detail/wrap_python.hpp 2022-12-09 19:18:08
@@ -206,7 +206,8 @@
#ifdef DEBUG_UNDEFINED_FROM_WRAP_PYTHON_H
# undef DEBUG_UNDEFINED_FROM_WRAP_PYTHON_H
-# define _DEBUG
+// BLENDER: TBB expects this to have a value.
+# define _DEBUG 1
# ifdef _CRT_NOFORCE_MANIFEST_DEFINED_FROM_WRAP_PYTHON_H
# undef _CRT_NOFORCE_MANIFEST_DEFINED_FROM_WRAP_PYTHON_H
# undef _CRT_NOFORCE_MANIFEST

View File

@@ -36,3 +36,39 @@ index a97a755..07ce853 100644
if (self.compiler.find_library_file(self.lib_dirs, lib_name)):
ffi_lib = lib_name
break
--- a/Modules/posixmodule.c 2022-12-09 21:44:03
+++ b/Modules/posixmodule.c 2022-12-09 21:39:46
@@ -10564,10 +10564,15 @@
Py_BEGIN_ALLOW_THREADS
#ifdef HAVE_MKFIFOAT
if (dir_fd != DEFAULT_DIR_FD) {
+// BLENDER: disable also at compile time for compatibility when linking with older Xcode.
+// https://github.com/python/cpython/issues/97897
+#ifndef __APPLE__
if (HAVE_MKFIFOAT_RUNTIME) {
result = mkfifoat(dir_fd, path->narrow, mode);
+ } else
+#endif
+ {
- } else {
mkfifoat_unavailable = 1;
result = 0;
}
@@ -10638,10 +10633,15 @@
Py_BEGIN_ALLOW_THREADS
#ifdef HAVE_MKNODAT
if (dir_fd != DEFAULT_DIR_FD) {
+// BLENDER: disable also at compile time for compatibility when linking with older Xcode.
+// https://github.com/python/cpython/issues/97897
+#ifndef __APPLE__
if (HAVE_MKNODAT_RUNTIME) {
result = mknodat(dir_fd, path->narrow, mode, device);
+ } else
+#endif
+ {
- } else {
mknodat_unavailable = 1;
result = 0;
}

View File

@@ -30,3 +30,19 @@ diff -ru ./src/video/SDL_video.c ./src/video/SDL_video.c
if (SDL_strcmp(_this->name, "cocoa") == 0) { /* don't do this for X11, etc */
if (Cocoa_IsWindowInFullscreenSpace(window)) {
return SDL_FALSE;
--- CMakeLists.txt 2022-12-09 20:40:00
+++ CMakeLists.txt 2022-12-09 20:40:00
@@ -526,6 +526,13 @@
list(APPEND EXTRA_CFLAGS "-fno-strict-aliasing")
endif()
+ # BLENDER: make libs compatible with older Xcode.
+ # https://github.com/KhronosGroup/MoltenVK/issues/1756
+ check_c_compiler_flag(-fno-objc-msgsend-selector-stubs HAVE_GCC_NO_OBJC_MSGSEND_SELECTOR_STUBS)
+ if(HAVE_GCC_NO_OBJC_MSGSEND_SELECTOR_STUBS)
+ list(APPEND EXTRA_CFLAGS "-fno-objc-msgsend-selector-stubs")
+ endif()
+
check_c_compiler_flag(-Wdeclaration-after-statement HAVE_GCC_WDECLARATION_AFTER_STATEMENT)
if(HAVE_GCC_WDECLARATION_AFTER_STATEMENT)
check_c_compiler_flag(-Werror=declaration-after-statement HAVE_GCC_WERROR_DECLARATION_AFTER_STATEMENT)

View File

@@ -105,9 +105,10 @@ if(WITH_VULKAN_BACKEND)
set(VULKAN_ROOT_DIR ${LIBDIR}/vulkan/macOS)
set(VULKAN_INCLUDE_DIR ${VULKAN_ROOT_DIR}/include)
set(VULKAN_LIBRARY ${VULKAN_ROOT_DIR}/lib/libvulkan.1.dylib)
set(SHADERC_LIBRARY ${VULKAN_ROOT_DIR}/lib/libshaderc_combined.a)
set(VULKAN_INCLUDE_DIRS ${VULKAN_INCLUDE_DIR} ${MOLTENVK_INCLUDE_DIRS})
set(VULKAN_LIBRARIES ${VULKAN_LIBRARY} ${MOLTENVK_LIBRARIES})
set(VULKAN_LIBRARIES ${VULKAN_LIBRARY} ${SHADERC_LIBRARY} ${MOLTENVK_LIBRARIES})
else()
message(WARNING "Vulkan SDK was not found, disabling WITH_VULKAN_BACKEND")
set(WITH_VULKAN_BACKEND OFF)

View File

@@ -1206,7 +1206,7 @@ class CyclesWorldSettings(bpy.types.PropertyGroup):
)
homogeneous_volume: BoolProperty(
name="Homogeneous Volume",
description="When using volume rendering, assume volume has the same density everywhere"
description="When using volume rendering, assume volume has the same density everywhere "
"(not using any textures), for faster rendering",
default=False,
)

View File

@@ -193,7 +193,7 @@ class CYCLES_RENDER_PT_sampling_viewport(CyclesButtonsPanel, Panel):
if cscene.use_preview_adaptive_sampling:
col = layout.column(align=True)
col.prop(cscene, "preview_samples", text=" Max Samples")
col.prop(cscene, "preview_samples", text="Max Samples")
col.prop(cscene, "preview_adaptive_min_samples", text="Min Samples")
else:
layout.prop(cscene, "preview_samples", text="Samples")
@@ -255,7 +255,7 @@ class CYCLES_RENDER_PT_sampling_render(CyclesButtonsPanel, Panel):
col = layout.column(align=True)
if cscene.use_adaptive_sampling:
col.prop(cscene, "samples", text=" Max Samples")
col.prop(cscene, "samples", text="Max Samples")
col.prop(cscene, "adaptive_min_samples", text="Min Samples")
else:
col.prop(cscene, "samples", text="Samples")

View File

@@ -337,10 +337,12 @@ static bool addGPULut1D2D(OCIO_GPUTextures &textures,
* It depends on more than height. So check instead by looking at the source. */
std::string sampler1D_name = std::string("sampler1D ") + sampler_name;
if (strstr(shader_desc->getShaderText(), sampler1D_name.c_str()) != nullptr) {
lut.texture = GPU_texture_create_1d(texture_name, width, 1, format, values);
lut.texture = GPU_texture_create_1d_ex(
texture_name, width, 1, format, GPU_TEXTURE_USAGE_SHADER_READ, values);
}
else {
lut.texture = GPU_texture_create_2d(texture_name, width, height, 1, format, values);
lut.texture = GPU_texture_create_2d_ex(
texture_name, width, height, 1, format, GPU_TEXTURE_USAGE_SHADER_READ, values);
}
if (lut.texture == nullptr) {
return false;
@@ -372,8 +374,15 @@ static bool addGPULut3D(OCIO_GPUTextures &textures,
}
OCIO_GPULutTexture lut;
lut.texture = GPU_texture_create_3d(
texture_name, edgelen, edgelen, edgelen, 1, GPU_RGB16F, GPU_DATA_FLOAT, values);
lut.texture = GPU_texture_create_3d_ex(texture_name,
edgelen,
edgelen,
edgelen,
1,
GPU_RGB16F,
GPU_DATA_FLOAT,
GPU_TEXTURE_USAGE_SHADER_READ,
values);
if (lut.texture == nullptr) {
return false;
}
@@ -442,7 +451,8 @@ static bool createGPUCurveMapping(OCIO_GPUCurveMappping &curvemap,
if (curve_mapping_settings) {
int lut_size = curve_mapping_settings->lut_size;
curvemap.texture = GPU_texture_create_1d("OCIOCurveMap", lut_size, 1, GPU_RGBA16F, nullptr);
curvemap.texture = GPU_texture_create_1d_ex(
"OCIOCurveMap", lut_size, 1, GPU_RGBA16F, GPU_TEXTURE_USAGE_SHADER_READ, nullptr);
GPU_texture_filter_mode(curvemap.texture, false);
GPU_texture_wrap_mode(curvemap.texture, false, true);

View File

@@ -171,7 +171,7 @@ colorspaces:
name: Non-Color
family: raw
description: |
Color space used for images which contains non-color data (i.e. normal maps)
Color space used for images which contain non-color data (e.g. normal maps)
equalitygroup:
bitdepth: 32f
isdata: true

View File

@@ -214,7 +214,7 @@ class AddPresetBase:
class ExecutePreset(Operator):
"""Execute a preset"""
"""Load a preset"""
bl_idname = "script.execute_preset"
bl_label = "Execute a Python Preset"

View File

@@ -228,8 +228,8 @@ def lightmap_uvpack(
"""
BOX_DIV if the maximum division of the UV map that
a box may be consolidated into.
Basically, a lower value will be slower but waist less space
and a higher value will have more clumpy boxes but more wasted space
A lower value will create more clumpy boxes and more wasted space,
and a higher value will be slower but waste less space
"""
import time
from math import sqrt
@@ -623,7 +623,10 @@ class LightMapPack(Operator):
# UV Packing...
PREF_BOX_DIV: IntProperty(
name="Pack Quality",
description="Pre-packing before the complex boxpack",
description=(
"Quality of the packing. "
"Higher values will be slower but waste less space"
),
min=1, max=48,
default=12,
)

View File

@@ -2084,7 +2084,7 @@ class WM_OT_operator_cheat_sheet(Operator):
# Add-on Operators
class WM_OT_owner_enable(Operator):
"""Enable workspace owner ID"""
"""Enable add-on for workspace"""
bl_idname = "wm.owner_enable"
bl_label = "Enable Add-on"
@@ -2099,9 +2099,9 @@ class WM_OT_owner_enable(Operator):
class WM_OT_owner_disable(Operator):
"""Enable workspace owner ID"""
"""Disable add-on for workspace"""
bl_idname = "wm.owner_disable"
bl_label = "Disable UI Tag"
bl_label = "Disable Add-on"
owner_id: StringProperty(
name="UI Tag",

View File

@@ -140,6 +140,7 @@ class NODE_MT_geometry_node_GEO_INPUT(Menu):
node_add_menu.add_node_type(layout, "FunctionNodeInputBool")
node_add_menu.add_node_type(layout, "GeometryNodeCollectionInfo")
node_add_menu.add_node_type(layout, "FunctionNodeInputColor")
node_add_menu.add_node_type(layout, "GeometryNodeInputImage")
node_add_menu.add_node_type(layout, "GeometryNodeImageInfo")
node_add_menu.add_node_type(layout, "FunctionNodeInputInt")
node_add_menu.add_node_type(layout, "GeometryNodeIsViewport")

View File

@@ -428,7 +428,7 @@ class PHYSICS_PT_fire(PhysicButtonsPanel, Panel):
col.prop(domain, "flame_max_temp", text="Temperature Maximum")
col.prop(domain, "flame_ignition", text="Minimum")
row = col.row()
row.prop(domain, "flame_smoke_color", text="Flame Color")
row.prop(domain, "flame_smoke_color", text="Smoke Color")
class PHYSICS_PT_liquid(PhysicButtonsPanel, Panel):

View File

@@ -214,8 +214,12 @@ class PHYSICS_PT_softbody_edge(PhysicButtonsPanel, Panel):
col = flow.column()
col.prop(softbody, "spring_length", text="Length")
col.prop(softbody, "use_edge_collision", text="Collision Edge")
col.prop(softbody, "use_face_collision", text="Face")
col.separator()
col = flow.column(align=True, heading="Collision")
col.prop(softbody, "use_edge_collision", text="Edge", toggle=False)
col.prop(softbody, "use_face_collision", text="Face", toggle=False)
class PHYSICS_PT_softbody_edge_aerodynamics(PhysicButtonsPanel, Panel):

View File

@@ -2196,7 +2196,7 @@ class SEQUENCER_PT_cache_settings(SequencerButtonsPanel, Panel):
col = layout.column(heading="Cache", align=True)
col.prop(ed, "use_cache_raw", text="Raw")
col.prop(ed, "use_cache_preprocessed", text="Pre-Processed")
col.prop(ed, "use_cache_preprocessed", text="Preprocessed")
col.prop(ed, "use_cache_composite", text="Composite")
col.prop(ed, "use_cache_final", text="Final")
@@ -2315,7 +2315,7 @@ class SEQUENCER_PT_strip_cache(SequencerButtonsPanel, Panel):
col = layout.column(heading="Cache")
col.prop(strip, "use_cache_raw", text="Raw")
col.prop(strip, "use_cache_preprocessed", text="Pre-Processed")
col.prop(strip, "use_cache_preprocessed", text="Preprocessed")
col.prop(strip, "use_cache_composite", text="Composite")

View File

@@ -1199,7 +1199,8 @@ void blf_glyph_draw(FontBLF *font, GlyphCacheBLF *gc, GlyphBLF *g, const int x,
if (gc->texture) {
GPU_texture_free(gc->texture);
}
gc->texture = GPU_texture_create_2d(__func__, w, h, 1, GPU_R8, NULL);
gc->texture = GPU_texture_create_2d_ex(
__func__, w, h, 1, GPU_R8, GPU_TEXTURE_USAGE_SHADER_READ, NULL);
gc->bitmap_len_landed = 0;
}

View File

@@ -496,9 +496,7 @@ void BKE_mesh_ensure_normals_for_display(struct Mesh *mesh);
* Used when defining an empty custom loop normals data layer,
* to keep same shading as with auto-smooth!
*/
void BKE_edges_sharp_from_angle_set(const struct MVert *mverts,
int numVerts,
struct MEdge *medges,
void BKE_edges_sharp_from_angle_set(struct MEdge *medges,
int numEdges,
const struct MLoop *mloops,
int numLoops,

View File

@@ -356,12 +356,12 @@ Array<Vector<int>> build_vert_to_loop_map(Span<MLoop> loops, int verts_num);
Array<Vector<int>> build_edge_to_loop_map(Span<MLoop> loops, int edges_num);
Vector<Vector<int>> build_edge_to_loop_map_resizable(Span<MLoop> loops, int edges_num);
inline int previous_poly_loop(const MPoly &poly, int loop_i)
inline int poly_loop_prev(const MPoly &poly, int loop_i)
{
return loop_i - 1 + (loop_i == poly.loopstart) * poly.totloop;
}
inline int next_poly_loop(const MPoly &poly, int loop_i)
inline int poly_loop_next(const MPoly &poly, int loop_i)
{
if (loop_i == poly.loopstart + poly.totloop - 1) {
return poly.loopstart;

View File

@@ -668,6 +668,10 @@ void nodeUnlinkNode(struct bNodeTree *ntree, struct bNode *node);
* Find the first available, non-duplicate name for a given node.
*/
void nodeUniqueName(struct bNodeTree *ntree, struct bNode *node);
/**
* Create a new unique integer identifier for the node. Also set the node's
* index in the tree, which is an eagerly maintained cache.
*/
void nodeUniqueID(struct bNodeTree *ntree, struct bNode *node);
/**
@@ -1542,6 +1546,7 @@ struct TexResult;
#define GEO_NODE_SET_CURVE_NORMAL 1188
#define GEO_NODE_IMAGE_INFO 1189
#define GEO_NODE_BLUR_ATTRIBUTE 1190
#define GEO_NODE_IMAGE 1191
/** \} */

View File

@@ -251,12 +251,14 @@ class bNodeRuntime : NonCopyable, NonMovable {
/** List of cached internal links (input to output), for muted nodes and operators. */
Vector<bNodeLink *> internal_links;
/** Eagerly maintained cache of the node's index in the tree. */
int index_in_tree = -1;
/** Only valid if #topology_cache_is_dirty is false. */
Vector<bNodeSocket *> inputs;
Vector<bNodeSocket *> outputs;
Map<StringRefNull, bNodeSocket *> inputs_by_identifier;
Map<StringRefNull, bNodeSocket *> outputs_by_identifier;
int index_in_tree = -1;
bool has_available_linked_inputs = false;
bool has_available_linked_outputs = false;
Vector<bNode *> direct_children_in_frame;
@@ -320,6 +322,10 @@ inline bool topology_cache_is_available(const bNodeSocket &socket)
} // namespace node_tree_runtime
namespace node_field_inferencing {
bool update_field_inferencing(const bNodeTree &tree);
}
} // namespace blender::bke
/* -------------------------------------------------------------------- */
@@ -463,6 +469,15 @@ inline blender::Span<bNode *> bNodeTree::root_frames() const
/** \name #bNode Inline Methods
* \{ */
inline int bNode::index() const
{
const int index = this->runtime->index_in_tree;
/* The order of nodes should always be consistent with the `nodes_by_id` vector. */
BLI_assert(index ==
this->runtime->owner_tree->runtime->nodes_by_id.index_of_as(this->identifier));
return index;
}
inline blender::Span<bNodeSocket *> bNode::input_sockets()
{
BLI_assert(blender::bke::node_tree_runtime::topology_cache_is_available(*this));

View File

@@ -229,6 +229,7 @@ set(SRC
intern/nla.c
intern/node.cc
intern/node_runtime.cc
intern/node_tree_field_inferencing.cc
intern/node_tree_update.cc
intern/object.cc
intern/object_deform.c

View File

@@ -1001,7 +1001,7 @@ static void blendfile_link_append_proxies_convert(Main *bmain, ReportList *repor
RPT_WARNING,
"Proxies have been removed from Blender (%d proxies were automatically converted "
"to library overrides, %d proxies could not be converted and were cleared). "
"Please consider re-saving any library .blend file with the newest Blender version",
"Consider re-saving any library .blend file with the newest Blender version",
bf_reports.count.proxies_to_lib_overrides_success,
bf_reports.count.proxies_to_lib_overrides_failures);
}

View File

@@ -571,7 +571,7 @@ void BKE_crazyspace_api_displacement_to_original(struct Object *object,
if (vertex_index < 0 || vertex_index >= object->runtime.crazyspace_verts_num) {
BKE_reportf(reports,
RPT_ERROR,
"Invalid vertex index %d (expected to be within 0 to %d range))",
"Invalid vertex index %d (expected to be within 0 to %d range)",
vertex_index,
object->runtime.crazyspace_verts_num);
return;

View File

@@ -6,7 +6,7 @@
#include <algorithm>
#include "BLI_math_rotation.hh"
#include "BLI_math_rotation_legacy.hh"
#include "BLI_math_vector.hh"
#include "BKE_curves.hh"

View File

@@ -13,7 +13,7 @@
#include "BLI_bounds.hh"
#include "BLI_index_mask_ops.hh"
#include "BLI_length_parameterize.hh"
#include "BLI_math_rotation.hh"
#include "BLI_math_rotation_legacy.hh"
#include "BLI_task.hh"
#include "DNA_curves_types.h"
@@ -519,7 +519,7 @@ void CurvesGeometry::ensure_evaluated_offsets() const
this->runtime->bezier_evaluated_offsets.resize(this->points_num());
}
else {
this->runtime->bezier_evaluated_offsets.clear_and_make_inline();
this->runtime->bezier_evaluated_offsets.clear_and_shrink();
}
calculate_evaluated_offsets(
@@ -605,7 +605,7 @@ Span<float3> CurvesGeometry::evaluated_positions() const
this->runtime->position_cache_mutex.ensure([&]() {
if (this->is_single_type(CURVE_TYPE_POLY)) {
this->runtime->evaluated_positions_span = this->positions();
this->runtime->evaluated_position_cache.clear_and_make_inline();
this->runtime->evaluated_position_cache.clear_and_shrink();
return;
}

View File

@@ -618,7 +618,7 @@ void adapt_mesh_domain_edge_to_corner_impl(const Mesh &mesh,
/* For every corner, mix the values from the adjacent edges on the face. */
for (const int loop_index : IndexRange(poly.loopstart, poly.totloop)) {
const int loop_index_prev = mesh_topology::previous_poly_loop(poly, loop_index);
const int loop_index_prev = mesh_topology::poly_loop_prev(poly, loop_index);
const MLoop &loop = loops[loop_index];
const MLoop &loop_prev = loops[loop_index_prev];
mixer.mix_in(loop_index, old_values[loop.e]);
@@ -645,7 +645,7 @@ void adapt_mesh_domain_edge_to_corner_impl(const Mesh &mesh,
for (const int poly_index : range) {
const MPoly &poly = polys[poly_index];
for (const int loop_index : IndexRange(poly.loopstart, poly.totloop)) {
const int loop_index_prev = mesh_topology::previous_poly_loop(poly, loop_index);
const int loop_index_prev = mesh_topology::poly_loop_prev(poly, loop_index);
const MLoop &loop = loops[loop_index];
const MLoop &loop_prev = loops[loop_index_prev];
if (old_values[loop.e] && old_values[loop_prev.e]) {

View File

@@ -111,7 +111,8 @@ static GPUTexture *gpu_texture_create_tile_mapping(Image *ima, const int multivi
tile_info[3] = tile_runtime->tilearray_size[1] / array_h;
}
GPUTexture *tex = GPU_texture_create_1d_array(ima->id.name + 2, width, 2, 1, GPU_RGBA32F, data);
GPUTexture *tex = GPU_texture_create_1d_array_ex(
ima->id.name + 2, width, 2, 1, GPU_RGBA32F, GPU_TEXTURE_USAGE_SHADER_READ, data);
GPU_texture_mipmap_mode(tex, false, false);
MEM_freeN(data);

View File

@@ -27,6 +27,7 @@
#include "BLI_stack.h"
#include "BLI_task.h"
#include "BLI_task.hh"
#include "BLI_timeit.hh"
#include "BLI_utildefines.h"
#include "BKE_customdata.h"
@@ -39,6 +40,7 @@
using blender::BitVector;
using blender::float3;
using blender::int2;
using blender::MutableSpan;
using blender::short2;
using blender::Span;
@@ -791,24 +793,20 @@ void BKE_lnor_space_custom_normal_to_data(const MLoopNorSpace *lnor_space,
#define LOOP_SPLIT_TASK_BLOCK_SIZE 1024
struct LoopSplitTaskData {
/* Specific to each instance (each task). */
enum class Type : int8_t {
BlockEnd = 0, /* Set implicitly by calloc. */
Fan = 1,
Single = 2,
};
/** We have to create those outside of tasks, since #MemArena is not thread-safe. */
MLoopNorSpace *lnor_space;
float3 *lnor;
const MLoop *ml_curr;
const MLoop *ml_prev;
int ml_curr_index;
int ml_prev_index;
/** Also used a flag to switch between single or fan process! */
const int *e2l_prev;
int ml_prev_index;
int mp_index;
/** This one is special, it's owned and managed by worker tasks,
* avoid to have to create it for each fan! */
BLI_Stack *edge_vectors;
char pad_c;
Type flag;
};
struct LoopSplitTaskDataCommon {
@@ -821,10 +819,10 @@ struct LoopSplitTaskDataCommon {
/* Read-only. */
Span<MVert> verts;
MutableSpan<MEdge> edges;
Span<MEdge> edges;
Span<MLoop> loops;
Span<MPoly> polys;
int (*edge_to_loops)[2];
Span<int2> edge_to_loops;
Span<int> loop_to_poly;
Span<float3> polynors;
Span<float3> vert_normals;
@@ -835,76 +833,57 @@ struct LoopSplitTaskDataCommon {
/* See comment about edge_to_loops below. */
#define IS_EDGE_SHARP(_e2l) ELEM((_e2l)[1], INDEX_UNSET, INDEX_INVALID)
static void mesh_edges_sharp_tag(LoopSplitTaskDataCommon *data,
static void mesh_edges_sharp_tag(const Span<MEdge> edges,
const Span<MPoly> polys,
const Span<MLoop> loops,
const Span<int> loop_to_poly_map,
const Span<float3> poly_normals,
const bool check_angle,
const float split_angle,
const bool do_sharp_edges_tag)
MutableSpan<int2> edge_to_loops,
BitVector<> *r_sharp_edges)
{
MutableSpan<MEdge> edges = data->edges;
const Span<MPoly> polys = data->polys;
const Span<MLoop> loops = data->loops;
const Span<int> loop_to_poly = data->loop_to_poly;
MutableSpan<float3> loopnors = data->loopnors; /* NOTE: loopnors may be empty here. */
const Span<float3> polynors = data->polynors;
int(*edge_to_loops)[2] = data->edge_to_loops;
BitVector sharp_edges;
if (do_sharp_edges_tag) {
sharp_edges.resize(edges.size(), false);
}
using namespace blender;
const float split_angle_cos = check_angle ? cosf(split_angle) : -1.0f;
for (const int mp_index : polys.index_range()) {
const MPoly &poly = polys[mp_index];
int *e2l;
int ml_curr_index = poly.loopstart;
const int ml_last_index = (ml_curr_index + poly.totloop) - 1;
for (const int poly_i : polys.index_range()) {
const MPoly &poly = polys[poly_i];
for (const int loop_index : IndexRange(poly.loopstart, poly.totloop)) {
const int vert_i = loops[loop_index].v;
const int edge_i = loops[loop_index].e;
const MLoop *ml_curr = &loops[ml_curr_index];
for (; ml_curr_index <= ml_last_index; ml_curr++, ml_curr_index++) {
e2l = edge_to_loops[ml_curr->e];
/* Pre-populate all loop normals as if their verts were all-smooth,
* this way we don't have to compute those later!
*/
if (!loopnors.is_empty()) {
copy_v3_v3(loopnors[ml_curr_index], data->vert_normals[ml_curr->v]);
}
int2 &e2l = edge_to_loops[edge_i];
/* Check whether current edge might be smooth or sharp */
if ((e2l[0] | e2l[1]) == 0) {
/* 'Empty' edge until now, set e2l[0] (and e2l[1] to INDEX_UNSET to tag it as unset). */
e2l[0] = ml_curr_index;
e2l[0] = loop_index;
/* We have to check this here too, else we might miss some flat faces!!! */
e2l[1] = (poly.flag & ME_SMOOTH) ? INDEX_UNSET : INDEX_INVALID;
}
else if (e2l[1] == INDEX_UNSET) {
const bool is_angle_sharp = (check_angle &&
dot_v3v3(polynors[loop_to_poly[e2l[0]]], polynors[mp_index]) <
split_angle_cos);
dot_v3v3(poly_normals[loop_to_poly_map[e2l[0]]],
poly_normals[poly_i]) < split_angle_cos);
/* Second loop using this edge, time to test its sharpness.
* An edge is sharp if it is tagged as such, or its face is not smooth,
* or both polys have opposed (flipped) normals, i.e. both loops on the same edge share the
* same vertex, or the angle between both its polys' normals is above the split_angle value.
*/
if (!(poly.flag & ME_SMOOTH) || (edges[ml_curr->e].flag & ME_SHARP) ||
ml_curr->v == loops[e2l[0]].v || is_angle_sharp) {
if (!(poly.flag & ME_SMOOTH) || (edges[edge_i].flag & ME_SHARP) ||
vert_i == loops[e2l[0]].v || is_angle_sharp) {
/* NOTE: we are sure that loop != 0 here ;). */
e2l[1] = INDEX_INVALID;
/* We want to avoid tagging edges as sharp when it is already defined as such by
* causes other than the angle threshold. */
if (do_sharp_edges_tag && is_angle_sharp) {
sharp_edges[ml_curr->e].set();
if (r_sharp_edges && is_angle_sharp) {
(*r_sharp_edges)[edge_i].set();
}
}
else {
e2l[1] = ml_curr_index;
e2l[1] = loop_index;
}
}
else if (!IS_EDGE_SHARP(e2l)) {
@@ -913,27 +892,16 @@ static void mesh_edges_sharp_tag(LoopSplitTaskDataCommon *data,
/* We want to avoid tagging edges as sharp when it is already defined as such by
* causes other than the angle threshold. */
if (do_sharp_edges_tag) {
sharp_edges[ml_curr->e].reset();
if (r_sharp_edges) {
(*r_sharp_edges)[edge_i].reset();
}
}
/* Else, edge is already 'disqualified' (i.e. sharp)! */
}
}
/* If requested, do actual tagging of edges as sharp in another loop. */
if (do_sharp_edges_tag) {
for (const int i : edges.index_range()) {
if (sharp_edges[i]) {
edges[i].flag |= ME_SHARP;
}
}
}
}
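The per-edge `int2` map stores the first two loop indices that touch an edge, and the second slot doubles as the sharpness tag tested by `IS_EDGE_SHARP`. A simplified standalone sketch of that encoding, with the angle and flipped-normal tests left out; the tag values and helpers are assumptions mirroring the defines used above, not the Blender implementation:

#include <climits>
#include <vector>

/* Tag values assumed to mirror the INDEX_UNSET / INDEX_INVALID defines in this file. */
constexpr int INDEX_UNSET = INT_MIN;
constexpr int INDEX_INVALID = -1;

struct Int2 {
  int x = 0, y = 0;
};

/* Same test as IS_EDGE_SHARP: an edge whose second slot never received a real loop
 * index (still unset, or explicitly invalidated) is treated as sharp. */
static bool edge_is_sharp(const Int2 &e2l)
{
  return e2l.y == INDEX_UNSET || e2l.y == INDEX_INVALID;
}

/* Record that `loop_index` uses `edge_index`; `smooth` is the owning face's smooth state. */
static void visit_edge(std::vector<Int2> &edge_to_loops,
                       const int edge_index,
                       const int loop_index,
                       const bool smooth)
{
  Int2 &e2l = edge_to_loops[edge_index];
  if (e2l.x == 0 && e2l.y == 0) {
    /* First loop touching this edge (the map starts zero-initialized). */
    e2l.x = loop_index;
    e2l.y = smooth ? INDEX_UNSET : INDEX_INVALID;
  }
  else if (e2l.y == INDEX_UNSET) {
    /* Second loop: the edge is manifold; keep it smooth or lock it as sharp. */
    e2l.y = smooth ? loop_index : INDEX_INVALID;
  }
  else {
    /* Any further loop: non-manifold edge, always sharp. */
    e2l.y = INDEX_INVALID;
  }
}

Loose edges keep both slots at zero, which matches the note in the `BKE_mesh_normals_loop_split` hunk further down.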
void BKE_edges_sharp_from_angle_set(const MVert *mverts,
const int numVerts,
MEdge *medges,
void BKE_edges_sharp_from_angle_set(MEdge *medges,
const int numEdges,
const MLoop *mloops,
const int numLoops,
@@ -950,25 +918,30 @@ void BKE_edges_sharp_from_angle_set(const MVert *mverts,
}
/* Mapping edge -> loops. See #BKE_mesh_normals_loop_split for details. */
int(*edge_to_loops)[2] = (int(*)[2])MEM_calloc_arrayN(
size_t(numEdges), sizeof(*edge_to_loops), __func__);
Array<int2> edge_to_loops(numEdges, int2(0));
/* Simple mapping from a loop to its polygon index. */
const Array<int> loop_to_poly = mesh_topology::build_loop_to_poly_map({mpolys, numPolys},
numLoops);
LoopSplitTaskDataCommon common_data = {};
common_data.verts = {mverts, numVerts};
common_data.edges = {medges, numEdges};
common_data.polys = {mpolys, numPolys};
common_data.loops = {mloops, numLoops};
common_data.edge_to_loops = edge_to_loops;
common_data.loop_to_poly = loop_to_poly;
common_data.polynors = {reinterpret_cast<const float3 *>(polynors), numPolys};
BitVector<> sharp_edges(numEdges, false);
mesh_edges_sharp_tag({medges, numEdges},
{mpolys, numPolys},
{mloops, numLoops},
loop_to_poly,
{reinterpret_cast<const float3 *>(polynors), numPolys},
true,
split_angle,
edge_to_loops,
&sharp_edges);
mesh_edges_sharp_tag(&common_data, true, split_angle, true);
MEM_freeN(edge_to_loops);
threading::parallel_for(IndexRange(numEdges), 4096, [&](const IndexRange range) {
for (const int edge_i : range) {
if (sharp_edges[edge_i]) {
medges[edge_i].flag |= ME_SHARP;
}
}
});
}
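With the edge span now read-only, the tag pass only fills a `BitVector` and the caller applies `ME_SHARP` afterwards inside `threading::parallel_for`. A small standalone sketch of that collect-then-apply split, using standard containers and an illustrative flag value instead of Blender's types:

#include <cstddef>
#include <cstdint>
#include <vector>

constexpr uint16_t EDGE_FLAG_SHARP = 1 << 0; /* Illustrative stand-in for ME_SHARP. */

struct Edge {
  uint16_t flag = 0;
};

/* Pass 1: decide which edges are sharp without writing to the edge array. */
static std::vector<bool> collect_sharp_edges(const std::vector<float> &edge_angles,
                                             const float split_angle)
{
  std::vector<bool> sharp(edge_angles.size(), false);
  for (size_t i = 0; i < edge_angles.size(); i++) {
    sharp[i] = edge_angles[i] > split_angle;
  }
  return sharp;
}

/* Pass 2: apply the collected flags; in the patch this loop body runs inside
 * threading::parallel_for over 4096-edge chunks. */
static void apply_sharp_flags(std::vector<Edge> &edges, const std::vector<bool> &sharp)
{
  for (size_t i = 0; i < edges.size(); i++) {
    if (sharp[i]) {
      edges[i].flag |= EDGE_FLAG_SHARP;
    }
  }
}

Keeping the tagging pass free of writes is also what lets `BKE_mesh_normals_loop_split` reuse it with a null `r_sharp_edges`, without the edge flags being modified at all.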
static void loop_manifold_fan_around_vert_next(const Span<MLoop> loops,
@@ -976,11 +949,13 @@ static void loop_manifold_fan_around_vert_next(const Span<MLoop> loops,
const Span<int> loop_to_poly,
const int *e2lfan_curr,
const uint mv_pivot_index,
const MLoop **r_mlfan_curr,
int *r_mlfan_curr_index,
int *r_mlfan_vert_index,
int *r_mpfan_curr_index)
{
const int mlfan_curr_orig = *r_mlfan_curr_index;
const uint vert_fan_orig = loops[mlfan_curr_orig].v;
/* WARNING: This is rather complex!
* We have to find our next edge around the vertex (fan mode).
* First we find the next loop, which is either previous or next to mlfan_curr_index, depending
@@ -994,10 +969,10 @@ static void loop_manifold_fan_around_vert_next(const Span<MLoop> loops,
BLI_assert(*r_mlfan_curr_index >= 0);
BLI_assert(*r_mpfan_curr_index >= 0);
const MLoop &mlfan_next = loops[*r_mlfan_curr_index];
const uint vert_fan_next = loops[*r_mlfan_curr_index].v;
const MPoly &mpfan_next = polys[*r_mpfan_curr_index];
if (((*r_mlfan_curr)->v == mlfan_next.v && (*r_mlfan_curr)->v == mv_pivot_index) ||
((*r_mlfan_curr)->v != mlfan_next.v && (*r_mlfan_curr)->v != mv_pivot_index)) {
if ((vert_fan_orig == vert_fan_next && vert_fan_orig == mv_pivot_index) ||
(vert_fan_orig != vert_fan_next && vert_fan_orig != mv_pivot_index)) {
/* We need the previous loop, but current one is our vertex's loop. */
*r_mlfan_vert_index = *r_mlfan_curr_index;
if (--(*r_mlfan_curr_index) < mpfan_next.loopstart) {
@@ -1011,8 +986,6 @@ static void loop_manifold_fan_around_vert_next(const Span<MLoop> loops,
}
*r_mlfan_vert_index = *r_mlfan_curr_index;
}
*r_mlfan_curr = &loops[*r_mlfan_curr_index];
/* And now we are back in sync, mlfan_curr_index is the index of `mlfan_curr`! Pff! */
}
static void split_loop_nor_single_do(LoopSplitTaskDataCommon *common_data, LoopSplitTaskData *data)
@@ -1022,29 +995,25 @@ static void split_loop_nor_single_do(LoopSplitTaskDataCommon *common_data, LoopS
const Span<MVert> verts = common_data->verts;
const Span<MEdge> edges = common_data->edges;
const Span<MLoop> loops = common_data->loops;
const Span<float3> polynors = common_data->polynors;
MutableSpan<float3> loop_normals = common_data->loopnors;
MLoopNorSpace *lnor_space = data->lnor_space;
float3 *lnor = data->lnor;
const MLoop *ml_curr = data->ml_curr;
const MLoop *ml_prev = data->ml_prev;
const int ml_curr_index = data->ml_curr_index;
#if 0 /* Not needed for 'single' loop. */
const int ml_prev_index = data->ml_prev_index;
const int *e2l_prev = data->e2l_prev;
#endif
const int mp_index = data->mp_index;
/* Simple case (both edges around that vertex are sharp in current polygon),
* this loop just takes its poly normal.
*/
copy_v3_v3(*lnor, polynors[mp_index]);
loop_normals[ml_curr_index] = polynors[mp_index];
#if 0
printf("BASIC: handling loop %d / edge %d / vert %d / poly %d\n",
ml_curr_index,
ml_curr->e,
ml_curr->v,
loops[ml_curr_index].e,
loops[ml_curr_index].v,
mp_index);
#endif
@@ -1052,12 +1021,12 @@ static void split_loop_nor_single_do(LoopSplitTaskDataCommon *common_data, LoopS
if (lnors_spacearr) {
float vec_curr[3], vec_prev[3];
const uint mv_pivot_index = ml_curr->v; /* The vertex we are "fanning" around! */
const uint mv_pivot_index = loops[ml_curr_index].v; /* The vertex we are "fanning" around! */
const MVert *mv_pivot = &verts[mv_pivot_index];
const MEdge *me_curr = &edges[ml_curr->e];
const MEdge *me_curr = &edges[loops[ml_curr_index].e];
const MVert *mv_2 = (me_curr->v1 == mv_pivot_index) ? &verts[me_curr->v2] :
&verts[me_curr->v1];
const MEdge *me_prev = &edges[ml_prev->e];
const MEdge *me_prev = &edges[loops[ml_prev_index].e];
const MVert *mv_3 = (me_prev->v1 == mv_pivot_index) ? &verts[me_prev->v2] :
&verts[me_prev->v1];
@@ -1066,17 +1035,20 @@ static void split_loop_nor_single_do(LoopSplitTaskDataCommon *common_data, LoopS
sub_v3_v3v3(vec_prev, mv_3->co, mv_pivot->co);
normalize_v3(vec_prev);
BKE_lnor_space_define(lnor_space, *lnor, vec_curr, vec_prev, nullptr);
BKE_lnor_space_define(lnor_space, loop_normals[ml_curr_index], vec_curr, vec_prev, nullptr);
/* We know there is only one loop in this space, no need to create a link-list in this case. */
BKE_lnor_space_add_loop(lnors_spacearr, lnor_space, ml_curr_index, nullptr, true);
if (!clnors_data.is_empty()) {
BKE_lnor_space_custom_data_to_normal(lnor_space, clnors_data[ml_curr_index], *lnor);
BKE_lnor_space_custom_data_to_normal(
lnor_space, clnors_data[ml_curr_index], loop_normals[ml_curr_index]);
}
}
}
static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSplitTaskData *data)
static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data,
LoopSplitTaskData *data,
BLI_Stack *edge_vectors)
{
MLoopNorSpaceArray *lnors_spacearr = common_data->lnors_spacearr;
MutableSpan<float3> loopnors = common_data->loopnors;
@@ -1086,7 +1058,7 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSpli
const Span<MEdge> edges = common_data->edges;
const Span<MPoly> polys = common_data->polys;
const Span<MLoop> loops = common_data->loops;
const int(*edge_to_loops)[2] = common_data->edge_to_loops;
const Span<int2> edge_to_loops = common_data->edge_to_loops;
const Span<int> loop_to_poly = common_data->loop_to_poly;
const Span<float3> polynors = common_data->polynors;
@@ -1094,14 +1066,9 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSpli
#if 0 /* Not needed for 'fan' loops. */
float(*lnor)[3] = data->lnor;
#endif
const MLoop *ml_curr = data->ml_curr;
const MLoop *ml_prev = data->ml_prev;
const int ml_curr_index = data->ml_curr_index;
const int ml_prev_index = data->ml_prev_index;
const int mp_index = data->mp_index;
const int *e2l_prev = data->e2l_prev;
BLI_Stack *edge_vectors = data->edge_vectors;
/* Sigh! We have to fan around the current vertex, until we find the other non-smooth edge,
* and accumulate face normals into the vertex!
@@ -1109,11 +1076,11 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSpli
* same as the vertex normal, but I do not see any easy way to detect that (would need to count
* the number of sharp edges per vertex, and I doubt the additional memory usage would be worth it,
* especially as it should not be a common case in real-life meshes anyway). */
const uint mv_pivot_index = ml_curr->v; /* The vertex we are "fanning" around! */
const uint mv_pivot_index = loops[ml_curr_index].v; /* The vertex we are "fanning" around! */
const MVert *mv_pivot = &verts[mv_pivot_index];
/* `ml_curr` would be mlfan_prev if we needed that one. */
const MEdge *me_org = &edges[ml_curr->e];
/* `ml_curr_index` would be mlfan_prev if we needed that one. */
const MEdge *me_org = &edges[loops[ml_curr_index].e];
float vec_curr[3], vec_prev[3], vec_org[3];
float lnor[3] = {0.0f, 0.0f, 0.0f};
@@ -1129,8 +1096,6 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSpli
/* Temp clnors stack. */
BLI_SMALLSTACK_DECLARE(clnors, short *);
const int *e2lfan_curr = e2l_prev;
const MLoop *mlfan_curr = ml_prev;
/* `mlfan_vert_index`: the loop of our current edge might not be the loop of our current vertex!
*/
int mlfan_curr_index = ml_prev_index;
@@ -1157,7 +1122,7 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSpli
// printf("FAN: vert %d, start edge %d\n", mv_pivot_index, ml_curr->e);
while (true) {
const MEdge *me_curr = &edges[mlfan_curr->e];
const MEdge *me_curr = &edges[loops[mlfan_curr_index].e];
/* Compute edge vectors.
* NOTE: We could pre-compute those into an array, in the first iteration, instead of computing
* them twice (or more) here. However, the time gained is not worth the memory and time lost,
@@ -1171,7 +1136,7 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSpli
normalize_v3(vec_curr);
}
// printf("\thandling edge %d / loop %d\n", mlfan_curr->e, mlfan_curr_index);
// printf("\thandling edge %d / loop %d\n", loops[mlfan_curr_index].e, mlfan_curr_index);
{
/* Code similar to accumulate_vertex_normals_poly_v3. */
@@ -1209,7 +1174,7 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSpli
}
}
if (IS_EDGE_SHARP(e2lfan_curr) || (me_curr == me_org)) {
if (IS_EDGE_SHARP(edge_to_loops[loops[mlfan_curr_index].e]) || (me_curr == me_org)) {
/* Current edge is sharp and we have finished with this fan of faces around this vert,
* or this vert is smooth, and we have completed a full turn around it. */
// printf("FAN: Finished!\n");
@@ -1222,14 +1187,11 @@ static void split_loop_nor_fan_do(LoopSplitTaskDataCommon *common_data, LoopSpli
loop_manifold_fan_around_vert_next(loops,
polys,
loop_to_poly,
e2lfan_curr,
edge_to_loops[loops[mlfan_curr_index].e],
mv_pivot_index,
&mlfan_curr,
&mlfan_curr_index,
&mlfan_vert_index,
&mpfan_curr_index);
e2lfan_curr = edge_to_loops[mlfan_curr->e];
}
{
@@ -1289,11 +1251,9 @@ static void loop_split_worker_do(LoopSplitTaskDataCommon *common_data,
LoopSplitTaskData *data,
BLI_Stack *edge_vectors)
{
BLI_assert(data->ml_curr);
if (data->e2l_prev) {
if (data->flag == LoopSplitTaskData::Type::Fan) {
BLI_assert((edge_vectors == nullptr) || BLI_stack_is_empty(edge_vectors));
data->edge_vectors = edge_vectors;
split_loop_nor_fan_do(common_data, data);
split_loop_nor_fan_do(common_data, data, edge_vectors);
}
else {
/* No need for edge_vectors for 'single' case! */
@@ -1312,8 +1272,7 @@ static void loop_split_worker(TaskPool *__restrict pool, void *taskdata)
nullptr;
for (int i = 0; i < LOOP_SPLIT_TASK_BLOCK_SIZE; i++, data++) {
/* A nullptr ml_curr is used to tag ended data! */
if (data->ml_curr == nullptr) {
if (data->flag == LoopSplitTaskData::Type::BlockEnd) {
break;
}
@@ -1332,17 +1291,15 @@ static void loop_split_worker(TaskPool *__restrict pool, void *taskdata)
*/
static bool loop_split_generator_check_cyclic_smooth_fan(const Span<MLoop> mloops,
const Span<MPoly> mpolys,
const int (*edge_to_loops)[2],
const Span<int2> edge_to_loops,
const Span<int> loop_to_poly,
const int *e2l_prev,
BitVector<> &skip_loops,
const MLoop *ml_curr,
const MLoop *ml_prev,
const int ml_curr_index,
const int ml_prev_index,
const int mp_curr_index)
{
const uint mv_pivot_index = ml_curr->v; /* The vertex we are "fanning" around! */
const uint mv_pivot_index = mloops[ml_curr_index].v; /* The vertex we are "fanning" around! */
const int *e2lfan_curr = e2l_prev;
if (IS_EDGE_SHARP(e2lfan_curr)) {
@@ -1352,7 +1309,6 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const Span<MLoop> mloop
/* `mlfan_vert_index`: the loop of our current edge might not be the loop of our current vertex!
*/
const MLoop *mlfan_curr = ml_prev;
int mlfan_curr_index = ml_prev_index;
int mlfan_vert_index = ml_curr_index;
int mpfan_curr_index = mp_curr_index;
@@ -1371,12 +1327,11 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const Span<MLoop> mloop
loop_to_poly,
e2lfan_curr,
mv_pivot_index,
&mlfan_curr,
&mlfan_curr_index,
&mlfan_vert_index,
&mpfan_curr_index);
e2lfan_curr = edge_to_loops[mlfan_curr->e];
e2lfan_curr = edge_to_loops[mloops[mlfan_curr_index].e];
if (IS_EDGE_SHARP(e2lfan_curr)) {
/* Sharp loop/edge, so not a cyclic smooth fan. */
@@ -1386,7 +1341,7 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const Span<MLoop> mloop
if (skip_loops[mlfan_vert_index]) {
if (mlfan_vert_index == ml_curr_index) {
/* We walked around a whole cyclic smooth fan without finding any already-processed loop,
* means we can use initial `ml_curr` / `ml_prev` edge as start for this smooth fan. */
* which means we can use the initial current / previous edge as the start of this smooth fan. */
return true;
}
/* Already checked in some previous looping, we can abort. */
@@ -1400,13 +1355,14 @@ static bool loop_split_generator_check_cyclic_smooth_fan(const Span<MLoop> mloop
static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common_data)
{
using namespace blender;
using namespace blender::bke;
MLoopNorSpaceArray *lnors_spacearr = common_data->lnors_spacearr;
MutableSpan<float3> loopnors = common_data->loopnors;
const Span<MLoop> loops = common_data->loops;
const Span<MPoly> polys = common_data->polys;
const Span<int> loop_to_poly = common_data->loop_to_poly;
const int(*edge_to_loops)[2] = common_data->edge_to_loops;
const Span<int2> edge_to_loops = common_data->edge_to_loops;
BitVector<> skip_loops(loops.size(), false);
@@ -1432,24 +1388,16 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common
*/
for (const int mp_index : polys.index_range()) {
const MPoly &poly = polys[mp_index];
const int ml_last_index = (poly.loopstart + poly.totloop) - 1;
int ml_curr_index = poly.loopstart;
int ml_prev_index = ml_last_index;
const MLoop *ml_curr = &loops[ml_curr_index];
const MLoop *ml_prev = &loops[ml_prev_index];
float3 *lnors = &loopnors[ml_curr_index];
for (; ml_curr_index <= ml_last_index; ml_curr++, ml_curr_index++, lnors++) {
const int *e2l_curr = edge_to_loops[ml_curr->e];
const int *e2l_prev = edge_to_loops[ml_prev->e];
for (const int ml_curr_index : IndexRange(poly.loopstart, poly.totloop)) {
const int ml_prev_index = mesh_topology::poly_loop_prev(poly, ml_curr_index);
#if 0
printf("Checking loop %d / edge %u / vert %u (sharp edge: %d, skiploop: %d)",
ml_curr_index,
ml_curr->e,
ml_curr->v,
IS_EDGE_SHARP(e2l_curr),
loops[ml_curr_index].e,
loops[ml_curr_index].v,
IS_EDGE_SHARP(edge_to_loops[loops[ml_curr_index].e]),
skip_loops[ml_curr_index]);
#endif
@@ -1463,18 +1411,17 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common
* However, this would complicate the code, add more memory usage, and despite its logical
* complexity, #loop_manifold_fan_around_vert_next() is quite cheap in terms of CPU cycles,
* so it is really not worth it. */
if (!IS_EDGE_SHARP(e2l_curr) && (skip_loops[ml_curr_index] ||
!loop_split_generator_check_cyclic_smooth_fan(loops,
polys,
edge_to_loops,
loop_to_poly,
e2l_prev,
skip_loops,
ml_curr,
ml_prev,
ml_curr_index,
ml_prev_index,
mp_index))) {
if (!IS_EDGE_SHARP(edge_to_loops[loops[ml_curr_index].e]) &&
(skip_loops[ml_curr_index] ||
!loop_split_generator_check_cyclic_smooth_fan(loops,
polys,
edge_to_loops,
loop_to_poly,
edge_to_loops[loops[ml_prev_index].e],
skip_loops,
ml_curr_index,
ml_prev_index,
mp_index))) {
// printf("SKIPPING!\n");
}
else {
@@ -1494,38 +1441,27 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common
memset(data, 0, sizeof(*data));
}
if (IS_EDGE_SHARP(e2l_curr) && IS_EDGE_SHARP(e2l_prev)) {
data->lnor = lnors;
data->ml_curr = ml_curr;
data->ml_prev = ml_prev;
if (IS_EDGE_SHARP(edge_to_loops[loops[ml_curr_index].e]) &&
IS_EDGE_SHARP(edge_to_loops[loops[ml_prev_index].e])) {
data->ml_curr_index = ml_curr_index;
#if 0 /* Not needed for 'single' loop. */
data->ml_prev_index = ml_prev_index;
data->e2l_prev = nullptr; /* Tag as 'single' task. */
#endif
data->flag = LoopSplitTaskData::Type::Single;
data->mp_index = mp_index;
if (lnors_spacearr) {
data->lnor_space = BKE_lnor_space_create(lnors_spacearr);
}
}
/* We *do not need* to check/tag loops as already computed!
* Due to the fact a loop only links to one of its two edges,
* a same fan *will never be walked more than once!*
* Since we consider edges having neighbor polys with inverted
* (flipped) normals as sharp, we are sure that no fan will be skipped,
* even only considering the case (sharp curr_edge, smooth prev_edge),
* and not the alternative (smooth curr_edge, sharp prev_edge).
* All this due/thanks to link between normals and loop ordering (i.e. winding).
*/
else {
#if 0 /* Not needed for 'fan' loops. */
data->lnor = lnors;
#endif
data->ml_curr = ml_curr;
data->ml_prev = ml_prev;
/* We do not need to check/tag loops as already computed. Due to the fact that a loop
* only points to one of its two edges, the same fan will never be walked more than once.
* Since we consider edges that have neighbor polys with inverted (flipped) normals as
* sharp, we are sure that no fan will be skipped, even only considering the case (sharp
* current edge, smooth previous edge), and not the alternative (smooth current edge,
* sharp previous edge). All this due/thanks to the link between normals and loop
* ordering (i.e. winding). */
data->ml_curr_index = ml_curr_index;
data->ml_prev_index = ml_prev_index;
data->e2l_prev = e2l_prev; /* Also tag as 'fan' task. */
data->flag = LoopSplitTaskData::Type::Fan;
data->mp_index = mp_index;
if (lnors_spacearr) {
data->lnor_space = BKE_lnor_space_create(lnors_spacearr);
@@ -1543,14 +1479,9 @@ static void loop_split_generator(TaskPool *pool, LoopSplitTaskDataCommon *common
loop_split_worker_do(common_data, data, edge_vectors);
}
}
ml_prev = ml_curr;
ml_prev_index = ml_curr_index;
}
}
/* Last block of data. Since it is calloc'ed and we use first nullptr item as stopper,
* everything is fine. */
if (pool && data_idx) {
BLI_task_pool_push(pool, loop_split_worker, data_buff, true, nullptr);
}
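The generator now derives the previous corner from indices via `mesh_topology::poly_loop_prev` instead of carrying `ml_prev` pointers through the loop. A sketch of the wrap-around semantics assumed for that helper and its `next` counterpart; the bodies are illustrative, not the Blender implementation:

struct PolyRange {
  int loopstart;
  int totloop;
};

/* Previous corner within the same polygon, wrapping from the first corner back to the
 * last one (assumed behavior of mesh_topology::poly_loop_prev). */
static int poly_loop_prev(const PolyRange &poly, const int loop_index)
{
  return (loop_index == poly.loopstart) ? poly.loopstart + poly.totloop - 1 : loop_index - 1;
}

/* Matching next-corner helper, wrapping from the last corner back to the first. */
static int poly_loop_next(const PolyRange &poly, const int loop_index)
{
  return (loop_index == poly.loopstart + poly.totloop - 1) ? poly.loopstart : loop_index + 1;
}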
@@ -1624,8 +1555,7 @@ void BKE_mesh_normals_loop_split(const MVert *mverts,
* However, if needed, we can store the negated value of loop index instead of INDEX_INVALID
* to retrieve the real value later in code).
* Note also that loose edges always have both values set to 0! */
int(*edge_to_loops)[2] = (int(*)[2])MEM_calloc_arrayN(
size_t(numEdges), sizeof(*edge_to_loops), __func__);
Array<int2> edge_to_loops(numEdges, int2(0));
/* Simple mapping from a loop to its polygon index. */
Span<int> loop_to_poly;
@@ -1655,22 +1585,44 @@ void BKE_mesh_normals_loop_split(const MVert *mverts,
BKE_lnor_spacearr_init(r_lnors_spacearr, numLoops, MLNOR_SPACEARR_LOOP_INDEX);
}
const Span<MPoly> polys(mpolys, numPolys);
const Span<MLoop> loops(mloops, numLoops);
/* Init data common to all tasks. */
LoopSplitTaskDataCommon common_data;
common_data.lnors_spacearr = r_lnors_spacearr;
common_data.loopnors = {reinterpret_cast<float3 *>(r_loopnors), numLoops};
common_data.clnors_data = {reinterpret_cast<short2 *>(clnors_data), clnors_data ? numLoops : 0};
common_data.verts = {mverts, numVerts};
common_data.edges = {const_cast<MEdge *>(medges), numEdges};
common_data.polys = {mpolys, numPolys};
common_data.loops = {mloops, numLoops};
common_data.edges = {medges, numEdges};
common_data.polys = polys;
common_data.loops = loops;
common_data.edge_to_loops = edge_to_loops;
common_data.loop_to_poly = loop_to_poly;
common_data.polynors = {reinterpret_cast<const float3 *>(polynors), numPolys};
common_data.vert_normals = {reinterpret_cast<const float3 *>(vert_normals), numVerts};
/* Pre-populate all loop normals as if their verts were all smooth.
* This way we don't have to compute those later! */
threading::parallel_for(polys.index_range(), 1024, [&](const IndexRange range) {
for (const int poly_i : range) {
const MPoly &poly = polys[poly_i];
for (const int loop_i : IndexRange(poly.loopstart, poly.totloop)) {
copy_v3_v3(r_loopnors[loop_i], vert_normals[loops[loop_i].v]);
}
}
});
/* This first loop checks which edges are actually smooth, and computes edge vectors. */
mesh_edges_sharp_tag(&common_data, check_angle, split_angle, false);
mesh_edges_sharp_tag({medges, numEdges},
polys,
loops,
loop_to_poly,
{reinterpret_cast<const float3 *>(polynors), numPolys},
check_angle,
split_angle,
edge_to_loops,
nullptr);
if (numLoops < LOOP_SPLIT_TASK_BLOCK_SIZE * 8) {
/* Not enough loops to be worth the whole threading overhead. */
@@ -1686,8 +1638,6 @@ void BKE_mesh_normals_loop_split(const MVert *mverts,
BLI_task_pool_free(task_pool);
}
MEM_freeN(edge_to_loops);
if (r_lnors_spacearr) {
if (r_lnors_spacearr == &_lnors_spacearr) {
BKE_lnor_spacearr_free(r_lnors_spacearr);


@@ -145,11 +145,13 @@ static void ntree_copy_data(Main * /*bmain*/, ID *id_dst, const ID *id_src, cons
dst_runtime.nodes_by_id.reserve(ntree_src->all_nodes().size());
BLI_listbase_clear(&ntree_dst->nodes);
LISTBASE_FOREACH (const bNode *, src_node, &ntree_src->nodes) {
int i;
LISTBASE_FOREACH_INDEX (const bNode *, src_node, &ntree_src->nodes, i) {
/* Don't find a unique name for every node, since they should have valid names already. */
bNode *new_node = blender::bke::node_copy_with_mapping(
ntree_dst, *src_node, flag_subdata, false, socket_map);
dst_runtime.nodes_by_id.add_new(new_node);
new_node->runtime->index_in_tree = i;
}
/* copy links */
@@ -673,9 +675,11 @@ void ntreeBlendReadData(BlendDataReader *reader, ID *owner_id, bNodeTree *ntree)
BKE_animdata_blend_read_data(reader, ntree->adt);
BLO_read_list(reader, &ntree->nodes);
LISTBASE_FOREACH (bNode *, node, &ntree->nodes) {
int i;
LISTBASE_FOREACH_INDEX (bNode *, node, &ntree->nodes, i) {
node->runtime = MEM_new<bNodeRuntime>(__func__);
node->typeinfo = nullptr;
node->runtime->index_in_tree = i;
/* Create the `nodes_by_id` cache eagerly so it can be expected to be valid. Because
* we create it here we also have to check for zero identifiers from previous versions. */
@@ -1373,8 +1377,7 @@ void nodeRegisterType(bNodeType *nt)
if (nt->declare && !nt->declaration_is_dynamic) {
if (nt->fixed_declaration == nullptr) {
nt->fixed_declaration = new blender::nodes::NodeDeclaration();
blender::nodes::NodeDeclarationBuilder builder{*nt->fixed_declaration};
nt->declare(builder);
blender::nodes::build_node_declaration(*nt, *nt->fixed_declaration);
}
}
@@ -2198,6 +2201,8 @@ void nodeUniqueID(bNodeTree *ntree, bNode *node)
node->identifier = new_id;
ntree->runtime->nodes_by_id.add_new(node);
node->runtime->index_in_tree = ntree->runtime->nodes_by_id.index_range().last();
BLI_assert(node->runtime->index_in_tree == ntree->runtime->nodes_by_id.index_of(node));
}
bNode *nodeAddNode(const bContext *C, bNodeTree *ntree, const char *idname)
@@ -2937,8 +2942,10 @@ void nodeRebuildIDVector(bNodeTree *node_tree)
{
/* Rebuild nodes #VectorSet which must have the same order as the list. */
node_tree->runtime->nodes_by_id.clear();
LISTBASE_FOREACH (bNode *, node, &node_tree->nodes) {
int i;
LISTBASE_FOREACH_INDEX (bNode *, node, &node_tree->nodes, i) {
node_tree->runtime->nodes_by_id.add_new(node);
node->runtime->index_in_tree = i;
}
}
@@ -3607,8 +3614,7 @@ bool nodeDeclarationEnsureOnOutdatedNode(bNodeTree * /*ntree*/, bNode *node)
}
if (node->typeinfo->declaration_is_dynamic) {
node->runtime->declaration = new blender::nodes::NodeDeclaration();
blender::nodes::NodeDeclarationBuilder builder{*node->runtime->declaration};
node->typeinfo->declare(builder);
blender::nodes::build_node_declaration(*node->typeinfo, *node->runtime->declaration);
}
else {
/* Declaration should have been created in #nodeRegisterType. */


@@ -278,7 +278,7 @@ static void toposort_from_start_node(const ToposortDirection direction,
Stack<Item, 64> nodes_to_check;
nodes_to_check.push({&start_node});
node_states[start_node.runtime->index_in_tree].is_in_stack = true;
node_states[start_node.index()].is_in_stack = true;
while (!nodes_to_check.is_empty()) {
Item &item = nodes_to_check.peek();
bNode &node = *item.node;
@@ -306,7 +306,7 @@ static void toposort_from_start_node(const ToposortDirection direction,
}
bNodeSocket &linked_socket = *socket.runtime->directly_linked_sockets[item.link_index];
bNode &linked_node = *linked_socket.runtime->owner_node;
ToposortNodeState &linked_node_state = node_states[linked_node.runtime->index_in_tree];
ToposortNodeState &linked_node_state = node_states[linked_node.index()];
if (linked_node_state.is_done) {
/* The linked node has already been visited. */
item.link_index++;
@@ -324,7 +324,7 @@ static void toposort_from_start_node(const ToposortDirection direction,
/* If no other element has been pushed, the current node can be pushed to the sorted list. */
if (&item == &nodes_to_check.peek()) {
ToposortNodeState &node_state = node_states[node.runtime->index_in_tree];
ToposortNodeState &node_state = node_states[node.index()];
node_state.is_done = true;
node_state.is_in_stack = false;
r_sorted_nodes.append(&node);
@@ -345,7 +345,7 @@ static void update_toposort(const bNodeTree &ntree,
Array<ToposortNodeState> node_states(tree_runtime.nodes_by_id.size());
for (bNode *node : tree_runtime.nodes_by_id) {
if (node_states[node->runtime->index_in_tree].is_done) {
if (node_states[node->index()].is_done) {
/* Ignore nodes that are done already. */
continue;
}
@@ -361,7 +361,7 @@ static void update_toposort(const bNodeTree &ntree,
if (r_sorted_nodes.size() < tree_runtime.nodes_by_id.size()) {
r_cycle_detected = true;
for (bNode *node : tree_runtime.nodes_by_id) {
if (node_states[node->runtime->index_in_tree].is_done) {
if (node_states[node->index()].is_done) {
/* Ignore nodes that are done already. */
continue;
}
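`bNode::index()` reads an eagerly maintained `index_in_tree`, so the toposort code above no longer dereferences the runtime struct or depends on the topology cache. A toy standalone sketch of that pattern, maintaining the index on insertion and asserting consistency in the accessor; the types are illustrative, not Blender's:

#include <cassert>
#include <vector>

struct Node;

struct Tree {
  std::vector<Node *> nodes_by_id;
  void add_node(Node *node); /* Keeps the stored index in sync, like nodeUniqueID(). */
};

struct Node {
  Tree *owner = nullptr;
  int index_in_tree = -1;

  int index() const
  {
    /* Same idea as bNode::index(): check that the eagerly maintained index still
     * matches the owning container before handing it out. */
    assert(owner != nullptr);
    assert(owner->nodes_by_id[index_in_tree] == this);
    return index_in_tree;
  }
};

void Tree::add_node(Node *node)
{
  node->owner = this;
  node->index_in_tree = int(nodes_by_id.size());
  nodes_by_id.push_back(node);
}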


@@ -0,0 +1,519 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BKE_node.h"
#include "BKE_node_runtime.hh"
#include "NOD_node_declaration.hh"
#include "BLI_set.hh"
#include "BLI_stack.hh"
namespace blender::bke::node_field_inferencing {
using nodes::FieldInferencingInterface;
using nodes::InputSocketFieldType;
using nodes::NodeDeclaration;
using nodes::OutputFieldDependency;
using nodes::OutputSocketFieldType;
using nodes::SocketDeclaration;
static bool is_field_socket_type(eNodeSocketDatatype type)
{
return ELEM(type, SOCK_FLOAT, SOCK_INT, SOCK_BOOLEAN, SOCK_VECTOR, SOCK_RGBA);
}
static bool is_field_socket_type(const bNodeSocket &socket)
{
return is_field_socket_type((eNodeSocketDatatype)socket.typeinfo->type);
}
static InputSocketFieldType get_interface_input_field_type(const bNode &node,
const bNodeSocket &socket)
{
if (!is_field_socket_type(socket)) {
return InputSocketFieldType::None;
}
if (node.type == NODE_REROUTE) {
return InputSocketFieldType::IsSupported;
}
if (node.type == NODE_GROUP_OUTPUT) {
/* Outputs always support fields when the data type is correct. */
return InputSocketFieldType::IsSupported;
}
if (node.typeinfo == &NodeTypeUndefined) {
return InputSocketFieldType::None;
}
if (node.type == NODE_CUSTOM) {
return InputSocketFieldType::None;
}
/* TODO: Ensure declaration exists. */
const NodeDeclaration *node_decl = node.declaration();
/* Node declarations should be implemented for nodes involved here. */
BLI_assert(node_decl != nullptr);
/* Get the field type from the declaration. */
const SocketDeclaration &socket_decl = *node_decl->inputs()[socket.index()];
const InputSocketFieldType field_type = socket_decl.input_field_type();
return field_type;
}
static OutputFieldDependency get_interface_output_field_dependency(const bNode &node,
const bNodeSocket &socket)
{
if (!is_field_socket_type(socket)) {
/* Non-field sockets always output data. */
return OutputFieldDependency::ForDataSource();
}
if (node.type == NODE_REROUTE) {
/* The reroute just forwards what is passed in. */
return OutputFieldDependency::ForDependentField();
}
if (node.type == NODE_GROUP_INPUT) {
/* Input nodes get special treatment in #determine_group_input_states. */
return OutputFieldDependency::ForDependentField();
}
if (node.typeinfo == &NodeTypeUndefined) {
return OutputFieldDependency::ForDataSource();
}
if (node.type == NODE_CUSTOM) {
return OutputFieldDependency::ForDataSource();
}
const NodeDeclaration *node_decl = node.declaration();
/* Node declarations should be implemented for nodes involved here. */
BLI_assert(node_decl != nullptr);
/* Use the socket declaration. */
const SocketDeclaration &socket_decl = *node_decl->outputs()[socket.index()];
return socket_decl.output_field_dependency();
}
static FieldInferencingInterface get_dummy_field_inferencing_interface(const bNode &node)
{
FieldInferencingInterface inferencing_interface;
inferencing_interface.inputs.append_n_times(InputSocketFieldType::None,
node.input_sockets().size());
inferencing_interface.outputs.append_n_times(OutputFieldDependency::ForDataSource(),
node.output_sockets().size());
return inferencing_interface;
}
/**
* Retrieves information about how the node interacts with fields.
* In the future, this information can be stored in the node declaration. This would allow this
* function to return a reference, making it more efficient.
*/
static FieldInferencingInterface get_node_field_inferencing_interface(const bNode &node)
{
/* Node groups already reference all required information, so just return that. */
if (node.is_group()) {
bNodeTree *group = (bNodeTree *)node.id;
if (group == nullptr) {
return FieldInferencingInterface();
}
if (!ntreeIsRegistered(group)) {
/* This can happen when there is a linked node group that was not found (see T92799). */
return get_dummy_field_inferencing_interface(node);
}
if (!group->runtime->field_inferencing_interface) {
/* This shouldn't happen because referenced node groups should always be updated first. */
BLI_assert_unreachable();
}
return *group->runtime->field_inferencing_interface;
}
FieldInferencingInterface inferencing_interface;
for (const bNodeSocket *input_socket : node.input_sockets()) {
inferencing_interface.inputs.append(get_interface_input_field_type(node, *input_socket));
}
for (const bNodeSocket *output_socket : node.output_sockets()) {
inferencing_interface.outputs.append(
get_interface_output_field_dependency(node, *output_socket));
}
return inferencing_interface;
}
/**
* This struct contains information for every socket. The values are propagated through the
* network.
*/
struct SocketFieldState {
/* This socket starts a new field. */
bool is_field_source = false;
/* This socket can never become a field, because the node itself does not support it. */
bool is_always_single = false;
/* This socket is currently a single value. It could become a field though. */
bool is_single = true;
/* This socket is required to be a single value. This can be because the node itself only
* supports this socket being a single value, or because a downstream node requires it to be a
* single value. */
bool requires_single = false;
};
static Vector<const bNodeSocket *> gather_input_socket_dependencies(
const OutputFieldDependency &field_dependency, const bNode &node)
{
const OutputSocketFieldType type = field_dependency.field_type();
Vector<const bNodeSocket *> input_sockets;
switch (type) {
case OutputSocketFieldType::FieldSource:
case OutputSocketFieldType::None: {
break;
}
case OutputSocketFieldType::DependentField: {
/* This output depends on all inputs. */
input_sockets.extend(node.input_sockets());
break;
}
case OutputSocketFieldType::PartiallyDependent: {
/* This output depends only on a few inputs. */
for (const int i : field_dependency.linked_input_indices()) {
input_sockets.append(&node.input_socket(i));
}
break;
}
}
return input_sockets;
}
/**
* Check what the group output socket depends on. Potentially traverses the node tree
* to figure out if it is always a field or if it depends on any group inputs.
*/
static OutputFieldDependency find_group_output_dependencies(
const bNodeSocket &group_output_socket, const Span<SocketFieldState> field_state_by_socket_id)
{
if (!is_field_socket_type(group_output_socket)) {
return OutputFieldDependency::ForDataSource();
}
/* Use a Set here instead of an array indexed by socket id, because we may only need to look at
* very few sockets. */
Set<const bNodeSocket *> handled_sockets;
Stack<const bNodeSocket *> sockets_to_check;
handled_sockets.add(&group_output_socket);
sockets_to_check.push(&group_output_socket);
/* Keeps track of group input indices that are (indirectly) connected to the output. */
Vector<int> linked_input_indices;
while (!sockets_to_check.is_empty()) {
const bNodeSocket *input_socket = sockets_to_check.pop();
if (!input_socket->is_directly_linked() &&
!field_state_by_socket_id[input_socket->index_in_tree()].is_single) {
/* This socket uses a field as input by default. */
return OutputFieldDependency::ForFieldSource();
}
for (const bNodeSocket *origin_socket : input_socket->directly_linked_sockets()) {
const bNode &origin_node = origin_socket->owner_node();
const SocketFieldState &origin_state =
field_state_by_socket_id[origin_socket->index_in_tree()];
if (origin_state.is_field_source) {
if (origin_node.type == NODE_GROUP_INPUT) {
/* Found a group input that the group output depends on. */
linked_input_indices.append_non_duplicates(origin_socket->index());
}
else {
/* Found a field source that is not the group input. So the output is always a field. */
return OutputFieldDependency::ForFieldSource();
}
}
else if (!origin_state.is_single) {
const FieldInferencingInterface inferencing_interface =
get_node_field_inferencing_interface(origin_node);
const OutputFieldDependency &field_dependency =
inferencing_interface.outputs[origin_socket->index()];
/* Propagate search further to the left. */
for (const bNodeSocket *origin_input_socket :
gather_input_socket_dependencies(field_dependency, origin_node)) {
if (!origin_input_socket->is_available()) {
continue;
}
if (!field_state_by_socket_id[origin_input_socket->index_in_tree()].is_single) {
if (handled_sockets.add(origin_input_socket)) {
sockets_to_check.push(origin_input_socket);
}
}
}
}
}
}
return OutputFieldDependency::ForPartiallyDependentField(std::move(linked_input_indices));
}
static void propagate_data_requirements_from_right_to_left(
const bNodeTree &tree, const MutableSpan<SocketFieldState> field_state_by_socket_id)
{
const Span<const bNode *> toposort_result = tree.toposort_right_to_left();
for (const bNode *node : toposort_result) {
const FieldInferencingInterface inferencing_interface = get_node_field_inferencing_interface(
*node);
for (const bNodeSocket *output_socket : node->output_sockets()) {
SocketFieldState &state = field_state_by_socket_id[output_socket->index_in_tree()];
const OutputFieldDependency &field_dependency =
inferencing_interface.outputs[output_socket->index()];
if (field_dependency.field_type() == OutputSocketFieldType::FieldSource) {
continue;
}
if (field_dependency.field_type() == OutputSocketFieldType::None) {
state.requires_single = true;
state.is_always_single = true;
continue;
}
/* The output is required to be a single value when it is connected to any input that does
* not support fields. */
for (const bNodeSocket *target_socket : output_socket->directly_linked_sockets()) {
if (target_socket->is_available()) {
state.requires_single |=
field_state_by_socket_id[target_socket->index_in_tree()].requires_single;
}
}
if (state.requires_single) {
bool any_input_is_field_implicitly = false;
const Vector<const bNodeSocket *> connected_inputs = gather_input_socket_dependencies(
field_dependency, *node);
for (const bNodeSocket *input_socket : connected_inputs) {
if (!input_socket->is_available()) {
continue;
}
if (inferencing_interface.inputs[input_socket->index()] ==
InputSocketFieldType::Implicit) {
if (!input_socket->is_logically_linked()) {
any_input_is_field_implicitly = true;
break;
}
}
}
if (any_input_is_field_implicitly) {
/* This output isn't actually a single value. */
state.requires_single = false;
}
else {
/* If the output is required to be a single value, the connected inputs in the same node
* must not be fields either. */
for (const bNodeSocket *input_socket : connected_inputs) {
field_state_by_socket_id[input_socket->index_in_tree()].requires_single = true;
}
}
}
}
/* Some inputs do not require fields independent of what the outputs are connected to. */
for (const bNodeSocket *input_socket : node->input_sockets()) {
SocketFieldState &state = field_state_by_socket_id[input_socket->index_in_tree()];
if (inferencing_interface.inputs[input_socket->index()] == InputSocketFieldType::None) {
state.requires_single = true;
state.is_always_single = true;
}
}
}
}
static void determine_group_input_states(
const bNodeTree &tree,
FieldInferencingInterface &new_inferencing_interface,
const MutableSpan<SocketFieldState> field_state_by_socket_id)
{
{
/* Non-field inputs never support fields. */
int index;
LISTBASE_FOREACH_INDEX (bNodeSocket *, group_input, &tree.inputs, index) {
if (!is_field_socket_type((eNodeSocketDatatype)group_input->type)) {
new_inferencing_interface.inputs[index] = InputSocketFieldType::None;
}
}
}
/* Check if group inputs are required to be single values, because they are (indirectly)
* connected to some socket that does not support fields. */
for (const bNode *node : tree.nodes_by_type("NodeGroupInput")) {
for (const bNodeSocket *output_socket : node->output_sockets().drop_back(1)) {
SocketFieldState &state = field_state_by_socket_id[output_socket->index_in_tree()];
if (state.requires_single) {
new_inferencing_interface.inputs[output_socket->index()] = InputSocketFieldType::None;
}
}
}
/* If an input does not support fields, this should be reflected in all Group Input nodes. */
for (const bNode *node : tree.nodes_by_type("NodeGroupInput")) {
for (const bNodeSocket *output_socket : node->output_sockets().drop_back(1)) {
SocketFieldState &state = field_state_by_socket_id[output_socket->index_in_tree()];
const bool supports_field = new_inferencing_interface.inputs[output_socket->index()] !=
InputSocketFieldType::None;
if (supports_field) {
state.is_single = false;
state.is_field_source = true;
}
else {
state.requires_single = true;
}
}
SocketFieldState &dummy_socket_state =
field_state_by_socket_id[node->output_sockets().last()->index_in_tree()];
dummy_socket_state.requires_single = true;
}
}
static void propagate_field_status_from_left_to_right(
const bNodeTree &tree, const MutableSpan<SocketFieldState> field_state_by_socket_id)
{
const Span<const bNode *> toposort_result = tree.toposort_left_to_right();
for (const bNode *node : toposort_result) {
if (node->type == NODE_GROUP_INPUT) {
continue;
}
const FieldInferencingInterface inferencing_interface = get_node_field_inferencing_interface(
*node);
/* Update field state of input sockets, also taking into account linked origin sockets. */
for (const bNodeSocket *input_socket : node->input_sockets()) {
SocketFieldState &state = field_state_by_socket_id[input_socket->index_in_tree()];
if (state.is_always_single) {
state.is_single = true;
continue;
}
state.is_single = true;
if (!input_socket->is_directly_linked()) {
if (inferencing_interface.inputs[input_socket->index()] ==
InputSocketFieldType::Implicit) {
state.is_single = false;
}
}
else {
for (const bNodeSocket *origin_socket : input_socket->directly_linked_sockets()) {
if (!field_state_by_socket_id[origin_socket->index_in_tree()].is_single) {
state.is_single = false;
break;
}
}
}
}
/* Update field state of output sockets, also taking into account input sockets. */
for (const bNodeSocket *output_socket : node->output_sockets()) {
SocketFieldState &state = field_state_by_socket_id[output_socket->index_in_tree()];
const OutputFieldDependency &field_dependency =
inferencing_interface.outputs[output_socket->index()];
switch (field_dependency.field_type()) {
case OutputSocketFieldType::None: {
state.is_single = true;
break;
}
case OutputSocketFieldType::FieldSource: {
state.is_single = false;
state.is_field_source = true;
break;
}
case OutputSocketFieldType::PartiallyDependent:
case OutputSocketFieldType::DependentField: {
for (const bNodeSocket *input_socket :
gather_input_socket_dependencies(field_dependency, *node)) {
if (!input_socket->is_available()) {
continue;
}
if (!field_state_by_socket_id[input_socket->index_in_tree()].is_single) {
state.is_single = false;
break;
}
}
break;
}
}
}
}
}
static void determine_group_output_states(const bNodeTree &tree,
FieldInferencingInterface &new_inferencing_interface,
const Span<SocketFieldState> field_state_by_socket_id)
{
const bNode *group_output_node = tree.group_output_node();
if (!group_output_node) {
return;
}
for (const bNodeSocket *group_output_socket : group_output_node->input_sockets().drop_back(1)) {
OutputFieldDependency field_dependency = find_group_output_dependencies(
*group_output_socket, field_state_by_socket_id);
new_inferencing_interface.outputs[group_output_socket->index()] = std::move(field_dependency);
}
}
static void update_socket_shapes(const bNodeTree &tree,
const Span<SocketFieldState> field_state_by_socket_id)
{
const eNodeSocketDisplayShape requires_data_shape = SOCK_DISPLAY_SHAPE_CIRCLE;
const eNodeSocketDisplayShape data_but_can_be_field_shape = SOCK_DISPLAY_SHAPE_DIAMOND_DOT;
const eNodeSocketDisplayShape is_field_shape = SOCK_DISPLAY_SHAPE_DIAMOND;
auto get_shape_for_state = [&](const SocketFieldState &state) {
if (state.is_always_single) {
return requires_data_shape;
}
if (!state.is_single) {
return is_field_shape;
}
if (state.requires_single) {
return requires_data_shape;
}
return data_but_can_be_field_shape;
};
for (const bNodeSocket *socket : tree.all_input_sockets()) {
const SocketFieldState &state = field_state_by_socket_id[socket->index_in_tree()];
const_cast<bNodeSocket *>(socket)->display_shape = get_shape_for_state(state);
}
for (const bNodeSocket *socket : tree.all_sockets()) {
const SocketFieldState &state = field_state_by_socket_id[socket->index_in_tree()];
const_cast<bNodeSocket *>(socket)->display_shape = get_shape_for_state(state);
}
}
bool update_field_inferencing(const bNodeTree &tree)
{
tree.ensure_topology_cache();
/* Create new inferencing interface for this node group. */
std::unique_ptr<FieldInferencingInterface> new_inferencing_interface =
std::make_unique<FieldInferencingInterface>();
new_inferencing_interface->inputs.resize(BLI_listbase_count(&tree.inputs),
InputSocketFieldType::IsSupported);
new_inferencing_interface->outputs.resize(BLI_listbase_count(&tree.outputs),
OutputFieldDependency::ForDataSource());
/* Keep track of the state of all sockets. The index into this array is #SocketRef::id(). */
Array<SocketFieldState> field_state_by_socket_id(tree.all_sockets().size());
propagate_data_requirements_from_right_to_left(tree, field_state_by_socket_id);
determine_group_input_states(tree, *new_inferencing_interface, field_state_by_socket_id);
propagate_field_status_from_left_to_right(tree, field_state_by_socket_id);
determine_group_output_states(tree, *new_inferencing_interface, field_state_by_socket_id);
update_socket_shapes(tree, field_state_by_socket_id);
/* Update the previous group interface. */
const bool group_interface_changed = !tree.runtime->field_inferencing_interface ||
*tree.runtime->field_inferencing_interface !=
*new_inferencing_interface;
tree.runtime->field_inferencing_interface = std::move(new_inferencing_interface);
return group_interface_changed;
}
} // namespace blender::bke::node_field_inferencing
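The pass above keeps one `SocketFieldState` per socket and runs two toposort sweeps: right-to-left to push single-value requirements toward producers, then left-to-right to push actual field status toward consumers. A heavily reduced standalone sketch of that two-sweep idea on a linear chain of sockets; the types, topology, and helper are illustrative only and leave out implicit-field inputs, group interfaces, and socket shapes:

#include <vector>

struct SocketFieldState {
  bool is_field_source = false;
  bool is_single = true;
  bool requires_single = false;
};

/* Toy model: socket i feeds socket i + 1. `field_source[i]` marks sockets that start a
 * field, `single_only[i]` marks sockets that can never hold a field. */
static std::vector<SocketFieldState> infer_chain(const std::vector<bool> &field_source,
                                                 const std::vector<bool> &single_only)
{
  const int n = int(field_source.size());
  std::vector<SocketFieldState> states(n);

  /* Right-to-left: a single-only consumer forces the sockets feeding it to be single. */
  for (int i = n - 1; i >= 0; i--) {
    if (single_only[i]) {
      states[i].requires_single = true;
    }
    if (i + 1 < n && states[i + 1].requires_single) {
      states[i].requires_single = true;
    }
  }

  /* Left-to-right: field sources make downstream sockets fields, while single-only
   * sockets stay single values. */
  for (int i = 0; i < n; i++) {
    if (single_only[i]) {
      states[i].is_single = true;
      continue;
    }
    if (field_source[i]) {
      states[i].is_field_source = true;
      states[i].is_single = false;
      continue;
    }
    if (i > 0 && !states[i - 1].is_single) {
      states[i].is_single = false;
    }
  }
  return states;
}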


@@ -68,526 +68,6 @@ static void add_socket_tag(bNodeTree *ntree, bNodeSocket *socket, const eNodeTre
namespace blender::bke {
namespace node_field_inferencing {
static bool is_field_socket_type(eNodeSocketDatatype type)
{
return ELEM(type, SOCK_FLOAT, SOCK_INT, SOCK_BOOLEAN, SOCK_VECTOR, SOCK_RGBA);
}
static bool is_field_socket_type(const bNodeSocket &socket)
{
return is_field_socket_type((eNodeSocketDatatype)socket.typeinfo->type);
}
static InputSocketFieldType get_interface_input_field_type(const bNode &node,
const bNodeSocket &socket)
{
if (!is_field_socket_type(socket)) {
return InputSocketFieldType::None;
}
if (node.type == NODE_REROUTE) {
return InputSocketFieldType::IsSupported;
}
if (node.type == NODE_GROUP_OUTPUT) {
/* Outputs always support fields when the data type is correct. */
return InputSocketFieldType::IsSupported;
}
if (node.typeinfo == &NodeTypeUndefined) {
return InputSocketFieldType::None;
}
if (node.type == NODE_CUSTOM) {
return InputSocketFieldType::None;
}
/* TODO: Ensure declaration exists. */
const NodeDeclaration *node_decl = node.declaration();
/* Node declarations should be implemented for nodes involved here. */
BLI_assert(node_decl != nullptr);
/* Get the field type from the declaration. */
const SocketDeclaration &socket_decl = *node_decl->inputs()[socket.index()];
const InputSocketFieldType field_type = socket_decl.input_field_type();
if (field_type == InputSocketFieldType::Implicit) {
return field_type;
}
if (node_decl->is_function_node()) {
/* In a function node, every socket supports fields. */
return InputSocketFieldType::IsSupported;
}
return field_type;
}
static OutputFieldDependency get_interface_output_field_dependency(const bNode &node,
const bNodeSocket &socket)
{
if (!is_field_socket_type(socket)) {
/* Non-field sockets always output data. */
return OutputFieldDependency::ForDataSource();
}
if (node.type == NODE_REROUTE) {
/* The reroute just forwards what is passed in. */
return OutputFieldDependency::ForDependentField();
}
if (node.type == NODE_GROUP_INPUT) {
/* Input nodes get special treatment in #determine_group_input_states. */
return OutputFieldDependency::ForDependentField();
}
if (node.typeinfo == &NodeTypeUndefined) {
return OutputFieldDependency::ForDataSource();
}
if (node.type == NODE_CUSTOM) {
return OutputFieldDependency::ForDataSource();
}
const NodeDeclaration *node_decl = node.declaration();
/* Node declarations should be implemented for nodes involved here. */
BLI_assert(node_decl != nullptr);
if (node_decl->is_function_node()) {
/* In a generic function node, all outputs depend on all inputs. */
return OutputFieldDependency::ForDependentField();
}
/* Use the socket declaration. */
const SocketDeclaration &socket_decl = *node_decl->outputs()[socket.index()];
return socket_decl.output_field_dependency();
}
static FieldInferencingInterface get_dummy_field_inferencing_interface(const bNode &node)
{
FieldInferencingInterface inferencing_interface;
inferencing_interface.inputs.append_n_times(InputSocketFieldType::None,
node.input_sockets().size());
inferencing_interface.outputs.append_n_times(OutputFieldDependency::ForDataSource(),
node.output_sockets().size());
return inferencing_interface;
}
/**
* Retrieves information about how the node interacts with fields.
* In the future, this information can be stored in the node declaration. This would allow this
* function to return a reference, making it more efficient.
*/
static FieldInferencingInterface get_node_field_inferencing_interface(const bNode &node)
{
/* Node groups already reference all required information, so just return that. */
if (node.is_group()) {
bNodeTree *group = (bNodeTree *)node.id;
if (group == nullptr) {
return FieldInferencingInterface();
}
if (!ntreeIsRegistered(group)) {
/* This can happen when there is a linked node group that was not found (see T92799). */
return get_dummy_field_inferencing_interface(node);
}
if (!group->runtime->field_inferencing_interface) {
/* This shouldn't happen because referenced node groups should always be updated first. */
BLI_assert_unreachable();
}
return *group->runtime->field_inferencing_interface;
}
FieldInferencingInterface inferencing_interface;
for (const bNodeSocket *input_socket : node.input_sockets()) {
inferencing_interface.inputs.append(get_interface_input_field_type(node, *input_socket));
}
for (const bNodeSocket *output_socket : node.output_sockets()) {
inferencing_interface.outputs.append(
get_interface_output_field_dependency(node, *output_socket));
}
return inferencing_interface;
}
/**
* This struct contains information for every socket. The values are propagated through the
* network.
*/
struct SocketFieldState {
/* This socket starts a new field. */
bool is_field_source = false;
/* This socket can never become a field, because the node itself does not support it. */
bool is_always_single = false;
/* This socket is currently a single value. It could become a field though. */
bool is_single = true;
/* This socket is required to be a single value. This can be because the node itself only
* supports this socket to be a single value, or because a node afterwards requires this to be a
* single value. */
bool requires_single = false;
};
static Vector<const bNodeSocket *> gather_input_socket_dependencies(
const OutputFieldDependency &field_dependency, const bNode &node)
{
const OutputSocketFieldType type = field_dependency.field_type();
Vector<const bNodeSocket *> input_sockets;
switch (type) {
case OutputSocketFieldType::FieldSource:
case OutputSocketFieldType::None: {
break;
}
case OutputSocketFieldType::DependentField: {
/* This output depends on all inputs. */
input_sockets.extend(node.input_sockets());
break;
}
case OutputSocketFieldType::PartiallyDependent: {
/* This output depends only on a few inputs. */
for (const int i : field_dependency.linked_input_indices()) {
input_sockets.append(&node.input_socket(i));
}
break;
}
}
return input_sockets;
}
/**
* Check what the group output socket depends on. Potentially traverses the node tree
* to figure out if it is always a field or if it depends on any group inputs.
*/
static OutputFieldDependency find_group_output_dependencies(
const bNodeSocket &group_output_socket, const Span<SocketFieldState> field_state_by_socket_id)
{
if (!is_field_socket_type(group_output_socket)) {
return OutputFieldDependency::ForDataSource();
}
/* Use a Set here instead of an array indexed by socket id, because we may only need to look at
* very few sockets. */
Set<const bNodeSocket *> handled_sockets;
Stack<const bNodeSocket *> sockets_to_check;
handled_sockets.add(&group_output_socket);
sockets_to_check.push(&group_output_socket);
/* Keeps track of group input indices that are (indirectly) connected to the output. */
Vector<int> linked_input_indices;
while (!sockets_to_check.is_empty()) {
const bNodeSocket *input_socket = sockets_to_check.pop();
if (!input_socket->is_directly_linked() &&
!field_state_by_socket_id[input_socket->index_in_tree()].is_single) {
/* This socket uses a field as input by default. */
return OutputFieldDependency::ForFieldSource();
}
for (const bNodeSocket *origin_socket : input_socket->directly_linked_sockets()) {
const bNode &origin_node = origin_socket->owner_node();
const SocketFieldState &origin_state =
field_state_by_socket_id[origin_socket->index_in_tree()];
if (origin_state.is_field_source) {
if (origin_node.type == NODE_GROUP_INPUT) {
/* Found a group input that the group output depends on. */
linked_input_indices.append_non_duplicates(origin_socket->index());
}
else {
/* Found a field source that is not the group input. So the output is always a field. */
return OutputFieldDependency::ForFieldSource();
}
}
else if (!origin_state.is_single) {
const FieldInferencingInterface inferencing_interface =
get_node_field_inferencing_interface(origin_node);
const OutputFieldDependency &field_dependency =
inferencing_interface.outputs[origin_socket->index()];
/* Propagate search further to the left. */
for (const bNodeSocket *origin_input_socket :
gather_input_socket_dependencies(field_dependency, origin_node)) {
if (!origin_input_socket->is_available()) {
continue;
}
if (!field_state_by_socket_id[origin_input_socket->index_in_tree()].is_single) {
if (handled_sockets.add(origin_input_socket)) {
sockets_to_check.push(origin_input_socket);
}
}
}
}
}
}
return OutputFieldDependency::ForPartiallyDependentField(std::move(linked_input_indices));
}
static void propagate_data_requirements_from_right_to_left(
const bNodeTree &tree, const MutableSpan<SocketFieldState> field_state_by_socket_id)
{
const Span<const bNode *> toposort_result = tree.toposort_right_to_left();
for (const bNode *node : toposort_result) {
const FieldInferencingInterface inferencing_interface = get_node_field_inferencing_interface(
*node);
for (const bNodeSocket *output_socket : node->output_sockets()) {
SocketFieldState &state = field_state_by_socket_id[output_socket->index_in_tree()];
const OutputFieldDependency &field_dependency =
inferencing_interface.outputs[output_socket->index()];
if (field_dependency.field_type() == OutputSocketFieldType::FieldSource) {
continue;
}
if (field_dependency.field_type() == OutputSocketFieldType::None) {
state.requires_single = true;
state.is_always_single = true;
continue;
}
/* The output is required to be a single value when it is connected to any input that does
* not support fields. */
for (const bNodeSocket *target_socket : output_socket->directly_linked_sockets()) {
if (target_socket->is_available()) {
state.requires_single |=
field_state_by_socket_id[target_socket->index_in_tree()].requires_single;
}
}
if (state.requires_single) {
bool any_input_is_field_implicitly = false;
const Vector<const bNodeSocket *> connected_inputs = gather_input_socket_dependencies(
field_dependency, *node);
for (const bNodeSocket *input_socket : connected_inputs) {
if (!input_socket->is_available()) {
continue;
}
if (inferencing_interface.inputs[input_socket->index()] ==
InputSocketFieldType::Implicit) {
if (!input_socket->is_logically_linked()) {
any_input_is_field_implicitly = true;
break;
}
}
}
if (any_input_is_field_implicitly) {
/* This output isn't a single value actually. */
state.requires_single = false;
}
else {
/* If the output is required to be a single value, the connected inputs in the same node
* must not be fields as well. */
for (const bNodeSocket *input_socket : connected_inputs) {
field_state_by_socket_id[input_socket->index_in_tree()].requires_single = true;
}
}
}
}
/* Some inputs do not require fields independent of what the outputs are connected to. */
for (const bNodeSocket *input_socket : node->input_sockets()) {
SocketFieldState &state = field_state_by_socket_id[input_socket->index_in_tree()];
if (inferencing_interface.inputs[input_socket->index()] == InputSocketFieldType::None) {
state.requires_single = true;
state.is_always_single = true;
}
}
}
}
static void determine_group_input_states(
const bNodeTree &tree,
FieldInferencingInterface &new_inferencing_interface,
const MutableSpan<SocketFieldState> field_state_by_socket_id)
{
{
/* Non-field inputs never support fields. */
int index;
LISTBASE_FOREACH_INDEX (bNodeSocket *, group_input, &tree.inputs, index) {
if (!is_field_socket_type((eNodeSocketDatatype)group_input->type)) {
new_inferencing_interface.inputs[index] = InputSocketFieldType::None;
}
}
}
/* Check if group inputs are required to be single values, because they are (indirectly)
* connected to some socket that does not support fields. */
for (const bNode *node : tree.nodes_by_type("NodeGroupInput")) {
for (const bNodeSocket *output_socket : node->output_sockets().drop_back(1)) {
SocketFieldState &state = field_state_by_socket_id[output_socket->index_in_tree()];
if (state.requires_single) {
new_inferencing_interface.inputs[output_socket->index()] = InputSocketFieldType::None;
}
}
}
/* If an input does not support fields, this should be reflected in all Group Input nodes. */
for (const bNode *node : tree.nodes_by_type("NodeGroupInput")) {
for (const bNodeSocket *output_socket : node->output_sockets().drop_back(1)) {
SocketFieldState &state = field_state_by_socket_id[output_socket->index_in_tree()];
const bool supports_field = new_inferencing_interface.inputs[output_socket->index()] !=
InputSocketFieldType::None;
if (supports_field) {
state.is_single = false;
state.is_field_source = true;
}
else {
state.requires_single = true;
}
}
SocketFieldState &dummy_socket_state =
field_state_by_socket_id[node->output_sockets().last()->index_in_tree()];
dummy_socket_state.requires_single = true;
}
}
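/* Propagate the actual field status from left to right: an input becomes a field when any of its
* origin sockets is a field (or when it is an unlinked input with an implicit field default), and
* an output becomes a field depending on its field dependency and the inputs it depends on. */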
static void propagate_field_status_from_left_to_right(
const bNodeTree &tree, const MutableSpan<SocketFieldState> field_state_by_socket_id)
{
const Span<const bNode *> toposort_result = tree.toposort_left_to_right();
for (const bNode *node : toposort_result) {
if (node->type == NODE_GROUP_INPUT) {
continue;
}
const FieldInferencingInterface inferencing_interface = get_node_field_inferencing_interface(
*node);
/* Update field state of input sockets, also taking into account linked origin sockets. */
for (const bNodeSocket *input_socket : node->input_sockets()) {
SocketFieldState &state = field_state_by_socket_id[input_socket->index_in_tree()];
if (state.is_always_single) {
state.is_single = true;
continue;
}
state.is_single = true;
if (!input_socket->is_directly_linked()) {
if (inferencing_interface.inputs[input_socket->index()] ==
InputSocketFieldType::Implicit) {
state.is_single = false;
}
}
else {
for (const bNodeSocket *origin_socket : input_socket->directly_linked_sockets()) {
if (!field_state_by_socket_id[origin_socket->index_in_tree()].is_single) {
state.is_single = false;
break;
}
}
}
}
/* Update field state of output sockets, also taking into account input sockets. */
for (const bNodeSocket *output_socket : node->output_sockets()) {
SocketFieldState &state = field_state_by_socket_id[output_socket->index_in_tree()];
const OutputFieldDependency &field_dependency =
inferencing_interface.outputs[output_socket->index()];
switch (field_dependency.field_type()) {
case OutputSocketFieldType::None: {
state.is_single = true;
break;
}
case OutputSocketFieldType::FieldSource: {
state.is_single = false;
state.is_field_source = true;
break;
}
case OutputSocketFieldType::PartiallyDependent:
case OutputSocketFieldType::DependentField: {
for (const bNodeSocket *input_socket :
gather_input_socket_dependencies(field_dependency, *node)) {
if (!input_socket->is_available()) {
continue;
}
if (!field_state_by_socket_id[input_socket->index_in_tree()].is_single) {
state.is_single = false;
break;
}
}
break;
}
}
}
}
}
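/* Determine the field dependencies of the group outputs for the new inferencing interface, based
* on the sockets connected to the active Group Output node. */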
static void determine_group_output_states(const bNodeTree &tree,
FieldInferencingInterface &new_inferencing_interface,
const Span<SocketFieldState> field_state_by_socket_id)
{
for (const bNode *group_output_node : tree.nodes_by_type("NodeGroupOutput")) {
/* Ignore inactive group output nodes. */
if (!(group_output_node->flag & NODE_DO_OUTPUT)) {
continue;
}
/* Determine dependencies of all group outputs. */
for (const bNodeSocket *group_output_socket :
group_output_node->input_sockets().drop_back(1)) {
OutputFieldDependency field_dependency = find_group_output_dependencies(
*group_output_socket, field_state_by_socket_id);
new_inferencing_interface.outputs[group_output_socket->index()] = std::move(
field_dependency);
}
break;
}
}
static void update_socket_shapes(const bNodeTree &tree,
const Span<SocketFieldState> field_state_by_socket_id)
{
const eNodeSocketDisplayShape requires_data_shape = SOCK_DISPLAY_SHAPE_CIRCLE;
const eNodeSocketDisplayShape data_but_can_be_field_shape = SOCK_DISPLAY_SHAPE_DIAMOND_DOT;
const eNodeSocketDisplayShape is_field_shape = SOCK_DISPLAY_SHAPE_DIAMOND;
auto get_shape_for_state = [&](const SocketFieldState &state) {
if (state.is_always_single) {
return requires_data_shape;
}
if (!state.is_single) {
return is_field_shape;
}
if (state.requires_single) {
return requires_data_shape;
}
return data_but_can_be_field_shape;
};
for (const bNodeSocket *socket : tree.all_input_sockets()) {
const SocketFieldState &state = field_state_by_socket_id[socket->index_in_tree()];
const_cast<bNodeSocket *>(socket)->display_shape = get_shape_for_state(state);
}
for (const bNodeSocket *socket : tree.all_sockets()) {
const SocketFieldState &state = field_state_by_socket_id[socket->index_in_tree()];
const_cast<bNodeSocket *>(socket)->display_shape = get_shape_for_state(state);
}
}
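/* Rebuild the field inferencing interface of the node tree by running the passes above, update
* the socket display shapes, and return whether the interface changed compared to the previously
* stored one. */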
static bool update_field_inferencing(const bNodeTree &tree)
{
tree.ensure_topology_cache();
/* Create new inferencing interface for this node group. */
std::unique_ptr<FieldInferencingInterface> new_inferencing_interface =
std::make_unique<FieldInferencingInterface>();
new_inferencing_interface->inputs.resize(BLI_listbase_count(&tree.inputs),
InputSocketFieldType::IsSupported);
new_inferencing_interface->outputs.resize(BLI_listbase_count(&tree.outputs),
OutputFieldDependency::ForDataSource());
/* Keep track of the state of all sockets. The index into this array is #bNodeSocket::index_in_tree(). */
Array<SocketFieldState> field_state_by_socket_id(tree.all_sockets().size());
propagate_data_requirements_from_right_to_left(tree, field_state_by_socket_id);
determine_group_input_states(tree, *new_inferencing_interface, field_state_by_socket_id);
propagate_field_status_from_left_to_right(tree, field_state_by_socket_id);
determine_group_output_states(tree, *new_inferencing_interface, field_state_by_socket_id);
update_socket_shapes(tree, field_state_by_socket_id);
/* Update the previous group interface. */
const bool group_interface_changed = !tree.runtime->field_inferencing_interface ||
*tree.runtime->field_inferencing_interface !=
*new_inferencing_interface;
tree.runtime->field_inferencing_interface = std::move(new_inferencing_interface);
return group_interface_changed;
}
} // namespace node_field_inferencing
/**
* Common datatype priorities, works for compositor, shader and texture nodes alike
* defines priority of datatype connection based on output type (to):
@@ -1012,9 +492,12 @@ class NodeTreeMainUpdater {
#ifdef DEBUG
/* Check the uniqueness of node identifiers. */
Set<int32_t> node_identifiers;
for (bNode *node : ntree.all_nodes()) {
BLI_assert(node->identifier > 0);
node_identifiers.add_new(node->identifier);
const Span<const bNode *> nodes = ntree.all_nodes();
for (const int i : nodes.index_range()) {
const bNode &node = *nodes[i];
BLI_assert(node.identifier > 0);
node_identifiers.add_new(node.identifier);
BLI_assert(node.runtime->index_in_tree == i);
}
#endif
@@ -1281,15 +764,14 @@ class NodeTreeMainUpdater {
Array<int> toposort_indices(toposort.size());
for (const int i : toposort.index_range()) {
const bNode &node = *toposort[i];
toposort_indices[node.runtime->index_in_tree] = i;
toposort_indices[node.index()] = i;
}
LISTBASE_FOREACH (bNodeLink *, link, &ntree.links) {
link->flag |= NODE_LINK_VALID;
const bNode &from_node = *link->fromnode;
const bNode &to_node = *link->tonode;
if (toposort_indices[from_node.runtime->index_in_tree] >
toposort_indices[to_node.runtime->index_in_tree]) {
if (toposort_indices[from_node.index()] > toposort_indices[to_node.index()]) {
link->flag &= ~NODE_LINK_VALID;
continue;
}

View File

@@ -883,13 +883,13 @@ static void object_blend_read_lib(BlendLibReader *reader, ID *id)
if (ob->id.lib) {
BLO_reportf_wrap(reports,
RPT_INFO,
TIP_("Proxy lost from object %s lib %s\n"),
ob->id.name + 2,
ob->id.lib->filepath);
}
else {
BLO_reportf_wrap(
reports, RPT_INFO, TIP_("Proxy lost from object %s lib <NONE>\n"), ob->id.name + 2);
}
reports->count.missing_obproxies++;
}

View File

@@ -478,8 +478,13 @@ static void studiolight_create_equirect_radiance_gputexture(StudioLight *sl)
BKE_studiolight_ensure_flag(sl, STUDIOLIGHT_EXTERNAL_IMAGE_LOADED);
ImBuf *ibuf = sl->equirect_radiance_buffer;
sl->equirect_radiance_gputexture = GPU_texture_create_2d(
"studiolight_radiance", ibuf->x, ibuf->y, 1, GPU_RGBA16F, ibuf->rect_float);
sl->equirect_radiance_gputexture = GPU_texture_create_2d_ex("studiolight_radiance",
ibuf->x,
ibuf->y,
1,
GPU_RGBA16F,
GPU_TEXTURE_USAGE_SHADER_READ,
ibuf->rect_float);
GPUTexture *tex = sl->equirect_radiance_gputexture;
GPU_texture_filter_mode(tex, true);
GPU_texture_wrap_mode(tex, true, true);
@@ -499,7 +504,8 @@ static void studiolight_create_matcap_gputexture(StudioLightImage *sli)
copy_v3_v3(*offset3, *offset4);
}
sli->gputexture = GPU_texture_create_2d("matcap", ibuf->x, ibuf->y, 1, GPU_R11F_G11F_B10F, NULL);
sli->gputexture = GPU_texture_create_2d_ex(
"matcap", ibuf->x, ibuf->y, 1, GPU_R11F_G11F_B10F, GPU_TEXTURE_USAGE_SHADER_READ, NULL);
GPU_texture_update(sli->gputexture, GPU_DATA_FLOAT, gpu_matcap_3components);
MEM_SAFE_FREE(gpu_matcap_3components);
@@ -533,8 +539,13 @@ static void studiolight_create_equirect_irradiance_gputexture(StudioLight *sl)
if (sl->flag & STUDIOLIGHT_EXTERNAL_FILE) {
BKE_studiolight_ensure_flag(sl, STUDIOLIGHT_EQUIRECT_IRRADIANCE_IMAGE_CALCULATED);
ImBuf *ibuf = sl->equirect_irradiance_buffer;
sl->equirect_irradiance_gputexture = GPU_texture_create_2d(
"studiolight_irradiance", ibuf->x, ibuf->y, 1, GPU_RGBA16F, ibuf->rect_float);
sl->equirect_irradiance_gputexture = GPU_texture_create_2d_ex("studiolight_irradiance",
ibuf->x,
ibuf->y,
1,
GPU_RGBA16F,
GPU_TEXTURE_USAGE_SHADER_READ,
ibuf->rect_float);
GPUTexture *tex = sl->equirect_irradiance_gputexture;
GPU_texture_filter_mode(tex, true);
GPU_texture_wrap_mode(tex, true, true);

View File

@@ -990,6 +990,15 @@ class Map {
occupied_and_removed_slots_ = 0;
}
/**
* Removes all key-value-pairs from the map and frees any allocated memory.
*/
void clear_and_shrink()
{
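/* Destroy the current map and construct an empty one in its place, which frees the slot array
* and resets the map to its default state. */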
std::destroy_at(this);
new (this) Map(NoExceptConstructor{});
}
/**
* Get the number of collisions that the probing strategy has to go through to find the key or
* determine that it is not in the map.

View File

@@ -4,11 +4,9 @@
/** \file
* \ingroup bli
* Some of the functions below have very similar alternatives in the standard library. However, it
* is rather annoying to use those when debugging. Therefore, some more specialized and easier to
* debug functions are provided here.
*/
#include <algorithm>
#include <memory>
#include <new>
#include <type_traits>
@@ -33,280 +31,66 @@ template<typename T>
inline constexpr bool is_trivially_move_constructible_extended_v =
is_trivial_extended_v<T> || std::is_trivially_move_constructible_v<T>;
/**
* Call the destructor on n consecutive values. For trivially destructible types, this does
* nothing.
*
* Exception Safety: Destructors shouldn't throw exceptions.
*
* Before:
* ptr: initialized
* After:
* ptr: uninitialized
*/
template<typename T> void destruct_n(T *ptr, int64_t n)
{
BLI_assert(n >= 0);
static_assert(std::is_nothrow_destructible_v<T>,
"This should be true for all types. Destructors are noexcept by default.");
/* This is not strictly necessary, because the loop below will be optimized away anyway. It is
* nice to make this behavior explicit, though. */
if (is_trivially_destructible_extended_v<T>) {
return;
}
for (int64_t i = 0; i < n; i++) {
ptr[i].~T();
}
std::destroy_n(ptr, n);
}
/**
* Call the default constructor on n consecutive elements. For trivially constructible types, this
* does nothing.
*
* Exception Safety: Strong.
*
* Before:
* ptr: uninitialized
* After:
* ptr: initialized
*/
template<typename T> void default_construct_n(T *ptr, int64_t n)
{
BLI_assert(n >= 0);
/* This is not strictly necessary, because the loop below will be optimized away anyway. It is
* nice to make this behavior explicit, though. */
if (std::is_trivially_constructible_v<T>) {
return;
}
int64_t current = 0;
try {
for (; current < n; current++) {
new (static_cast<void *>(ptr + current)) T;
}
}
catch (...) {
destruct_n(ptr, current);
throw;
}
std::uninitialized_default_construct_n(ptr, n);
}
/**
* Copy n values from src to dst.
*
* Exception Safety: Basic.
*
* Before:
* src: initialized
* dst: initialized
* After:
* src: initialized
* dst: initialized
*/
template<typename T> void initialized_copy_n(const T *src, int64_t n, T *dst)
{
BLI_assert(n >= 0);
for (int64_t i = 0; i < n; i++) {
dst[i] = src[i];
}
std::copy_n(src, n, dst);
}
/**
* Copy n values from src to dst.
*
* Exception Safety: Strong.
*
* Before:
* src: initialized
* dst: uninitialized
* After:
* src: initialized
* dst: initialized
*/
template<typename T> void uninitialized_copy_n(const T *src, int64_t n, T *dst)
{
BLI_assert(n >= 0);
int64_t current = 0;
try {
for (; current < n; current++) {
new (static_cast<void *>(dst + current)) T(src[current]);
}
}
catch (...) {
destruct_n(dst, current);
throw;
}
std::uninitialized_copy_n(src, n, dst);
}
/**
* Convert n values from type `From` to type `To`.
*
* Exception Safety: Strong.
*
* Before:
* src: initialized
* dst: uninitialized
* After:
* src: initialized
* dst: initialized
*/
template<typename From, typename To>
void uninitialized_convert_n(const From *src, int64_t n, To *dst)
{
BLI_assert(n >= 0);
int64_t current = 0;
try {
for (; current < n; current++) {
new (static_cast<void *>(dst + current)) To(static_cast<To>(src[current]));
}
}
catch (...) {
destruct_n(dst, current);
throw;
}
std::uninitialized_copy_n(src, n, dst);
}
/**
* Move n values from src to dst.
*
* Exception Safety: Basic.
*
* Before:
* src: initialized
* dst: initialized
* After:
* src: initialized, moved-from
* dst: initialized
*/
template<typename T> void initialized_move_n(T *src, int64_t n, T *dst)
{
BLI_assert(n >= 0);
for (int64_t i = 0; i < n; i++) {
dst[i] = std::move(src[i]);
}
std::copy_n(std::make_move_iterator(src), n, dst);
}
/**
* Move n values from src to dst.
*
* Exception Safety: Basic.
*
* Before:
* src: initialized
* dst: uninitialized
* After:
* src: initialized, moved-from
* dst: initialized
*/
template<typename T> void uninitialized_move_n(T *src, int64_t n, T *dst)
{
BLI_assert(n >= 0);
int64_t current = 0;
try {
for (; current < n; current++) {
new (static_cast<void *>(dst + current)) T(std::move(src[current]));
}
}
catch (...) {
destruct_n(dst, current);
throw;
}
std::uninitialized_copy_n(std::make_move_iterator(src), n, dst);
}
/**
* Relocate n values from src to dst. Relocation is a move followed by destruction of the src
* value.
*
* Exception Safety: Basic.
*
* Before:
* src: initialized
* dst: initialized
* After:
* src: uninitialized
* dst: initialized
*/
template<typename T> void initialized_relocate_n(T *src, int64_t n, T *dst)
{
BLI_assert(n >= 0);
initialized_move_n(src, n, dst);
destruct_n(src, n);
}
/**
* Relocate n values from src to dst. Relocation is a move followed by destruction of the src
* value.
*
* Exception Safety: Basic.
*
* Before:
* src: initialized
* dst: uninitialized
* After:
* src: uninitialized
* dst: initialized
*/
template<typename T> void uninitialized_relocate_n(T *src, int64_t n, T *dst)
{
BLI_assert(n >= 0);
uninitialized_move_n(src, n, dst);
destruct_n(src, n);
}
/**
* Copy the value to n consecutive elements.
*
* Exception Safety: Basic.
*
* Before:
* dst: initialized
* After:
* dst: initialized
*/
template<typename T> void initialized_fill_n(T *dst, int64_t n, const T &value)
{
BLI_assert(n >= 0);
for (int64_t i = 0; i < n; i++) {
dst[i] = value;
}
std::fill_n(dst, n, value);
}
/**
* Copy the value to n consecutive elements.
*
* Exception Safety: Strong.
*
* Before:
* dst: uninitialized
* After:
* dst: initialized
*/
template<typename T> void uninitialized_fill_n(T *dst, int64_t n, const T &value)
{
BLI_assert(n >= 0);
int64_t current = 0;
try {
for (; current < n; current++) {
new (static_cast<void *>(dst + current)) T(value);
}
}
catch (...) {
destruct_n(dst, current);
throw;
}
std::uninitialized_fill_n(dst, n, value);
}
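/* Illustration only (not part of this patch): a hypothetical grow step that uses the helpers
* above. The names grow_example, old_buffer and new_buffer are made up for the example. */
template<typename T> void grow_example(T *old_buffer, int64_t size, T *new_buffer)
{
/* Move-construct every value into the uninitialized new storage, then destruct the moved-from
* values, leaving old_buffer uninitialized and new_buffer initialized. */
uninitialized_relocate_n(old_buffer, size, new_buffer);
}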
template<typename T> struct DestructValueAtAddress {

View File

@@ -150,6 +150,11 @@ template<typename Key, typename Value> class MultiValueMap {
{
map_.clear();
}
void clear_and_shrink()
{
map_.clear_and_shrink();
}
};
} // namespace blender

View File

@@ -542,6 +542,15 @@ class Set {
occupied_and_removed_slots_ = 0;
}
/**
* Removes all keys from the set and frees any allocated memory.
*/
void clear_and_shrink()
{
std::destroy_at(this);
new (this) Set(NoExceptConstructor{});
}
/**
* Creates a new slot array and reinserts all keys inside of that. This method can be used to get
* rid of removed slots. Also this is useful for benchmarking the grow function.

View File

@@ -329,6 +329,15 @@ class Stack {
top_ = top_chunk_->begin;
}
/**
* Removes all elements from the stack and frees any allocated memory.
*/
void clear_and_shrink()
{
std::destroy_at(this);
new (this) Stack(NoExceptConstructor{});
}
/* This should only be called by unit tests. */
bool is_invariant_maintained() const
{

View File

@@ -410,7 +410,7 @@ class Vector {
* Afterwards the vector has 0 elements and any allocated memory
* will be freed.
*/
void clear_and_make_inline()
void clear_and_shrink()
{
destruct_n(begin_, this->size());
if (!this->is_inline()) {

View File

@@ -560,6 +560,15 @@ class VectorSet {
occupied_and_removed_slots_ = 0;
}
/**
* Removes all keys from the set and frees any allocated memory.
*/
void clear_and_shrink()
{
std::destroy_at(this);
new (this) VectorSet(NoExceptConstructor{});
}
/**
* Get the number of collisions that the probing strategy has to go through to find the key or
* determine that it is not in the set.

View File

@@ -272,7 +272,7 @@ set(SRC
BLI_math_matrix.h
BLI_math_mpq.hh
BLI_math_rotation.h
BLI_math_rotation.hh
BLI_math_rotation_legacy.hh
BLI_math_solvers.h
BLI_math_statistics.h
BLI_math_time.h

View File

@@ -5,7 +5,7 @@
*/
#include "BLI_math_base.h"
#include "BLI_math_rotation.hh"
#include "BLI_math_rotation_legacy.hh"
#include "BLI_math_vector.h"
#include "BLI_math_vector.hh"

View File

@@ -5,7 +5,7 @@
#include "BLI_math_base.h"
#include "BLI_math_matrix.h"
#include "BLI_math_rotation.h"
#include "BLI_math_rotation.hh"
#include "BLI_math_rotation_legacy.hh"
#include "BLI_math_vector.hh"
#include "BLI_vector.hh"

View File

@@ -7,121 +7,6 @@
namespace blender::tests {
namespace {
struct MyValue {
static inline int alive = 0;
MyValue()
{
if (alive == 15) {
throw std::exception();
}
alive++;
}
MyValue(const MyValue & /*other*/)
{
if (alive == 15) {
throw std::exception();
}
alive++;
}
~MyValue()
{
alive--;
}
};
} // namespace
TEST(memory_utils, DefaultConstructN_ActuallyCallsConstructor)
{
constexpr int amount = 10;
TypedBuffer<MyValue, amount> buffer;
EXPECT_EQ(MyValue::alive, 0);
default_construct_n(buffer.ptr(), amount);
EXPECT_EQ(MyValue::alive, amount);
destruct_n(buffer.ptr(), amount);
EXPECT_EQ(MyValue::alive, 0);
}
TEST(memory_utils, DefaultConstructN_StrongExceptionSafety)
{
constexpr int amount = 20;
TypedBuffer<MyValue, amount> buffer;
EXPECT_EQ(MyValue::alive, 0);
EXPECT_THROW(default_construct_n(buffer.ptr(), amount), std::exception);
EXPECT_EQ(MyValue::alive, 0);
}
TEST(memory_utils, UninitializedCopyN_ActuallyCopies)
{
constexpr int amount = 5;
TypedBuffer<MyValue, amount> buffer1;
TypedBuffer<MyValue, amount> buffer2;
EXPECT_EQ(MyValue::alive, 0);
default_construct_n(buffer1.ptr(), amount);
EXPECT_EQ(MyValue::alive, amount);
uninitialized_copy_n(buffer1.ptr(), amount, buffer2.ptr());
EXPECT_EQ(MyValue::alive, 2 * amount);
destruct_n(buffer1.ptr(), amount);
EXPECT_EQ(MyValue::alive, amount);
destruct_n(buffer2.ptr(), amount);
EXPECT_EQ(MyValue::alive, 0);
}
TEST(memory_utils, UninitializedCopyN_StrongExceptionSafety)
{
constexpr int amount = 10;
TypedBuffer<MyValue, amount> buffer1;
TypedBuffer<MyValue, amount> buffer2;
EXPECT_EQ(MyValue::alive, 0);
default_construct_n(buffer1.ptr(), amount);
EXPECT_EQ(MyValue::alive, amount);
EXPECT_THROW(uninitialized_copy_n(buffer1.ptr(), amount, buffer2.ptr()), std::exception);
EXPECT_EQ(MyValue::alive, amount);
destruct_n(buffer1.ptr(), amount);
EXPECT_EQ(MyValue::alive, 0);
}
TEST(memory_utils, UninitializedFillN_ActuallyCopies)
{
constexpr int amount = 10;
TypedBuffer<MyValue, amount> buffer;
EXPECT_EQ(MyValue::alive, 0);
{
MyValue value;
EXPECT_EQ(MyValue::alive, 1);
uninitialized_fill_n(buffer.ptr(), amount, value);
EXPECT_EQ(MyValue::alive, 1 + amount);
destruct_n(buffer.ptr(), amount);
EXPECT_EQ(MyValue::alive, 1);
}
EXPECT_EQ(MyValue::alive, 0);
}
TEST(memory_utils, UninitializedFillN_StrongExceptionSafety)
{
constexpr int amount = 20;
TypedBuffer<MyValue, amount> buffer;
EXPECT_EQ(MyValue::alive, 0);
{
MyValue value;
EXPECT_EQ(MyValue::alive, 1);
EXPECT_THROW(uninitialized_fill_n(buffer.ptr(), amount, value), std::exception);
EXPECT_EQ(MyValue::alive, 1);
}
EXPECT_EQ(MyValue::alive, 0);
}
class TestBaseClass {
virtual void mymethod(){};
};

View File

@@ -301,7 +301,7 @@ static void oldnewmap_clear(OldNewMap *onm)
MEM_freeN(new_addr.newp);
}
}
onm->map.clear();
onm->map.clear_and_shrink();
}
static void oldnewmap_free(OldNewMap *onm)

View File

@@ -276,13 +276,22 @@ static void do_version_hue_sat_node(bNodeTree *ntree, bNode *node)
return;
}
/* Make sure new sockets are properly created. */
node_verify_sockets(ntree, node, false);
/* Convert value from old storage to new sockets. */
NodeHueSat *nhs = node->storage;
bNodeSocket *hue = nodeFindSocket(node, SOCK_IN, "Hue"),
*saturation = nodeFindSocket(node, SOCK_IN, "Saturation"),
*value = nodeFindSocket(node, SOCK_IN, "Value");
bNodeSocket *hue = nodeFindSocket(node, SOCK_IN, "Hue");
bNodeSocket *saturation = nodeFindSocket(node, SOCK_IN, "Saturation");
bNodeSocket *value = nodeFindSocket(node, SOCK_IN, "Value");
if (hue == NULL) {
hue = nodeAddStaticSocket(ntree, node, SOCK_IN, SOCK_FLOAT, PROP_FACTOR, "Hue", "Hue");
}
if (saturation == NULL) {
saturation = nodeAddStaticSocket(
ntree, node, SOCK_IN, SOCK_FLOAT, PROP_FACTOR, "Saturation", "Saturation");
}
if (value == NULL) {
value = nodeAddStaticSocket(ntree, node, SOCK_IN, SOCK_FLOAT, PROP_FACTOR, "Value", "Value");
}
((bNodeSocketValueFloat *)hue->default_value)->value = nhs->hue;
((bNodeSocketValueFloat *)saturation->default_value)->value = nhs->sat;
((bNodeSocketValueFloat *)value->default_value)->value = nhs->val;

View File

@@ -266,7 +266,7 @@ static void opencl_initialize(const bool use_opencl)
static void opencl_deinitialize()
{
g_work_scheduler.opencl.devices.clear_and_make_inline();
g_work_scheduler.opencl.devices.clear_and_shrink();
if (g_work_scheduler.opencl.program) {
clReleaseProgram(g_work_scheduler.opencl.program);
@@ -364,7 +364,7 @@ static void threading_model_queue_deinitialize()
{
/* deinitialize CPU threads */
if (g_work_scheduler.queue.initialized) {
g_work_scheduler.queue.devices.clear_and_make_inline();
g_work_scheduler.queue.devices.clear_and_shrink();
BLI_thread_local_delete(g_thread_device);
g_work_scheduler.queue.initialized = false;

View File

@@ -60,8 +60,10 @@ set(SRC
COM_utilities.hh
algorithms/intern/algorithm_parallel_reduction.cc
algorithms/intern/symmetric_separable_blur.cc
algorithms/COM_algorithm_parallel_reduction.hh
algorithms/COM_algorithm_symmetric_separable_blur.hh
cached_resources/intern/morphological_distance_feather_weights.cc
cached_resources/intern/symmetric_blur_weights.cc
@@ -96,6 +98,14 @@ set(GLSL_SRC
shaders/compositor_ellipse_mask.glsl
shaders/compositor_filter.glsl
shaders/compositor_flip.glsl
shaders/compositor_glare_ghost_accumulate.glsl
shaders/compositor_glare_ghost_base.glsl
shaders/compositor_glare_highlights.glsl
shaders/compositor_glare_mix.glsl
shaders/compositor_glare_simple_star_anti_diagonal_pass.glsl
shaders/compositor_glare_simple_star_diagonal_pass.glsl
shaders/compositor_glare_simple_star_horizontal_pass.glsl
shaders/compositor_glare_simple_star_vertical_pass.glsl
shaders/compositor_image_crop.glsl
shaders/compositor_morphological_distance.glsl
shaders/compositor_morphological_distance_feather.glsl
@@ -129,6 +139,7 @@ set(GLSL_SRC
shaders/library/gpu_shader_compositor_gamma.glsl
shaders/library/gpu_shader_compositor_hue_correct.glsl
shaders/library/gpu_shader_compositor_hue_saturation_value.glsl
shaders/library/gpu_shader_compositor_image_diagonals.glsl
shaders/library/gpu_shader_compositor_invert.glsl
shaders/library/gpu_shader_compositor_luminance_matte.glsl
shaders/library/gpu_shader_compositor_main.glsl
@@ -181,6 +192,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_ellipse_mask_info.hh
shaders/infos/compositor_filter_info.hh
shaders/infos/compositor_flip_info.hh
shaders/infos/compositor_glare_info.hh
shaders/infos/compositor_image_crop_info.hh
shaders/infos/compositor_morphological_distance_feather_info.hh
shaders/infos/compositor_morphological_distance_info.hh

View File

@@ -105,6 +105,11 @@ class Result {
* and release the result's texture. */
Result(ResultType type, TexturePool &texture_pool);
/* Identical to the standard constructor but initializes the reference count to 1. This is useful
* to construct temporary results that are created and released by the developer manually, which
* are typically used in operations that need temporary intermediate results. */
static Result Temporary(ResultType type, TexturePool &texture_pool);
/* Declare the result to be a texture result, allocate a texture of an appropriate type with
* the size of the given domain from the result's texture pool, and set the domain of the result
* to the given domain. */
@@ -125,8 +130,9 @@ class Result {
void bind_as_texture(GPUShader *shader, const char *texture_name) const;
/* Bind the texture of the result to the image unit with the given name in the currently bound
* given shader. */
void bind_as_image(GPUShader *shader, const char *image_name) const;
* given shader. If read is true, a memory barrier will be inserted for image reads to ensure any
* prior writes to the image are reflected before reading from it. */
void bind_as_image(GPUShader *shader, const char *image_name, bool read = false) const;
/* Unbind the texture which was previously bound using bind_as_texture. */
void unbind_as_texture() const;

View File

@@ -0,0 +1,27 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_math_vec_types.hh"
#include "COM_context.hh"
#include "COM_result.hh"
namespace blender::realtime_compositor {
/* Blur the input using horizontal and vertical separable blur passes given a certain radius and
* filter type, using SymmetricSeparableBlurWeights. The output is written to the given output
* result, which will be allocated internally and is thus expected not to be previously allocated.
* If extend_bounds is true, the output will have an extra radius amount of pixels on the boundary
* of the image, where blurring can take place assuming fully transparent out-of-bounds values. If
* gamma_correct is true, the input will be gamma corrected before blurring and then uncorrected
* after blurring, using a gamma coefficient of 2. */
void symmetric_separable_blur(Context &context,
Result &input,
Result &output,
float2 radius,
int filter_type,
bool extend_bounds,
bool gamma_correct);
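/* Illustration only (not part of this patch): a typical call that blurs an input into an output
* with a Gaussian filter and a radius of 5 pixels on both axes. R_FILTER_GAUSS is assumed to be
* the Gaussian filter type constant used by the blur nodes.
*
*   symmetric_separable_blur(context, input, output, float2(5.0f), R_FILTER_GAUSS, false, false);
*/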
} // namespace blender::realtime_compositor

View File

@@ -0,0 +1,132 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_math_base.hh"
#include "BLI_math_vec_types.hh"
#include "BLI_math_vector.hh"
#include "GPU_shader.h"
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_symmetric_separable_blur.hh"
#include "COM_symmetric_separable_blur_weights.hh"
namespace blender::realtime_compositor {
static Result horizontal_pass(Context &context,
Result &input,
float radius,
int filter_type,
bool extend_bounds,
bool gamma_correct)
{
GPUShader *shader = context.shader_manager().get("compositor_symmetric_separable_blur");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
GPU_shader_uniform_1b(shader, "gamma_correct_input", gamma_correct);
GPU_shader_uniform_1b(shader, "gamma_uncorrect_output", false);
input.bind_as_texture(shader, "input_tx");
const SymmetricSeparableBlurWeights &weights =
context.cache_manager().get_symmetric_separable_blur_weights(filter_type, radius);
weights.bind_as_texture(shader, "weights_tx");
Domain domain = input.domain();
if (extend_bounds) {
domain.size.x += int(math::ceil(radius)) * 2;
}
/* We allocate an output image of a transposed size, that is, with a height equivalent to the
* width of the input and vice versa. This is done as a performance optimization. The shader
* will blur the image horizontally and write it to the intermediate output transposed. Then
* the vertical pass will execute the same horizontal blur shader, but since its input is
* transposed, it will effectively do a vertical blur and write to the output transposed,
* effectively undoing the transposition in the horizontal pass. This is done to improve
* spatial cache locality in the shader and to avoid having two separate shaders for each blur
* pass. */
const int2 transposed_domain = int2(domain.size.y, domain.size.x);
Result output = Result::Temporary(ResultType::Color, context.texture_pool());
output.allocate_texture(transposed_domain);
output.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, domain.size);
GPU_shader_unbind();
input.unbind_as_texture();
weights.unbind_as_texture();
output.unbind_as_image();
return output;
}
static void vertical_pass(Context &context,
Result &original_input,
Result &horizontal_pass_result,
Result &output,
float2 radius,
int filter_type,
bool extend_bounds,
bool gamma_correct)
{
GPUShader *shader = context.shader_manager().get("compositor_symmetric_separable_blur");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
GPU_shader_uniform_1b(shader, "gamma_correct_input", false);
GPU_shader_uniform_1b(shader, "gamma_uncorrect_output", gamma_correct);
horizontal_pass_result.bind_as_texture(shader, "input_tx");
const SymmetricSeparableBlurWeights &weights =
context.cache_manager().get_symmetric_separable_blur_weights(filter_type, radius.y);
weights.bind_as_texture(shader, "weights_tx");
Domain domain = original_input.domain();
if (extend_bounds) {
/* Add a radius amount of pixels on both sides of the image, hence the multiply by 2. */
domain.size += int2(math::ceil(radius)) * 2;
}
output.allocate_texture(domain);
output.bind_as_image(shader, "output_img");
/* Notice that the domain is transposed, see the note on the horizontal pass method for more
* information on the reasoning behind this. */
compute_dispatch_threads_at_least(shader, int2(domain.size.y, domain.size.x));
GPU_shader_unbind();
horizontal_pass_result.unbind_as_texture();
output.unbind_as_image();
weights.unbind_as_texture();
}
void symmetric_separable_blur(Context &context,
Result &input,
Result &output,
float2 radius,
int filter_type,
bool extend_bounds,
bool gamma_correct)
{
Result horizontal_pass_result = horizontal_pass(
context, input, radius.x, filter_type, extend_bounds, gamma_correct);
vertical_pass(context,
input,
horizontal_pass_result,
output,
radius,
filter_type,
extend_bounds,
gamma_correct);
horizontal_pass_result.release();
}
} // namespace blender::realtime_compositor

View File

@@ -18,6 +18,13 @@ Result::Result(ResultType type, TexturePool &texture_pool)
{
}
Result Result::Temporary(ResultType type, TexturePool &texture_pool)
{
Result result = Result(type, texture_pool);
result.increment_reference_count();
return result;
}
void Result::allocate_texture(Domain domain)
{
is_single_value_ = false;
@@ -79,8 +86,13 @@ void Result::bind_as_texture(GPUShader *shader, const char *texture_name) const
GPU_texture_bind(texture_, texture_image_unit);
}
void Result::bind_as_image(GPUShader *shader, const char *image_name) const
void Result::bind_as_image(GPUShader *shader, const char *image_name, bool read) const
{
/* Make sure any prior writes to the texture are reflected before reading from it. */
if (read) {
GPU_memory_barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
const int image_unit = GPU_shader_get_texture_binding(shader, image_name);
GPU_texture_image_bind(texture_, image_unit);
}
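/* Illustration only (not part of this patch): a hypothetical helper showing how a temporary
* result is meant to be used together with bind_as_image() with read = true. The shader, image
* name and surrounding operation code are assumptions for the sake of the example. */
static void temporary_result_example(Context &context, GPUShader *shader, const Domain &domain)
{
/* The temporary result starts with a reference count of 1, so a single release() frees it. */
Result scratch = Result::Temporary(ResultType::Color, context.texture_pool());
scratch.allocate_texture(domain);
/* Passing true inserts a memory barrier so prior image writes are visible before reading. */
scratch.bind_as_image(shader, "scratch_img", true);
/* ... dispatch the shader here ... */
scratch.unbind_as_image();
scratch.release();
}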

View File

@@ -0,0 +1,37 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
ivec2 input_size = texture_size(input_ghost_tx);
/* Add 0.5 to evaluate the input sampler at the center of the pixel and divide by the image size
* to get the coordinates into the sampler's expected [0, 1] range. */
vec2 coordinates = (vec2(texel) + vec2(0.5)) / input_size;
/* We accumulate four variants of the input ghost texture, each scaled by some amount and
* possibly multiplied by some color as a form of color modulation. */
vec4 accumulated_ghost = vec4(0.0);
for (int i = 0; i < 4; i++) {
float scale = scales[i];
vec4 color_modulator = color_modulators[i];
/* Scale the coordinates for the ghost, pre subtract 0.5 and post add 0.5 to use 0.5 as the
* origin of the scaling. */
vec2 scaled_coordinates = (coordinates - 0.5) * scale + 0.5;
/* The value of the ghost is attenuated by a scalar multiple of the inverse distance to the
* center, such that it is maximum at the center and becomes zero further from the center,
* making sure to take the scale into account. The scalar multiple of 1 / 4 is chosen using
* visual judgement. */
float distance_to_center = distance(coordinates, vec2(0.5)) * 2.0;
float attenuator = max(0.0, 1.0 - distance_to_center * abs(scale)) / 4.0;
/* Accumulate the scaled ghost after attenuating and color modulating its value. */
vec4 multiplier = attenuator * color_modulator;
accumulated_ghost += texture(input_ghost_tx, scaled_coordinates) * multiplier;
}
vec4 current_accumulated_ghost = imageLoad(accumulated_ghost_img, texel);
imageStore(accumulated_ghost_img, texel, current_accumulated_ghost + accumulated_ghost);
}

View File

@@ -0,0 +1,37 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
ivec2 input_size = texture_size(small_ghost_tx);
/* Add 0.5 to evaluate the input sampler at the center of the pixel and divide by the image size
* to get the coordinates into the sampler's expected [0, 1] range. */
vec2 coordinates = (vec2(texel) + vec2(0.5)) / input_size;
/* The small ghost is scaled down with the origin as the center of the image by a factor of 2.13,
* while the big ghost is flipped and scaled up with the origin as the center of the image by a
* factor of 0.97. Note that 1) The negative scale implements the flipping. 2) Factors larger
* than 1 actually scale down the image since the factor multiplies the coordinates and not the
* image itself. 3) The values are arbitrarily chosen using visual judgement. */
float small_ghost_scale = 2.13;
float big_ghost_scale = -0.97;
/* Scale the coordinates for the small and big ghosts, pre subtract 0.5 and post add 0.5 to use
* 0.5 as the origin of the scaling. Notice that the big ghost is flipped due to the negative
* scale. */
vec2 small_ghost_coordinates = (coordinates - 0.5) * small_ghost_scale + 0.5;
vec2 big_ghost_coordinates = (coordinates - 0.5) * big_ghost_scale + 0.5;
/* The values of the ghosts are attenuated by the inverse distance to the center, such that they
* are maximum at the center and become zero further from the center, making sure to take the
* aforementioned scale into account. */
float distance_to_center = distance(coordinates, vec2(0.5)) * 2.0;
float small_ghost_attenuator = max(0.0, 1.0 - distance_to_center * small_ghost_scale);
float big_ghost_attenuator = max(0.0, 1.0 - distance_to_center * abs(big_ghost_scale));
vec4 small_ghost = texture(small_ghost_tx, small_ghost_coordinates) * small_ghost_attenuator;
vec4 big_ghost = texture(big_ghost_tx, big_ghost_coordinates) * big_ghost_attenuator;
imageStore(combined_ghost_img, texel, small_ghost + big_ghost);
}

View File

@@ -0,0 +1,31 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
/* The dispatch domain covers the output image size, which might be a fraction of the input image
* size, so you will notice the output image size used throughout the shader instead of the input
* one. */
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
/* Since the output image might be a fraction of the input image size, and since we want to
* evaluate the input sampler at the center of the output pixel, we add an offset equal to half
* the number of input pixels that cover a single output pixel. In case the input and output
* have the same size, this will be 0.5, which is the offset required to evaluate the sampler at
* the center of the pixel. */
vec2 offset = (texture_size(input_tx) / imageSize(output_img)) / 2.0;
/* Add the aforementioned offset and divide by the output image size to get the coordinates into
* the sampler's expected [0, 1] range. */
vec2 normalized_coordinates = (vec2(texel) + offset) / imageSize(output_img);
vec4 input_color = texture(input_tx, normalized_coordinates);
float luminance = dot(input_color.rgb, luminance_coefficients);
/* A pixel whose luminance is less than the threshold is not considered part of the highlights
* and is given a value of zero. Otherwise, the pixel is considered part of the highlights, and
* its value is the input color minus the threshold, clamped to zero. */
bool is_highlights = luminance >= threshold;
vec3 highlights = is_highlights ? max(vec3(0.0), input_color.rgb - threshold) : vec3(0.0);
imageStore(output_img, texel, vec4(highlights, 1.0));
}

View File

@@ -0,0 +1,28 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
/* Add 0.5 to evaluate the input sampler at the center of the pixel and divide by the input image
* size to get the relevant coordinates into the sampler's expected [0, 1] range. Make sure the
* input color is not negative to avoid a subtractive effect when mixing the glare. */
vec2 normalized_coordinates = (vec2(texel) + vec2(0.5)) / texture_size(input_tx);
vec4 glare_color = texture(glare_tx, normalized_coordinates);
vec4 input_color = max(vec4(0.0), texture_load(input_tx, texel));
/* The mix factor is in the range [-1, 1] and linearly interpolates between the three values such
* that:
* 1 => Glare only.
* 0 => Input + Glare.
* -1 => Input only.
* We implement that as a weighted sum as follows. When the mix factor is 1, the glare weight
* should be 1 and the input weight should be 0. When the mix factor is -1, the glare weight
* should be 0 and the input weight should be 1. When the mix factor is 0, both weights should
* be 1. This can be expressed using the following compact min max expressions. */
float input_weight = 1.0 - max(0.0, mix_factor);
float glare_weight = 1.0 + min(0.0, mix_factor);
vec3 highlights = input_weight * input_color.rgb + glare_weight * glare_color.rgb;
imageStore(output_img, texel, vec4(highlights, input_color.a));
}

View File

@@ -0,0 +1,55 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_image_diagonals.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
ivec2 size = imageSize(anti_diagonal_img);
int index = int(gl_GlobalInvocationID.x);
int anti_diagonal_length = compute_anti_diagonal_length(size, index);
ivec2 start = compute_anti_diagonal_start(size, index);
ivec2 direction = get_anti_diagonal_direction();
ivec2 end = start + (anti_diagonal_length - 1) * direction;
/* For each iteration, apply a causal filter followed by a non causal filter along the anti
* diagonal mapped to the current thread invocation. */
for (int i = 0; i < iterations; i++) {
/* Causal Pass:
* Sequentially apply a causal filter running from the start of the anti diagonal to its end by
* mixing the value of the pixel in the anti diagonal with the average value of the previous
* output and next input in the same anti diagonal. */
for (int j = 0; j < anti_diagonal_length; j++) {
ivec2 texel = start + j * direction;
vec4 previous_output = imageLoad(anti_diagonal_img, texel - i * direction);
vec4 current_input = imageLoad(anti_diagonal_img, texel);
vec4 next_input = imageLoad(anti_diagonal_img, texel + i * direction);
vec4 neighbour_average = (previous_output + next_input) / 2.0;
vec4 causal_output = mix(current_input, neighbour_average, fade_factor);
imageStore(anti_diagonal_img, texel, causal_output);
}
/* Non Causal Pass:
* Sequentially apply a non causal filter running from the end of the diagonal to its start by
* mixing the value of the pixel in the diagonal with the average value of the previous output
* and next input in the same diagonal. */
for (int j = 0; j < anti_diagonal_length; j++) {
ivec2 texel = end - j * direction;
vec4 previous_output = imageLoad(anti_diagonal_img, texel + i * direction);
vec4 current_input = imageLoad(anti_diagonal_img, texel);
vec4 next_input = imageLoad(anti_diagonal_img, texel - i * direction);
vec4 neighbour_average = (previous_output + next_input) / 2.0;
vec4 non_causal_output = mix(current_input, neighbour_average, fade_factor);
imageStore(anti_diagonal_img, texel, non_causal_output);
}
}
/* For each pixel in the anti diagonal mapped to the current invocation thread, add the result of
* the diagonal pass to the anti diagonal pass. */
for (int j = 0; j < anti_diagonal_length; j++) {
ivec2 texel = start + j * direction;
vec4 horizontal = texture_load(diagonal_tx, texel);
vec4 vertical = imageLoad(anti_diagonal_img, texel);
imageStore(anti_diagonal_img, texel, horizontal + vertical);
}
}

View File

@@ -0,0 +1,45 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_image_diagonals.glsl)
void main()
{
ivec2 size = imageSize(diagonal_img);
int index = int(gl_GlobalInvocationID.x);
int diagonal_length = compute_diagonal_length(size, index);
ivec2 start = compute_diagonal_start(size, index);
ivec2 direction = get_diagonal_direction();
ivec2 end = start + (diagonal_length - 1) * direction;
/* For each iteration, apply a causal filter followed by a non causal filter along the diagonal
* mapped to the current thread invocation. */
for (int i = 0; i < iterations; i++) {
/* Causal Pass:
* Sequentially apply a causal filter running from the start of the diagonal to its end by
* mixing the value of the pixel in the diagonal with the average value of the previous output
* and next input in the same diagonal. */
for (int j = 0; j < diagonal_length; j++) {
ivec2 texel = start + j * direction;
vec4 previous_output = imageLoad(diagonal_img, texel - i * direction);
vec4 current_input = imageLoad(diagonal_img, texel);
vec4 next_input = imageLoad(diagonal_img, texel + i * direction);
vec4 neighbour_average = (previous_output + next_input) / 2.0;
vec4 causal_output = mix(current_input, neighbour_average, fade_factor);
imageStore(diagonal_img, texel, causal_output);
}
/* Non Causal Pass:
* Sequentially apply a non causal filter running from the end of the diagonal to its start by
* mixing the value of the pixel in the diagonal with the average value of the previous output
* and next input in the same diagonal. */
for (int j = 0; j < diagonal_length; j++) {
ivec2 texel = end - j * direction;
vec4 previous_output = imageLoad(diagonal_img, texel + i * direction);
vec4 current_input = imageLoad(diagonal_img, texel);
vec4 next_input = imageLoad(diagonal_img, texel - i * direction);
vec4 neighbour_average = (previous_output + next_input) / 2.0;
vec4 non_causal_output = mix(current_input, neighbour_average, fade_factor);
imageStore(diagonal_img, texel, non_causal_output);
}
}
}

View File

@@ -0,0 +1,38 @@
void main()
{
int width = imageSize(horizontal_img).x;
/* For each iteration, apply a causal filter followed by a non causal filter along the row
* mapped to the current thread invocation. */
for (int i = 0; i < iterations; i++) {
/* Causal Pass:
* Sequentially apply a causal filter running from left to right by mixing the value of the
* pixel in the row with the average value of the previous output and next input in the same
* row. */
for (int x = 0; x < width; x++) {
ivec2 texel = ivec2(x, gl_GlobalInvocationID.x);
vec4 previous_output = imageLoad(horizontal_img, texel - ivec2(i, 0));
vec4 current_input = imageLoad(horizontal_img, texel);
vec4 next_input = imageLoad(horizontal_img, texel + ivec2(i, 0));
vec4 neighbour_average = (previous_output + next_input) / 2.0;
vec4 causal_output = mix(current_input, neighbour_average, fade_factor);
imageStore(horizontal_img, texel, causal_output);
}
/* Non Causal Pass:
* Sequentially apply a non causal filter running from right to left by mixing the value of the
* pixel in the row with the average value of the previous output and next input in the same
* row. */
for (int x = width - 1; x >= 0; x--) {
ivec2 texel = ivec2(x, gl_GlobalInvocationID.x);
vec4 previous_output = imageLoad(horizontal_img, texel + ivec2(i, 0));
vec4 current_input = imageLoad(horizontal_img, texel);
vec4 next_input = imageLoad(horizontal_img, texel - ivec2(i, 0));
vec4 neighbour_average = (previous_output + next_input) / 2.0;
vec4 non_causal_output = mix(current_input, neighbour_average, fade_factor);
imageStore(horizontal_img, texel, non_causal_output);
}
}
}

View File

@@ -0,0 +1,49 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
int height = imageSize(vertical_img).y;
/* For each iteration, apply a causal filter followed by a non causal filter along the column
* mapped to the current thread invocation. */
for (int i = 0; i < iterations; i++) {
/* Causal Pass:
* Sequentially apply a causal filter running from bottom to top by mixing the value of the
* pixel in the column with the average value of the previous output and next input in the same
* column. */
for (int y = 0; y < height; y++) {
ivec2 texel = ivec2(gl_GlobalInvocationID.x, y);
vec4 previous_output = imageLoad(vertical_img, texel - ivec2(0, i));
vec4 current_input = imageLoad(vertical_img, texel);
vec4 next_input = imageLoad(vertical_img, texel + ivec2(0, i));
vec4 neighbour_average = (previous_output + next_input) / 2.0;
vec4 causal_output = mix(current_input, neighbour_average, fade_factor);
imageStore(vertical_img, texel, causal_output);
}
/* Non Causal Pass:
* Sequentially apply a non causal filter running from top to bottom by mixing the value of the
* pixel in the column with the average value of the previous output and next input in the same
* column. */
for (int y = height - 1; y >= 0; y--) {
ivec2 texel = ivec2(gl_GlobalInvocationID.x, y);
vec4 previous_output = imageLoad(vertical_img, texel + ivec2(0, i));
vec4 current_input = imageLoad(vertical_img, texel);
vec4 next_input = imageLoad(vertical_img, texel - ivec2(0, i));
vec4 neighbour_average = (previous_output + next_input) / 2.0;
vec4 non_causal_output = mix(current_input, neighbour_average, fade_factor);
imageStore(vertical_img, texel, non_causal_output);
}
}
/* For each pixel in the column mapped to the current invocation thread, add the result of the
* horizontal pass to the vertical pass. */
for (int y = 0; y < height; y++) {
ivec2 texel = ivec2(gl_GlobalInvocationID.x, y);
vec4 horizontal = texture_load(horizontal_tx, texel);
vec4 vertical = imageLoad(vertical_img, texel);
imageStore(vertical_img, texel, horizontal + vertical);
}
}

View File

@@ -20,9 +20,9 @@ vec3 compute_chromatic_distortion_scale(float distance_squared)
/* Compute the image coordinates after distortion by the given distortion scale computed by the
* compute_distortion_scale function. Note that the function expects centered normalized UV
* coordinates but outputs non-centered image coordinates. */
vec2 compute_distorted_uv(vec2 uv, float scale)
vec2 compute_distorted_uv(vec2 uv, float uv_scale)
{
return (uv * scale + 0.5) * texture_size(input_tx) - 0.5;
return (uv * uv_scale + 0.5) * texture_size(input_tx) - 0.5;
}
/* Compute the number of integration steps that should be used to approximate the distorted pixel

View File

@@ -0,0 +1,84 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
/* -------
* Common.
* ------- */
GPU_SHADER_CREATE_INFO(compositor_glare_highlights)
.local_group_size(16, 16)
.push_constant(Type::FLOAT, "threshold")
.push_constant(Type::VEC3, "luminance_coefficients")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_glare_highlights.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_glare_mix)
.local_group_size(16, 16)
.push_constant(Type::FLOAT, "mix_factor")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "glare_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_glare_mix.glsl")
.do_static_compilation(true);
/* ------------
* Ghost Glare.
* ------------ */
GPU_SHADER_CREATE_INFO(compositor_glare_ghost_base)
.local_group_size(16, 16)
.sampler(0, ImageType::FLOAT_2D, "small_ghost_tx")
.sampler(1, ImageType::FLOAT_2D, "big_ghost_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_ghost_img")
.compute_source("compositor_glare_ghost_base.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_glare_ghost_accumulate)
.local_group_size(16, 16)
.push_constant(Type::VEC4, "scales")
.push_constant(Type::VEC4, "color_modulators", 4)
.sampler(0, ImageType::FLOAT_2D, "input_ghost_tx")
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "accumulated_ghost_img")
.compute_source("compositor_glare_ghost_accumulate.glsl")
.do_static_compilation(true);
/* -----------
* Simple Star
* ----------- */
GPU_SHADER_CREATE_INFO(compositor_glare_simple_star_horizontal_pass)
.local_group_size(16)
.push_constant(Type::INT, "iterations")
.push_constant(Type::FLOAT, "fade_factor")
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "horizontal_img")
.compute_source("compositor_glare_simple_star_horizontal_pass.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_glare_simple_star_vertical_pass)
.local_group_size(16)
.push_constant(Type::INT, "iterations")
.push_constant(Type::FLOAT, "fade_factor")
.sampler(0, ImageType::FLOAT_2D, "horizontal_tx")
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "vertical_img")
.compute_source("compositor_glare_simple_star_vertical_pass.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_glare_simple_star_diagonal_pass)
.local_group_size(16)
.push_constant(Type::INT, "iterations")
.push_constant(Type::FLOAT, "fade_factor")
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "diagonal_img")
.compute_source("compositor_glare_simple_star_diagonal_pass.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_glare_simple_star_anti_diagonal_pass)
.local_group_size(16)
.push_constant(Type::INT, "iterations")
.push_constant(Type::FLOAT, "fade_factor")
.sampler(0, ImageType::FLOAT_2D, "diagonal_tx")
.image(0, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "anti_diagonal_img")
.compute_source("compositor_glare_simple_star_anti_diagonal_pass.glsl")
.do_static_compilation(true);

View File

@@ -0,0 +1,170 @@
/* Computes the number of diagonals in the matrix of the given size, where the diagonals are
* indexed from the upper left corner to the lower right corner such that their start is at the
* left and bottom edges of the matrix as shown in the diagram below. The numbers in the diagram
* denote the index of the diagonal. The number of diagonals is then intuitively the number of
* values on the left and bottom edges, which is equal to:
*
* Number Of Diagonals => width + height - 1
*
* Notice that the minus one is due to the shared value in the corner.
*
* Width = 6
* +---+---+---+---+---+---+
* | 0 | 1 | 2 | 3 | 4 | 5 |
* +---+---+---+---+---+---+
* | 1 | 2 | 3 | 4 | 5 | 6 | Height = 3
* +---+---+---+---+---+---+
* | 2 | 3 | 4 | 5 | 6 | 7 |
* +---+---+---+---+---+---+
*/
int compute_number_of_diagonals(ivec2 size)
{
return size.x + size.y - 1;
}
/* Computes the number of values in the diagonal of the given index in the matrix with the given
* size, where the diagonals are indexed from the upper left corner to the lower right corner such
* that their start is at the left and bottom edges of the matrix as shown in the diagram below.
* The numbers in the diagram denote the index of the diagonal and its length.
*
* Width = 6
* +---+---+---+---+---+---+
* 1 | 0 | 1 | 2 | 3 | 4 | 5 |
* +---+---+---+---+---+---+
* 2 | 1 | 2 | 3 | 4 | 5 | 6 | Height = 3
* +---+---+---+---+---+---+
* | 2 | 3 | 4 | 5 | 6 | 7 |
* +---+---+---+---+---+---+
* 3 3 3 3 2 1
*
* To derive the length of the diagonal from the index, we note that the lengths of the diagonals
* start at 1 and linearly increase up to the length of the longest diagonal, then remain constant
* until they linearly decrease to 1 at the end. The length of the longest diagonal is intuitively
* the smaller of the width and height of the matrix. The linearly increasing and constant parts of
* the sequence can be described using the following compact equation:
*
* Length => min(Longest Length, index + 1)
*
* The constant and decreasing end parts of the sequence can in turn be described using the following
* compact equation:
*
* Length => min(Longest Length, Number Of Diagonals - index)
*
* All three parts of the sequence can then be combined using the minimum operation because they
* all share the same maximum value, that is, the longest length:
*
* Length => min(Longest Length, index + 1, Number Of Diagonals - index)
*
*/
int compute_diagonal_length(ivec2 size, int diagonal_index)
{
int length_of_longest_diagonal = min(size.x, size.y);
int start_sequence = diagonal_index + 1;
int end_sequence = compute_number_of_diagonals(size) - diagonal_index;
return min(length_of_longest_diagonal, min(start_sequence, end_sequence));
}
/* Computes the position of the start of the diagonal of the given index in the matrix with the
* given size, where the diagonals are indexed from the upper left corner to the lower right corner
* such that their start is at the left and bottom edges of the matrix as shown in the diagram
* below. The numbers in the diagram denote the index of the diagonal and the position of its
* start.
*
* Width = 6
* +-----+-----+-----+-----+-----+-----+
* (0, 2) | 0 | 1 | 2 | 3 | 4 | 5 |
* +-----+-----+-----+-----+-----+-----+
* (0, 1) | 1 | 2 | 3 | 4 | 5 | 6 | Height = 3
* +-----+-----+-----+-----+-----+-----+
* | 2 | 3 | 4 | 5 | 6 | 7 |
* +-----+-----+-----+-----+-----+-----+
* (0, 0) (1,0) (2,0) (3,0) (4,0) (5,0)
*
* To derive the start position from the index, we consider each axis separately. For the X
* position, indices up to (height - 1) have zero x positions, while other indices linearly
* increase from (height) to the end. Which can be described using the compact equation:
*
* X => max(0, index - (height - 1))
*
* For the Y position, indices up to (height - 1) linearly decrease from (height - 1) to zero,
* while other indices are zero. Which can be described using the compact equation:
*
* Y => max(0, (height - 1) - index)
*
*/
ivec2 compute_diagonal_start(ivec2 size, int index)
{
return ivec2(max(0, index - (size.y - 1)), max(0, (size.y - 1) - index));
}
/* Computes a direction vector such that, when added to the position of a value in a matrix, it
* yields the position of the next value in the same diagonal. According to the choice of the start
* of the diagonal in compute_diagonal_start, this is (1, 1). */
ivec2 get_diagonal_direction()
{
return ivec2(1);
}
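/* Worked example (added for illustration): for the 6x3 matrix in the diagrams above, the diagonal
* with index 4 has length min(3, 4 + 1, 8 - 4) = 3, starts at (max(0, 4 - 2), max(0, 2 - 4)) =
* (2, 0), and stepping by the (1, 1) direction visits (2, 0), (3, 1) and (4, 2), which matches the
* cells labeled 4 in the diagrams. */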
/* Computes the number of values in the anti diagonal of the given index in the matrix with the
* given size, where the anti diagonals are indexed from the lower left corner to the upper right
* corner such that their start is at the bottom and right edges of the matrix as shown in the
* diagram below. The numbers in the diagram denote the index of the anti diagonal and its length.
*
* Width = 6
* +---+---+---+---+---+---+
* | 2 | 3 | 4 | 5 | 6 | 7 | 1
* +---+---+---+---+---+---+
* Height = 3 | 1 | 2 | 3 | 4 | 5 | 6 | 2
* +---+---+---+---+---+---+
* | 0 | 1 | 2 | 3 | 4 | 5 |
* +---+---+---+---+---+---+
* 1 2 3 3 3 3
*
* The length of the anti diagonal is identical to the length of the diagonal of the same index, as
* can be seen by comparing the above diagram with the one in the compute_diagonal_length function,
* since the anti diagonals are merely flipped diagonals. */
int compute_anti_diagonal_length(ivec2 size, int diagonal_index)
{
return compute_diagonal_length(size, diagonal_index);
}
/* Computes the position of the start of the anti diagonal of the given index in the matrix with
* the given size, where the anti diagonals are indexed from the lower left corner to the upper
* right corner such that their start is at the bottom and right edges of the matrix as shown in
* the diagram below. The numbers in the diagram denote the index of the anti diagonal and the
* position of its start.
*
* Width = 6
* +-----+-----+-----+-----+-----+-----+
* | 2 | 3 | 4 | 5 | 6 | 7 | (5,2)
* +-----+-----+-----+-----+-----+-----+
* Height = 3 | 1 | 2 | 3 | 4 | 5 | 6 | (5,1)
* +-----+-----+-----+-----+-----+-----+
* | 0 | 1 | 2 | 3 | 4 | 5 |
* +-----+-----+-----+-----+-----+-----+
* (0,0) (1,0) (2,0) (3,0) (4,0) (5,0)
*
* To derive the start position from the index, we consider each axis separately. For the X
* position, the x positions of indices up to (width - 1) increase linearly from zero, while those
* of later indices are all (width - 1). This can be described using the following compact
* equation:
*
* X => min((width - 1), index)
*
* For the Y position, the y positions of indices up to (width - 1) are zero, while those of later
* indices increase linearly up to (height - 1). This can be described using the following compact
* equation:
*
* Y => max(0, index - (width - 1))
*
*/
ivec2 compute_anti_diagonal_start(ivec2 size, int index)
{
return ivec2(min(size.x - 1, index), max(0, index - (size.x - 1)));
}
/* Computes a direction vector that, when added to the position of a value in the matrix, yields
* the position of the next value in the same anti diagonal. Given the choice of the anti diagonal
* start in compute_anti_diagonal_start, this is (-1, 1). */
ivec2 get_anti_diagonal_direction()
{
return ivec2(-1, 1);
}
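To tie the anti diagonal helpers together, here is a hedged host-side C++ sketch (again not part of this file) that enumerates every anti diagonal of the 6x3 example using the length, start, and (-1, 1) direction defined above; the lengths come out as 1 2 3 3 3 3 2 1 and the first and last positions match the labels in the diagrams.
#include <algorithm>
#include <cstdio>
int main()
{
  const int width = 6, height = 3;
  for (int index = 0; index < width + height - 1; index++) {
    const int length = std::min({std::min(width, height), index + 1, width + height - 1 - index});
    /* compute_anti_diagonal_start() mirrored on the host. */
    int x = std::min(width - 1, index);
    int y = std::max(0, index - (width - 1));
    std::printf("anti diagonal %d (length %d):", index, length);
    for (int i = 0; i < length; i++) {
      std::printf(" (%d, %d)", x, y);
      x -= 1; /* Step by the (-1, 1) anti diagonal direction. */
      y += 1;
    }
    std::printf("\n");
  }
  return 0;
}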

View File

@@ -1,35 +1,35 @@
/* A shorthand for 1D textureSize with a zero LOD. */
int texture_size(sampler1D sampler)
int texture_size(sampler1D sampler_1d)
{
return textureSize(sampler, 0);
return textureSize(sampler_1d, 0);
}
/* A shorthand for 1D texelFetch with zero LOD and bounded access clamped to border. */
vec4 texture_load(sampler1D sampler, int x)
vec4 texture_load(sampler1D sampler_1d, int x)
{
const int texture_bound = texture_size(sampler) - 1;
return texelFetch(sampler, clamp(x, 0, texture_bound), 0);
const int texture_bound = texture_size(sampler_1d) - 1;
return texelFetch(sampler_1d, clamp(x, 0, texture_bound), 0);
}
/* A shorthand for 2D textureSize with a zero LOD. */
ivec2 texture_size(sampler2D sampler)
ivec2 texture_size(sampler2D sampler_2d)
{
return textureSize(sampler, 0);
return textureSize(sampler_2d, 0);
}
/* A shorthand for 2D texelFetch with zero LOD and bounded access clamped to border. */
vec4 texture_load(sampler2D sampler, ivec2 texel)
vec4 texture_load(sampler2D sampler_2d, ivec2 texel)
{
const ivec2 texture_bounds = texture_size(sampler) - ivec2(1);
return texelFetch(sampler, clamp(texel, ivec2(0), texture_bounds), 0);
const ivec2 texture_bounds = texture_size(sampler_2d) - ivec2(1);
return texelFetch(sampler_2d, clamp(texel, ivec2(0), texture_bounds), 0);
}
/* A shorthand for 2D texelFetch with zero LOD and a fallback value for out-of-bound access. */
vec4 texture_load(sampler2D sampler, ivec2 texel, vec4 fallback)
vec4 texture_load(sampler2D sampler_2d, ivec2 texel, vec4 fallback)
{
const ivec2 texture_bounds = texture_size(sampler) - ivec2(1);
const ivec2 texture_bounds = texture_size(sampler_2d) - ivec2(1);
if (any(lessThan(texel, ivec2(0))) || any(greaterThan(texel, texture_bounds))) {
return fallback;
}
return texelFetch(sampler, texel, 0);
return texelFetch(sampler_2d, texel, 0);
}
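The two texture_load overloads above implement different out-of-bounds policies: the first clamps the texel to the border, the second substitutes a caller-provided fallback. A small CPU-side C++ analogue (using a plain float vector instead of a sampler, purely illustrative) makes the difference explicit.
#include <algorithm>
#include <vector>
/* Clamp-to-border policy: an out-of-range index reads the nearest edge value. */
static float load_clamped(const std::vector<float> &texels, int x)
{
  const int bound = int(texels.size()) - 1;
  return texels[std::clamp(x, 0, bound)];
}
/* Fallback policy: an out-of-range index returns the caller-provided value instead. */
static float load_with_fallback(const std::vector<float> &texels, int x, float fallback)
{
  if (x < 0 || x >= int(texels.size())) {
    return fallback;
  }
  return texels[x];
}
int main()
{
  const std::vector<float> texels = {0.1f, 0.2f, 0.3f};
  const float a = load_clamped(texels, 5);             /* 0.3f: clamped to the last texel. */
  const float b = load_with_fallback(texels, 5, 0.0f); /* 0.0f: the fallback value. */
  (void)a;
  (void)b;
  return 0;
}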

View File

@@ -477,9 +477,11 @@ set(GLSL_SRC
engines/workbench/shaders/workbench_prepass_pointcloud_vert.glsl
engines/workbench/shaders/workbench_prepass_vert.glsl
engines/workbench/shaders/workbench_shadow_caps_geom.glsl
engines/workbench/shaders/workbench_shadow_caps_vert_no_geom.glsl
engines/workbench/shaders/workbench_shadow_debug_frag.glsl
engines/workbench/shaders/workbench_shadow_geom.glsl
engines/workbench/shaders/workbench_shadow_vert.glsl
engines/workbench/shaders/workbench_shadow_vert_no_geom.glsl
engines/workbench/shaders/workbench_transparent_accum_frag.glsl
engines/workbench/shaders/workbench_transparent_resolve_frag.glsl
engines/workbench/shaders/workbench_volume_frag.glsl

View File

@@ -362,9 +362,13 @@ static void dof_bokeh_pass_init(EEVEE_FramebufferList *fbl,
DRW_shgroup_uniform_vec2_copy(grp, "bokehAnisotropyInv", fx->dof_bokeh_aniso_inv);
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
fx->dof_bokeh_gather_lut_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_RG16F, owner);
fx->dof_bokeh_scatter_lut_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner);
fx->dof_bokeh_resolve_lut_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
fx->dof_bokeh_gather_lut_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), GPU_RG16F, usage, owner);
fx->dof_bokeh_scatter_lut_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), GPU_R16F, usage, owner);
fx->dof_bokeh_resolve_lut_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), GPU_R16F, usage, owner);
GPU_framebuffer_ensure_config(&fbl->dof_bokeh_fb,
{
@@ -398,8 +402,10 @@ static void dof_setup_pass_init(EEVEE_FramebufferList *fbl,
DRW_shgroup_uniform_float_copy(grp, "bokehMaxSize", fx->dof_bokeh_max_size);
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
fx->dof_half_res_color_tx = DRW_texture_pool_query_2d(UNPACK2(res), COLOR_FORMAT, owner);
fx->dof_half_res_coc_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_RG16F, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
fx->dof_half_res_color_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), COLOR_FORMAT, usage, owner);
fx->dof_half_res_coc_tx = DRW_texture_pool_query_2d_ex(UNPACK2(res), GPU_RG16F, usage, owner);
GPU_framebuffer_ensure_config(&fbl->dof_setup_fb,
{
@@ -429,8 +435,11 @@ static void dof_flatten_tiles_pass_init(EEVEE_FramebufferList *fbl,
grp, "halfResCocBuffer", &fx->dof_half_res_coc_tx, NO_FILTERING);
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
fx->dof_coc_tiles_fg_tx = DRW_texture_pool_query_2d(UNPACK2(res), FG_TILE_FORMAT, owner);
fx->dof_coc_tiles_bg_tx = DRW_texture_pool_query_2d(UNPACK2(res), BG_TILE_FORMAT, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
fx->dof_coc_tiles_fg_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), FG_TILE_FORMAT, usage, owner);
fx->dof_coc_tiles_bg_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), BG_TILE_FORMAT, usage, owner);
GPU_framebuffer_ensure_config(&fbl->dof_flatten_tiles_fb,
{
@@ -468,9 +477,11 @@ static void dof_dilate_tiles_pass_init(EEVEE_FramebufferList *fbl,
DRW_shgroup_uniform_int(grp, "ringWidthMultiplier", &fx->dof_dilate_ring_width_multiplier, 1);
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
}
fx->dof_coc_dilated_tiles_fg_tx = DRW_texture_pool_query_2d(UNPACK2(res), FG_TILE_FORMAT, owner);
fx->dof_coc_dilated_tiles_bg_tx = DRW_texture_pool_query_2d(UNPACK2(res), BG_TILE_FORMAT, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
fx->dof_coc_dilated_tiles_fg_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), FG_TILE_FORMAT, usage, owner);
fx->dof_coc_dilated_tiles_bg_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), BG_TILE_FORMAT, usage, owner);
GPU_framebuffer_ensure_config(&fbl->dof_dilate_tiles_fb,
{
@@ -563,7 +574,9 @@ static void dof_reduce_pass_init(EEVEE_FramebufferList *fbl,
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
void *owner = (void *)&EEVEE_depth_of_field_init;
fx->dof_downsample_tx = DRW_texture_pool_query_2d(UNPACK2(quater_res), COLOR_FORMAT, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
fx->dof_downsample_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(quater_res), COLOR_FORMAT, usage, owner);
GPU_framebuffer_ensure_config(&fbl->dof_downsample_fb,
{
@@ -593,7 +606,9 @@ static void dof_reduce_pass_init(EEVEE_FramebufferList *fbl,
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
void *owner = (void *)&EEVEE_depth_of_field_init;
fx->dof_scatter_src_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R11F_G11F_B10F, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
fx->dof_scatter_src_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), GPU_R11F_G11F_B10F, usage, owner);
}
{
@@ -622,10 +637,12 @@ static void dof_reduce_pass_init(EEVEE_FramebufferList *fbl,
if (txl->dof_reduced_color == NULL) {
/* Color needs to be signed format here. See note in shader for explanation. */
/* Do not use the texture pool because it needs mipmaps. */
txl->dof_reduced_color = GPU_texture_create_2d(
"dof_reduced_color", UNPACK2(res), mip_count, GPU_RGBA16F, NULL);
txl->dof_reduced_coc = GPU_texture_create_2d(
"dof_reduced_coc", UNPACK2(res), mip_count, GPU_R16F, NULL);
eGPUTextureUsage tex_flags = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW;
txl->dof_reduced_color = GPU_texture_create_2d_ex(
"dof_reduced_color", UNPACK2(res), mip_count, GPU_RGBA16F, tex_flags, NULL);
txl->dof_reduced_coc = GPU_texture_create_2d_ex(
"dof_reduced_coc", UNPACK2(res), mip_count, GPU_R16F, tex_flags, NULL);
}
GPU_framebuffer_ensure_config(&fbl->dof_reduce_fb,
@@ -681,8 +698,10 @@ static void dof_gather_pass_init(EEVEE_FramebufferList *fbl,
/* Reuse textures from the setup pass. */
/* NOTE: We could use the texture pool to do that for us, but it does not track usage and it might
* backfire (it does in practice). */
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
fx->dof_fg_holefill_color_tx = fx->dof_half_res_color_tx;
fx->dof_fg_holefill_weight_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner);
fx->dof_fg_holefill_weight_tx = DRW_texture_pool_query_2d_ex(
UNPACK2(res), GPU_R16F, usage, owner);
GPU_framebuffer_ensure_config(&fbl->dof_gather_fg_holefill_fb,
{
@@ -714,9 +733,9 @@ static void dof_gather_pass_init(EEVEE_FramebufferList *fbl,
negate_v2(fx->dof_bokeh_aniso);
}
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
fx->dof_fg_color_tx = DRW_texture_pool_query_2d(UNPACK2(res), COLOR_FORMAT, owner);
fx->dof_fg_weight_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
fx->dof_fg_color_tx = DRW_texture_pool_query_2d_ex(UNPACK2(res), COLOR_FORMAT, usage, owner);
fx->dof_fg_weight_tx = DRW_texture_pool_query_2d_ex(UNPACK2(res), GPU_R16F, usage, owner);
/* Reuse textures from the setup pass. */
/* NOTE: We could use the texture pool to do that for us, but it does not track usage and it might
* backfire (it does in practice). */
@@ -752,8 +771,9 @@ static void dof_gather_pass_init(EEVEE_FramebufferList *fbl,
}
DRW_shgroup_call_procedural_triangles(grp, NULL, 1);
fx->dof_bg_color_tx = DRW_texture_pool_query_2d(UNPACK2(res), COLOR_FORMAT, owner);
fx->dof_bg_weight_tx = DRW_texture_pool_query_2d(UNPACK2(res), GPU_R16F, owner);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
fx->dof_bg_color_tx = DRW_texture_pool_query_2d_ex(UNPACK2(res), COLOR_FORMAT, usage, owner);
fx->dof_bg_weight_tx = DRW_texture_pool_query_2d_ex(UNPACK2(res), GPU_R16F, usage, owner);
/* Reuse, since only used for scatter. Foreground is processed before background. */
fx->dof_bg_occlusion_tx = fx->dof_fg_occlusion_tx;

View File

@@ -327,6 +327,8 @@ LightCache *EEVEE_lightcache_create(const int grid_len,
const int vis_size,
const int irr_size[3])
{
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT |
GPU_TEXTURE_USAGE_HOST_READ;
LightCache *light_cache = MEM_callocN(sizeof(LightCache), "LightCache");
light_cache->version = LIGHTCACHE_STATIC_VERSION;
@@ -335,8 +337,8 @@ LightCache *EEVEE_lightcache_create(const int grid_len,
light_cache->cube_data = MEM_callocN(sizeof(EEVEE_LightProbe) * cube_len, "EEVEE_LightProbe");
light_cache->grid_data = MEM_callocN(sizeof(EEVEE_LightGrid) * grid_len, "EEVEE_LightGrid");
light_cache->grid_tx.tex = DRW_texture_create_2d_array(
irr_size[0], irr_size[1], irr_size[2], IRRADIANCE_FORMAT, DRW_TEX_FILTER, NULL);
light_cache->grid_tx.tex = DRW_texture_create_2d_array_ex(
irr_size[0], irr_size[1], irr_size[2], IRRADIANCE_FORMAT, usage, DRW_TEX_FILTER, NULL);
light_cache->grid_tx.tex_size[0] = irr_size[0];
light_cache->grid_tx.tex_size[1] = irr_size[1];
light_cache->grid_tx.tex_size[2] = irr_size[2];
@@ -345,12 +347,12 @@ LightCache *EEVEE_lightcache_create(const int grid_len,
/* Try to create a cubemap array. */
DRWTextureFlag cube_texflag = DRW_TEX_FILTER | DRW_TEX_MIPMAP;
light_cache->cube_tx.tex = DRW_texture_create_cube_array(
cube_size, cube_len, GPU_R11F_G11F_B10F, cube_texflag, NULL);
light_cache->cube_tx.tex = DRW_texture_create_cube_array_ex(
cube_size, cube_len, GPU_R11F_G11F_B10F, usage, cube_texflag, NULL);
if (light_cache->cube_tx.tex == NULL) {
/* Try fallback to 2D array. */
light_cache->cube_tx.tex = DRW_texture_create_2d_array(
cube_size, cube_size, cube_len * 6, GPU_R11F_G11F_B10F, cube_texflag, NULL);
light_cache->cube_tx.tex = DRW_texture_create_2d_array_ex(
cube_size, cube_size, cube_len * 6, GPU_R11F_G11F_B10F, usage, cube_texflag, NULL);
}
light_cache->cube_tx.tex_size[0] = cube_size;
@@ -393,8 +395,13 @@ static bool eevee_lightcache_static_load(LightCache *lcache)
}
if (lcache->grid_tx.tex == NULL) {
lcache->grid_tx.tex = GPU_texture_create_2d_array(
"lightcache_irradiance", UNPACK3(lcache->grid_tx.tex_size), 1, IRRADIANCE_FORMAT, NULL);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
lcache->grid_tx.tex = GPU_texture_create_2d_array_ex("lightcache_irradiance",
UNPACK3(lcache->grid_tx.tex_size),
1,
IRRADIANCE_FORMAT,
usage,
NULL);
GPU_texture_update(lcache->grid_tx.tex, GPU_DATA_UBYTE, lcache->grid_tx.data);
if (lcache->grid_tx.tex == NULL) {
@@ -406,21 +413,27 @@ static bool eevee_lightcache_static_load(LightCache *lcache)
}
if (lcache->cube_tx.tex == NULL) {
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT |
GPU_TEXTURE_USAGE_HOST_READ;
/* Try to create a cubemap array. */
lcache->cube_tx.tex = GPU_texture_create_cube_array("lightcache_cubemaps",
lcache->cube_tx.tex_size[0],
lcache->cube_tx.tex_size[2] / 6,
lcache->mips_len + 1,
GPU_R11F_G11F_B10F,
NULL);
lcache->cube_tx.tex = GPU_texture_create_cube_array_ex("lightcache_cubemaps",
lcache->cube_tx.tex_size[0],
lcache->cube_tx.tex_size[2] / 6,
lcache->mips_len + 1,
GPU_R11F_G11F_B10F,
usage,
NULL);
if (lcache->cube_tx.tex == NULL) {
/* Try fallback to 2D array. */
lcache->cube_tx.tex = GPU_texture_create_2d_array("lightcache_cubemaps_fallback",
UNPACK3(lcache->cube_tx.tex_size),
lcache->mips_len + 1,
GPU_R11F_G11F_B10F,
NULL);
lcache->cube_tx.tex = GPU_texture_create_2d_array_ex("lightcache_cubemaps_fallback",
UNPACK3(lcache->cube_tx.tex_size),
lcache->mips_len + 1,
GPU_R11F_G11F_B10F,
usage,
NULL);
}
if (lcache->cube_tx.tex == NULL) {
@@ -669,9 +682,11 @@ static void eevee_lightbake_count_probes(EEVEE_LightBake *lbake)
static void eevee_lightbake_create_render_target(EEVEE_LightBake *lbake, int rt_res)
{
lbake->rt_depth = DRW_texture_create_cube(rt_res, GPU_DEPTH_COMPONENT24, 0, NULL);
lbake->rt_color = DRW_texture_create_cube(
rt_res, GPU_RGBA16F, DRW_TEX_FILTER | DRW_TEX_MIPMAP, NULL);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW;
lbake->rt_depth = DRW_texture_create_cube_ex(rt_res, GPU_DEPTH_COMPONENT24, usage, 0, NULL);
lbake->rt_color = DRW_texture_create_cube_ex(
rt_res, GPU_RGBA16F, usage, DRW_TEX_FILTER | DRW_TEX_MIPMAP, NULL);
for (int i = 0; i < 6; i++) {
GPU_framebuffer_ensure_config(&lbake->rt_fb[i],
@@ -697,12 +712,13 @@ static void eevee_lightbake_create_resources(EEVEE_LightBake *lbake)
lbake->cube_prb = MEM_callocN(sizeof(LightProbe *) * lbake->cube_len, "EEVEE Cube visgroup ptr");
lbake->grid_prb = MEM_callocN(sizeof(LightProbe *) * lbake->grid_len, "EEVEE Grid visgroup ptr");
lbake->grid_prev = DRW_texture_create_2d_array(lbake->irr_size[0],
lbake->irr_size[1],
lbake->irr_size[2],
IRRADIANCE_FORMAT,
DRW_TEX_FILTER,
NULL);
lbake->grid_prev = DRW_texture_create_2d_array_ex(lbake->irr_size[0],
lbake->irr_size[1],
lbake->irr_size[2],
IRRADIANCE_FORMAT,
GPU_TEXTURE_USAGE_SHADER_READ,
DRW_TEX_FILTER,
NULL);
/* Ensure Light Cache is ready to accept new data. If not recreate one.
* WARNING: All the following must be threadsafe. It's currently protected
@@ -983,12 +999,13 @@ static void eevee_lightbake_copy_irradiance(EEVEE_LightBake *lbake, LightCache *
/* Copy texture by reading back and re-uploading it. */
float *tex = GPU_texture_read(lcache->grid_tx.tex, GPU_DATA_FLOAT, 0);
lbake->grid_prev = DRW_texture_create_2d_array(lbake->irr_size[0],
lbake->irr_size[1],
lbake->irr_size[2],
IRRADIANCE_FORMAT,
DRW_TEX_FILTER,
tex);
lbake->grid_prev = DRW_texture_create_2d_array_ex(lbake->irr_size[0],
lbake->irr_size[1],
lbake->irr_size[2],
IRRADIANCE_FORMAT,
GPU_TEXTURE_USAGE_SHADER_READ,
DRW_TEX_FILTER,
tex);
MEM_freeN(tex);
}

View File

@@ -79,7 +79,7 @@ static void planar_pool_ensure_alloc(EEVEE_Data *vedata, int num_planar_ref)
EEVEE_StorageList *stl = vedata->stl;
EEVEE_EffectsInfo *fx = stl->effects;
/* XXX TODO: OPTIMIZATION: This is a complete waist of texture memory.
/* XXX TODO: OPTIMIZATION: This is a complete waste of texture memory.
* Instead of allocating each planar probe for each viewport,
* only alloc them once using the biggest viewport resolution. */

View File

@@ -64,17 +64,17 @@ int EEVEE_motion_blur_init(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *veda
1 + ((int)fs_size[0] / EEVEE_VELOCITY_TILE_SIZE),
1 + ((int)fs_size[1] / EEVEE_VELOCITY_TILE_SIZE),
};
effects->velocity_tiles_x_tx = DRW_texture_pool_query_2d(
tx_size[0], fs_size[1], GPU_RGBA16, &draw_engine_eevee_type);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
effects->velocity_tiles_x_tx = DRW_texture_pool_query_2d_ex(
tx_size[0], fs_size[1], GPU_RGBA16, usage, &draw_engine_eevee_type);
GPU_framebuffer_ensure_config(&fbl->velocity_tiles_fb[0],
{
GPU_ATTACHMENT_NONE,
GPU_ATTACHMENT_TEXTURE(effects->velocity_tiles_x_tx),
});
effects->velocity_tiles_tx = DRW_texture_pool_query_2d(
tx_size[0], tx_size[1], GPU_RGBA16, &draw_engine_eevee_type);
effects->velocity_tiles_tx = DRW_texture_pool_query_2d_ex(
tx_size[0], tx_size[1], GPU_RGBA16, usage, &draw_engine_eevee_type);
GPU_framebuffer_ensure_config(&fbl->velocity_tiles_fb[1],
{
GPU_ATTACHMENT_NONE,

View File

@@ -48,7 +48,8 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_probe_filter_glossy_no_geom)
.push_constant(Type::FLOAT, "fireflyFactor")
.push_constant(Type::FLOAT, "sampleCount")
.fragment_out(0, Type::VEC4, "FragColor")
// .do_static_compilation(true)
.metal_backend_only(true)
.do_static_compilation(true)
.auto_resource_location(true);
#endif
@@ -87,7 +88,8 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_effect_downsample_cube_no_geom)
.sampler(0, ImageType::FLOAT_CUBE, "source")
.push_constant(Type::FLOAT, "texelSize")
.fragment_out(0, Type::VEC4, "FragColor")
// .do_static_compilation(true)
.metal_backend_only(true)
.do_static_compilation(true)
.auto_resource_location(true);
#endif
@@ -231,7 +233,8 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_lightprobe_planar_downsample)
GPU_SHADER_CREATE_INFO(eevee_legacy_lightprobe_planar_downsample_no_geom)
.additional_info("eevee_legacy_lightprobe_planar_downsample_common")
.vertex_out(eevee_legacy_probe_planar_downsample_geom_frag_iface)
// .do_static_compilation(true)
.metal_backend_only(true)
.do_static_compilation(true)
.auto_resource_location(true);
#endif

View File

@@ -52,7 +52,8 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_clear_no_geom)
.fragment_out(1, Type::VEC4, "volumeExtinction")
.fragment_out(2, Type::VEC4, "volumeEmissive")
.fragment_out(3, Type::VEC4, "volumePhase")
// .do_static_compilation(true)
.metal_backend_only(true)
.do_static_compilation(true)
.auto_resource_location(true);
#endif
@@ -93,7 +94,8 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter)
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_no_geom)
.additional_info("eevee_legacy_volumes_scatter_common")
.vertex_out(legacy_volume_geom_frag_iface)
// .do_static_compilation(true)
.metal_backend_only(true)
.do_static_compilation(true)
.auto_resource_location(true);
#endif
@@ -110,7 +112,8 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_with_lights)
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_scatter_with_lights_no_geom)
.additional_info("eevee_legacy_volumes_scatter_with_lights_common")
.additional_info("eevee_legacy_volumes_scatter_no_geom")
// .do_static_compilation(true)
.metal_backend_only(true)
.do_static_compilation(true)
.auto_resource_location(true);
#endif
@@ -167,13 +170,15 @@ GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_integration_OPTI)
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_integration_no_geom)
.additional_info("eevee_legacy_volumes_integration_common_no_geom")
.additional_info("eevee_legacy_volumes_integration_common_no_opti")
// .do_static_compilation(true)
.metal_backend_only(true)
.do_static_compilation(true)
.auto_resource_location(true);
GPU_SHADER_CREATE_INFO(eevee_legacy_volumes_integration_OPTI_no_geom)
.additional_info("eevee_legacy_volumes_integration_common_no_geom")
.additional_info("eevee_legacy_volumes_integration_common_opti")
// .do_static_compilation(true)
.metal_backend_only(true)
.do_static_compilation(true)
.auto_resource_location(true);
#endif

View File

@@ -9,7 +9,9 @@
* dragging larger headers into the createInfo pipeline which would cause problems.
*/
#pragma once
#ifndef GPU_SHADER
# pragma once
#endif
/* Hierarchical Z down-sampling. */
#define HIZ_MIP_COUNT 8

View File

@@ -166,8 +166,9 @@ void DepthOfField::sync()
/* Now that we know the maximum render resolution of every view, using depth of field, allocate
* the reduced buffers. Color needs to be signed format here. See note in shader for
* explanation. Do not use the texture pool because it needs mipmaps. */
reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, nullptr, DOF_MIP_COUNT);
reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, nullptr, DOF_MIP_COUNT);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
reduced_color_tx_.ensure_2d(GPU_RGBA16F, reduce_size, usage, nullptr, DOF_MIP_COUNT);
reduced_coc_tx_.ensure_2d(GPU_R16F, reduce_size, usage, nullptr, DOF_MIP_COUNT);
reduced_color_tx_.ensure_mip_views();
reduced_coc_tx_.ensure_mip_views();

View File

@@ -24,7 +24,8 @@ void HiZBuffer::sync()
int2 hiz_extent = math::ceil_to_multiple(render_extent, int2(1u << (HIZ_MIP_COUNT - 1)));
int2 dispatch_size = math::divide_ceil(hiz_extent, int2(HIZ_GROUP_SIZE));
hiz_tx_.ensure_2d(GPU_R32F, hiz_extent, nullptr, HIZ_MIP_COUNT);
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
hiz_tx_.ensure_2d(GPU_R32F, hiz_extent, usage, nullptr, HIZ_MIP_COUNT);
hiz_tx_.ensure_mip_views();
GPU_texture_mipmap_mode(hiz_tx_, true, false);

View File

@@ -107,7 +107,13 @@ class UtilityTexture : public Texture {
static constexpr int layer_count = 4 + UTIL_BTDF_LAYER_COUNT;
public:
UtilityTexture() : Texture("UtilityTx", GPU_RGBA16F, int2(lut_size), layer_count, nullptr)
UtilityTexture()
: Texture("UtilityTx",
GPU_RGBA16F,
GPU_TEXTURE_USAGE_SHADER_READ,
int2(lut_size),
layer_count,
nullptr)
{
#ifdef RUNTIME_LUT_CREATION
float *bsdf_ggx_lut = EEVEE_lut_update_ggx_brdf(lut_size);

View File

@@ -26,7 +26,7 @@ shared uint bg_min_coc;
shared uint bg_max_coc;
shared uint bg_min_intersectable_coc;
const uint dof_tile_large_coc_uint = floatBitsToUint(dof_tile_large_coc);
uint dof_tile_large_coc_uint = floatBitsToUint(dof_tile_large_coc);
void main()
{

View File

@@ -40,13 +40,16 @@ void GPENCIL_antialiasing_init(struct GPENCIL_Data *vedata)
return;
}
eGPUTextureUsage usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_ATTACHMENT;
if (txl->smaa_search_tx == NULL) {
txl->smaa_search_tx = GPU_texture_create_2d(
"smaa_search", SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, 1, GPU_R8, NULL);
txl->smaa_search_tx = GPU_texture_create_2d_ex(
"smaa_search", SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, 1, GPU_R8, usage, NULL);
GPU_texture_update(txl->smaa_search_tx, GPU_DATA_UBYTE, searchTexBytes);
txl->smaa_area_tx = GPU_texture_create_2d(
"smaa_area", AREATEX_WIDTH, AREATEX_HEIGHT, 1, GPU_RG8, NULL);
txl->smaa_area_tx = GPU_texture_create_2d_ex(
"smaa_area", AREATEX_WIDTH, AREATEX_HEIGHT, 1, GPU_RG8, usage, NULL);
GPU_texture_update(txl->smaa_area_tx, GPU_DATA_UBYTE, areaTexBytes);
GPU_texture_filter_mode(txl->smaa_search_tx, true);
@@ -54,10 +57,10 @@ void GPENCIL_antialiasing_init(struct GPENCIL_Data *vedata)
}
{
pd->smaa_edge_tx = DRW_texture_pool_query_2d(
size[0], size[1], GPU_RG8, &draw_engine_gpencil_type);
pd->smaa_weight_tx = DRW_texture_pool_query_2d(
size[0], size[1], GPU_RGBA8, &draw_engine_gpencil_type);
pd->smaa_edge_tx = DRW_texture_pool_query_2d_ex(
size[0], size[1], GPU_RG8, usage, &draw_engine_gpencil_type);
pd->smaa_weight_tx = DRW_texture_pool_query_2d_ex(
size[0], size[1], GPU_RGBA8, usage, &draw_engine_gpencil_type);
GPU_framebuffer_ensure_config(&fbl->smaa_edge_fb,
{

View File

@@ -9,6 +9,8 @@
#include "image_texture_info.hh"
namespace blender::draw::image_engine {
/** \brief Create GPUBatch for a IMAGE_ScreenSpaceTextureInfo. */
class BatchUpdater {
TextureInfo &info;
@@ -29,11 +31,6 @@ class BatchUpdater {
init_batch();
}
void discard_batch()
{
GPU_BATCH_DISCARD_SAFE(info.batch);
}
private:
void ensure_clear_batch()
{
@@ -89,3 +86,5 @@ class BatchUpdater {
}
}
};
} // namespace blender::draw::image_engine

View File

@@ -9,9 +9,12 @@
#include "BLI_vector.hh"
#include "IMB_colormanagement.h"
#include "IMB_imbuf.h"
#include "IMB_imbuf_types.h"
namespace blender::draw::image_engine {
struct FloatImageBuffer {
ImBuf *source_buffer = nullptr;
ImBuf *float_buffer = nullptr;
@@ -58,20 +61,21 @@ struct FloatImageBuffer {
*
* For this reason we store the float buffer in separate image buffers. The FloatBufferCache keeps
* track of the cached buffers and whether they are still in use.
*
* TODO: When an image buffer has a float buffer that is not stored in scene linear space, it
* currently doesn't apply color management. In this case we still need to create another float
* buffer, but with the data converted to scene linear.
*/
struct FloatBufferCache {
private:
blender::Vector<FloatImageBuffer> cache_;
Vector<FloatImageBuffer> cache_;
public:
ImBuf *cached_float_buffer(ImBuf *image_buffer)
{
/* Check if we can use the float buffer of the given image_buffer. */
if (image_buffer->rect_float != nullptr) {
BLI_assert_msg(
IMB_colormanagement_space_name_is_scene_linear(
IMB_colormanagement_get_float_colorspace(image_buffer)),
"Expected rect_float to be scene_linear - if there are code paths where this "
"isn't the case we should convert those and add to the FloatBufferCache as well.");
return image_buffer;
}
@@ -128,3 +132,5 @@ struct FloatBufferCache {
cache_.clear();
}
};
} // namespace blender::draw::image_engine

View File

@@ -22,117 +22,143 @@ namespace blender::draw::image_engine {
constexpr float EPSILON_UV_BOUNDS = 0.00001f;
/**
* \brief Screen space method using a 4 textures spawning the whole screen.
* \brief Screen space method using multiple textures covering the region.
*
*/
struct FullScreenTextures {
template<size_t Divisions> class ScreenTileTextures {
public:
static const size_t TexturesPerDimension = Divisions + 1;
static const size_t TexturesRequired = TexturesPerDimension * TexturesPerDimension;
static const size_t VerticesPerDimension = TexturesPerDimension + 1;
private:
/**
* \brief Helper struct to pair a texture info and a region in uv space of the area.
*/
struct TextureInfoBounds {
TextureInfo *info = nullptr;
rctf uv_bounds;
};
IMAGE_InstanceData *instance_data;
FullScreenTextures(IMAGE_InstanceData *instance_data) : instance_data(instance_data)
public:
ScreenTileTextures(IMAGE_InstanceData *instance_data) : instance_data(instance_data)
{
}
/**
* \brief Ensure enough texture infos are allocated in `instance_data`.
*/
void ensure_texture_infos()
{
instance_data->texture_infos.resize(TexturesRequired);
}
/**
* \brief Update the uv and region bounds of all texture_infos of instance_data.
*/
void update_bounds(const ARegion *region)
{
// determine uv_area of the region.
/* determine uv_area of the region. */
Vector<TextureInfo *> unassigned_textures;
float4x4 mat = float4x4(instance_data->ss_to_texture).inverted();
float2 region_uv_min = float2(mat * float3(0.0f, 0.0f, 0.0f));
float2 region_uv_max = float2(mat * float3(1.0f, 1.0f, 0.0f));
float2 region_uv_span = region_uv_max - region_uv_min;
/* Calculate uv coordinates of each vert in the grid of textures. */
/* Construct the uv bounds of the 4 textures that are needed to fill the region. */
Vector<TextureInfoBounds> info_bounds = create_uv_bounds(region_uv_span, region_uv_min);
assign_texture_infos_by_uv_bounds(info_bounds, unassigned_textures);
assign_unused_texture_infos(info_bounds, unassigned_textures);
/* Calculate the region bounds from the uv bounds. */
rctf region_uv_bounds;
BLI_rctf_init(
&region_uv_bounds, region_uv_min.x, region_uv_max.x, region_uv_min.y, region_uv_max.y);
update_region_bounds_from_uv_bounds(region_uv_bounds, float2(region->winx, region->winy));
}
/* Calculate 9 coordinates that will be used as uv bounds of the 4 textures. */
float2 onscreen_multiple = (blender::math::floor(region_uv_min / region_uv_span) +
void ensure_gpu_textures_allocation()
{
float2 viewport_size = DRW_viewport_size_get();
int2 texture_size(ceil(viewport_size.x / Divisions), ceil(viewport_size.y / Divisions));
for (TextureInfo &info : instance_data->texture_infos) {
info.ensure_gpu_texture(texture_size);
}
}
private:
Vector<TextureInfoBounds> create_uv_bounds(float2 region_uv_span, float2 region_uv_min)
{
float2 uv_coords[VerticesPerDimension][VerticesPerDimension];
float2 region_tile_uv_span = region_uv_span / float2(float(Divisions));
float2 onscreen_multiple = (blender::math::floor(region_uv_min / region_tile_uv_span) +
float2(1.0f)) *
region_uv_span;
BLI_assert(onscreen_multiple.x > region_uv_min.x);
BLI_assert(onscreen_multiple.y > region_uv_min.y);
BLI_assert(onscreen_multiple.x < region_uv_max.x);
BLI_assert(onscreen_multiple.y < region_uv_max.y);
float2 uv_coords[3][3];
uv_coords[0][0] = onscreen_multiple + float2(-region_uv_span.x, -region_uv_span.y);
uv_coords[0][1] = onscreen_multiple + float2(-region_uv_span.x, 0.0);
uv_coords[0][2] = onscreen_multiple + float2(-region_uv_span.x, region_uv_span.y);
uv_coords[1][0] = onscreen_multiple + float2(0.0f, -region_uv_span.y);
uv_coords[1][1] = onscreen_multiple + float2(0.0f, 0.0);
uv_coords[1][2] = onscreen_multiple + float2(0.0f, region_uv_span.y);
uv_coords[2][0] = onscreen_multiple + float2(region_uv_span.x, -region_uv_span.y);
uv_coords[2][1] = onscreen_multiple + float2(region_uv_span.x, 0.0);
uv_coords[2][2] = onscreen_multiple + float2(region_uv_span.x, region_uv_span.y);
region_tile_uv_span;
for (int y = 0; y < VerticesPerDimension; y++) {
for (int x = 0; x < VerticesPerDimension; x++) {
uv_coords[x][y] = region_tile_uv_span * float2(float(x - 1), float(y - 1)) +
onscreen_multiple;
}
}
/* Construct the uv bounds of the 4 textures that are needed to fill the region. */
Vector<TextureInfo *> unassigned_textures;
struct TextureInfoBounds {
TextureInfo *info = nullptr;
rctf uv_bounds;
};
TextureInfoBounds bottom_left;
TextureInfoBounds bottom_right;
TextureInfoBounds top_left;
TextureInfoBounds top_right;
Vector<TextureInfoBounds> info_bounds;
for (int x = 0; x < TexturesPerDimension; x++) {
for (int y = 0; y < TexturesPerDimension; y++) {
TextureInfoBounds texture_info_bounds;
BLI_rctf_init(&texture_info_bounds.uv_bounds,
uv_coords[x][y].x,
uv_coords[x + 1][y + 1].x,
uv_coords[x][y].y,
uv_coords[x + 1][y + 1].y);
info_bounds.append(texture_info_bounds);
}
}
return info_bounds;
}
BLI_rctf_init(&bottom_left.uv_bounds,
uv_coords[0][0].x,
uv_coords[1][1].x,
uv_coords[0][0].y,
uv_coords[1][1].y);
BLI_rctf_init(&bottom_right.uv_bounds,
uv_coords[1][0].x,
uv_coords[2][1].x,
uv_coords[1][0].y,
uv_coords[2][1].y);
BLI_rctf_init(&top_left.uv_bounds,
uv_coords[0][1].x,
uv_coords[1][2].x,
uv_coords[0][1].y,
uv_coords[1][2].y);
BLI_rctf_init(&top_right.uv_bounds,
uv_coords[1][1].x,
uv_coords[2][2].x,
uv_coords[1][1].y,
uv_coords[2][2].y);
Vector<TextureInfoBounds *> info_bounds;
info_bounds.append(&bottom_left);
info_bounds.append(&bottom_right);
info_bounds.append(&top_left);
info_bounds.append(&top_right);
/* Assign any existing texture that matches uv bounds. */
void assign_texture_infos_by_uv_bounds(Vector<TextureInfoBounds> &info_bounds,
Vector<TextureInfo *> &r_unassigned_textures)
{
for (TextureInfo &info : instance_data->texture_infos) {
bool assigned = false;
for (TextureInfoBounds *info_bound : info_bounds) {
if (info_bound->info == nullptr &&
BLI_rctf_compare(&info_bound->uv_bounds, &info.clipping_uv_bounds, 0.001)) {
info_bound->info = &info;
for (TextureInfoBounds &info_bound : info_bounds) {
if (info_bound.info == nullptr &&
BLI_rctf_compare(&info_bound.uv_bounds, &info.clipping_uv_bounds, 0.001)) {
info_bound.info = &info;
assigned = true;
break;
}
}
if (!assigned) {
unassigned_textures.append(&info);
r_unassigned_textures.append(&info);
}
}
}
/* Assign free textures to bounds that weren't found. */
for (TextureInfoBounds *info_bound : info_bounds) {
if (info_bound->info == nullptr) {
info_bound->info = unassigned_textures.pop_last();
info_bound->info->need_full_update = true;
info_bound->info->clipping_uv_bounds = info_bound->uv_bounds;
void assign_unused_texture_infos(Vector<TextureInfoBounds> &info_bounds,
Vector<TextureInfo *> &unassigned_textures)
{
for (TextureInfoBounds &info_bound : info_bounds) {
if (info_bound.info == nullptr) {
info_bound.info = unassigned_textures.pop_last();
info_bound.info->need_full_update = true;
info_bound.info->clipping_uv_bounds = info_bound.uv_bounds;
}
}
}
/* Calculate the region bounds from the uv bounds. */
void update_region_bounds_from_uv_bounds(const rctf &region_uv_bounds, const float2 region_size)
{
rctf region_bounds;
BLI_rctf_init(&region_bounds, 0.0, region->winx, 0.0, region->winy);
BLI_rctf_init(&region_bounds, 0.0, region_size.x, 0.0, region_size.y);
float4x4 uv_to_screen;
BLI_rctf_transform_calc_m4_pivot_min(&region_uv_bounds, &region_bounds, uv_to_screen.ptr());
for (TextureInfo &info : instance_data->texture_infos) {
info.calc_region_bounds_from_uv_bounds(uv_to_screen);
info.update_region_bounds_from_uv_bounds(uv_to_screen);
}
}
};
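For orientation, the tile counts above follow directly from the Divisions template parameter: (Divisions + 1) squared textures and one more grid vertex per axis. A standalone C++ sketch of just those constants (not the engine code) shows that the Divisions = 1 instantiation used later in this change keeps the same four textures as the previous hard-coded SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN.
#include <cstddef>
#include <cstdio>
/* Standalone mirror of the ScreenTileTextures<Divisions> constants. */
template<std::size_t Divisions> struct TileCounts {
  static constexpr std::size_t TexturesPerDimension = Divisions + 1;
  static constexpr std::size_t TexturesRequired = TexturesPerDimension * TexturesPerDimension;
  static constexpr std::size_t VerticesPerDimension = TexturesPerDimension + 1;
};
int main()
{
  /* Divisions = 1: a 2x2 grid of textures and a 3x3 grid of uv vertices. */
  std::printf("%zu textures, %zu vertices per axis\n",
              TileCounts<1>::TexturesRequired,
              TileCounts<1>::VerticesPerDimension);
  return 0;
}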
@@ -166,13 +192,12 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
DRWShadingGroup *shgrp = DRW_shgroup_create(shader, instance_data->passes.image_pass);
DRW_shgroup_uniform_vec2_copy(shgrp, "farNearDistances", sh_params.far_near);
DRW_shgroup_uniform_vec4_copy(shgrp, "shuffle", sh_params.shuffle);
DRW_shgroup_uniform_int_copy(shgrp, "drawFlags", sh_params.flags);
DRW_shgroup_uniform_int_copy(shgrp, "drawFlags", static_cast<int32_t>(sh_params.flags));
DRW_shgroup_uniform_bool_copy(shgrp, "imgPremultiplied", sh_params.use_premul_alpha);
DRW_shgroup_uniform_texture(shgrp, "depth_texture", dtxl->depth);
float image_mat[4][4];
unit_m4(image_mat);
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
const TextureInfo &info = instance_data->texture_infos[i];
for (const TextureInfo &info : instance_data->texture_infos) {
DRWShadingGroup *shgrp_sub = DRW_shgroup_create_sub(shgrp);
DRW_shgroup_uniform_ivec2_copy(shgrp_sub, "offset", info.offset());
DRW_shgroup_uniform_texture_ex(shgrp_sub, "imageTexture", info.texture, GPU_SAMPLER_DEFAULT);
@@ -200,8 +225,7 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
tile_user = *image_user;
}
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
const TextureInfo &info = instance_data.texture_infos[i];
for (const TextureInfo &info : instance_data.texture_infos) {
LISTBASE_FOREACH (ImageTile *, image_tile_ptr, &image->tiles) {
const ImageTileWrapper image_tile(image_tile_ptr);
const int tile_x = image_tile.get_tile_x_offset();
@@ -305,8 +329,7 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
const float tile_width = float(iterator.tile_data.tile_buffer->x);
const float tile_height = float(iterator.tile_data.tile_buffer->y);
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
const TextureInfo &info = instance_data.texture_infos[i];
for (const TextureInfo &info : instance_data.texture_infos) {
/* Dirty images will receive a full update. No need to do a partial one now. */
if (info.need_full_update) {
continue;
@@ -407,8 +430,7 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
void do_full_update_for_dirty_textures(IMAGE_InstanceData &instance_data,
const ImageUser *image_user) const
{
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
TextureInfo &info = instance_data.texture_infos[i];
for (TextureInfo &info : instance_data.texture_infos) {
if (!info.need_full_update) {
continue;
}
@@ -499,33 +521,35 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
}
public:
void cache_init(IMAGE_Data *vedata) const override
void begin_sync(IMAGE_Data *vedata) const override
{
IMAGE_InstanceData *instance_data = vedata->instance_data;
instance_data->passes.image_pass = create_image_pass();
instance_data->passes.depth_pass = create_depth_pass();
}
void cache_image(IMAGE_Data *vedata, Image *image, ImageUser *iuser) const override
void image_sync(IMAGE_Data *vedata, Image *image, ImageUser *iuser) const override
{
const DRWContextState *draw_ctx = DRW_context_state_get();
IMAGE_InstanceData *instance_data = vedata->instance_data;
TextureMethod method(instance_data);
method.ensure_texture_infos();
instance_data->partial_update.ensure_image(image);
instance_data->clear_need_full_update_flag();
instance_data->float_buffers.reset_usage_flags();
/* Step: Find out which screen space textures are needed to draw on the screen. Remove the
* screen space textures that aren't needed. */
/* Step: Find out which screen space textures are needed to draw on the screen. Recycle
* textures that are not on screen anymore. */
const ARegion *region = draw_ctx->region;
method.update_bounds(region);
/* Check for changes in the image user compared to the last time. */
/* Step: Check for changes in the image user compared to the last time. */
instance_data->update_image_usage(iuser);
/* Step: Update the GPU textures based on the changes in the image. */
instance_data->update_gpu_texture_allocations();
method.ensure_gpu_textures_allocation();
update_textures(*instance_data, image, iuser);
/* Step: Add the GPU textures to the shgroup. */
@@ -542,7 +566,7 @@ template<typename TextureMethod> class ScreenSpaceDrawingMode : public AbstractD
instance_data->float_buffers.remove_unused_buffers();
}
void draw_scene(IMAGE_Data *vedata) const override
void draw_viewport(IMAGE_Data *vedata) const override
{
IMAGE_InstanceData *instance_data = vedata->instance_data;

View File

@@ -53,7 +53,7 @@ template<
*
* Useful during development to switch between drawing implementations.
*/
typename DrawingMode = ScreenSpaceDrawingMode<FullScreenTextures>>
typename DrawingMode = ScreenSpaceDrawingMode<ScreenTileTextures<1>>>
class ImageEngine {
private:
const DRWContextState *draw_ctx;
@@ -69,10 +69,10 @@ class ImageEngine {
virtual ~ImageEngine() = default;
void cache_init()
void begin_sync()
{
IMAGE_InstanceData *instance_data = vedata->instance_data;
drawing_mode.cache_init(vedata);
drawing_mode.begin_sync(vedata);
/* Setup full screen view matrix. */
const ARegion *region = draw_ctx->region;
@@ -82,7 +82,7 @@ class ImageEngine {
instance_data->view = DRW_view_create(viewmat, winmat, nullptr, nullptr, nullptr);
}
void cache_populate()
void image_sync()
{
IMAGE_InstanceData *instance_data = vedata->instance_data;
Main *bmain = CTX_data_main(draw_ctx->evil_C);
@@ -113,7 +113,7 @@ class ImageEngine {
else {
BKE_image_multiview_index(instance_data->image, iuser);
}
drawing_mode.cache_image(vedata, instance_data->image, iuser);
drawing_mode.image_sync(vedata, instance_data->image, iuser);
}
void draw_finish()
@@ -124,11 +124,11 @@ class ImageEngine {
instance_data->image = nullptr;
}
void draw_scene()
void draw_viewport()
{
drawing_mode.draw_scene(vedata);
drawing_mode.draw_viewport(vedata);
}
}; // namespace blender::draw::image_engine
};
/* -------------------------------------------------------------------- */
/** \name Engine Callbacks
@@ -146,8 +146,8 @@ static void IMAGE_cache_init(void *vedata)
{
const DRWContextState *draw_ctx = DRW_context_state_get();
ImageEngine image_engine(draw_ctx, static_cast<IMAGE_Data *>(vedata));
image_engine.cache_init();
image_engine.cache_populate();
image_engine.begin_sync();
image_engine.image_sync();
}
static void IMAGE_cache_populate(void * /*vedata*/, Object * /*ob*/)
@@ -159,7 +159,7 @@ static void IMAGE_draw_scene(void *vedata)
{
const DRWContextState *draw_ctx = DRW_context_state_get();
ImageEngine image_engine(draw_ctx, static_cast<IMAGE_Data *>(vedata));
image_engine.draw_scene();
image_engine.draw_viewport();
image_engine.draw_finish();
}

View File

@@ -0,0 +1,24 @@
/* SPDX-License-Identifier: GPL-2.0-or-later
* Copyright 2021 Blender Foundation. */
/** \file
* \ingroup draw_engine
*/
#pragma once
#include "BLI_utildefines.h"
namespace blender::draw::image_engine {
/* Shader parameters. */
enum class ImageDrawFlags {
Default = 0,
ShowAlpha = (1 << 0),
ApplyAlpha = (1 << 1),
Shuffling = (1 << 2),
Depth = (1 << 3)
};
ENUM_OPERATORS(ImageDrawFlags, ImageDrawFlags::Depth);
} // namespace blender::draw::image_engine
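ENUM_OPERATORS() (from BLI_utildefines.h) is what makes the |= usage in the space accessors below compile for this scoped enum. A minimal standalone C++ sketch of the same idea, with hand-written operators standing in for the macro and an assumed underlying type, also shows why DRW_shgroup_uniform_int_copy() now needs an explicit cast back to int.
#include <cstdint>
enum class ImageDrawFlags : std::int32_t {
  Default = 0,
  ShowAlpha = (1 << 0),
  ApplyAlpha = (1 << 1),
  Shuffling = (1 << 2),
  Depth = (1 << 3),
};
/* Rough stand-ins for the operators the ENUM_OPERATORS() macro provides. */
inline ImageDrawFlags operator|(ImageDrawFlags a, ImageDrawFlags b)
{
  return ImageDrawFlags(std::int32_t(a) | std::int32_t(b));
}
inline ImageDrawFlags &operator|=(ImageDrawFlags &a, ImageDrawFlags b)
{
  return a = a | b;
}
int main()
{
  ImageDrawFlags flags = ImageDrawFlags::Default;
  flags |= ImageDrawFlags::ShowAlpha | ImageDrawFlags::ApplyAlpha;
  /* Scoped enums do not convert implicitly, hence the static_cast when the
   * value is passed as a shader uniform. */
  const std::int32_t uniform_value = static_cast<std::int32_t>(flags); /* == 3 */
  (void)uniform_value;
  return 0;
}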

View File

@@ -19,14 +19,7 @@
#include "DRW_render.h"
/**
* \brief max allowed textures to use by the ScreenSpaceDrawingMode.
*
* The image engine uses 4 full screen textures to draw the image. With 4 textures it is possible
* to pan the screen where only the texture needs to be updated when they are not visible on the
* screen.
*/
constexpr int SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN = 4;
namespace blender::draw::image_engine {
struct IMAGE_InstanceData {
struct Image *image;
@@ -59,7 +52,8 @@ struct IMAGE_InstanceData {
/** \brief Transform matrix to convert a normalized screen space coordinates to texture space. */
float ss_to_texture[4][4];
TextureInfo texture_infos[SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN];
Vector<TextureInfo> texture_infos;
public:
virtual ~IMAGE_InstanceData() = default;
@@ -73,33 +67,9 @@ struct IMAGE_InstanceData {
reset_need_full_update(true);
}
void update_gpu_texture_allocations()
{
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
TextureInfo &info = texture_infos[i];
const bool is_allocated = info.texture != nullptr;
const bool resolution_changed = assign_if_different(info.last_viewport_size,
float2(DRW_viewport_size_get()));
const bool should_be_freed = is_allocated && resolution_changed;
const bool should_be_created = !is_allocated || resolution_changed;
if (should_be_freed) {
GPU_texture_free(info.texture);
info.texture = nullptr;
}
if (should_be_created) {
DRW_texture_ensure_fullscreen_2d(
&info.texture, GPU_RGBA16F, static_cast<DRWTextureFlag>(0));
}
info.need_full_update |= should_be_created;
}
}
void update_batches()
{
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
TextureInfo &info = texture_infos[i];
for (TextureInfo &info : texture_infos) {
BatchUpdater batch_updater(info);
batch_updater.update_batch();
}
@@ -119,8 +89,10 @@ struct IMAGE_InstanceData {
/** \brief Set dirty flag of all texture slots to the given value. */
void reset_need_full_update(bool new_value)
{
for (int i = 0; i < SCREEN_SPACE_DRAWING_MODE_TEXTURE_LEN; i++) {
texture_infos[i].need_full_update = new_value;
for (TextureInfo &info : texture_infos) {
info.need_full_update = new_value;
}
}
};
} // namespace blender::draw::image_engine

View File

@@ -34,11 +34,6 @@ struct IMAGE_Data {
IMAGE_InstanceData *instance_data;
};
/* Shader parameters. */
#define IMAGE_DRAW_FLAG_SHOW_ALPHA (1 << 0)
#define IMAGE_DRAW_FLAG_APPLY_ALPHA (1 << 1)
#define IMAGE_DRAW_FLAG_SHUFFLING (1 << 2)
#define IMAGE_DRAW_FLAG_DEPTH (1 << 3)
/**
* Abstract class for a drawing mode of the image engine.
@@ -49,9 +44,9 @@ struct IMAGE_Data {
class AbstractDrawingMode {
public:
virtual ~AbstractDrawingMode() = default;
virtual void cache_init(IMAGE_Data *vedata) const = 0;
virtual void cache_image(IMAGE_Data *vedata, Image *image, ImageUser *iuser) const = 0;
virtual void draw_scene(IMAGE_Data *vedata) const = 0;
virtual void begin_sync(IMAGE_Data *vedata) const = 0;
virtual void image_sync(IMAGE_Data *vedata, Image *image, ImageUser *iuser) const = 0;
virtual void draw_viewport(IMAGE_Data *vedata) const = 0;
virtual void draw_finish(IMAGE_Data *vedata) const = 0;
};

View File

@@ -17,17 +17,20 @@
#include "BLI_math.h"
#include "image_enums.hh"
#include "image_space.hh"
namespace blender::draw::image_engine {
struct ShaderParameters {
int flags = 0;
ImageDrawFlags flags = ImageDrawFlags::Default;
float shuffle[4];
float far_near[2];
bool use_premul_alpha = false;
void update(AbstractSpaceAccessor *space, const Scene *scene, Image *image, ImBuf *image_buffer)
{
flags = 0;
flags = ImageDrawFlags::Default;
copy_v4_fl(shuffle, 1.0f);
copy_v2_fl2(far_near, 100.0f, 0.0f);
@@ -40,3 +43,5 @@ struct ShaderParameters {
space->get_shader_parameters(*this, image_buffer);
}
};
} // namespace blender::draw::image_engine

View File

@@ -7,6 +7,8 @@
#pragma once
namespace blender::draw::image_engine {
struct ShaderParameters;
/**
@@ -59,16 +61,6 @@ class AbstractSpaceAccessor {
virtual void get_shader_parameters(ShaderParameters &r_shader_parameters,
ImBuf *image_buffer) = 0;
/**
* Retrieve the gpu textures to draw.
*/
virtual void get_gpu_textures(Image *image,
ImageUser *iuser,
ImBuf *image_buffer,
GPUTexture **r_gpu_texture,
bool *r_owns_texture,
GPUTexture **r_tex_tile_data) = 0;
/** \brief Is (wrap) repeat option enabled in the space. */
virtual bool use_tile_drawing() const = 0;
@@ -79,5 +71,6 @@ class AbstractSpaceAccessor {
virtual void init_ss_to_texture_matrix(const ARegion *region,
const float image_resolution[2],
float r_uv_to_texture[4][4]) const = 0;
};
}; // namespace blender::draw::image_engine
} // namespace blender::draw::image_engine

View File

@@ -44,104 +44,44 @@ class SpaceImageAccessor : public AbstractSpaceAccessor {
const int sima_flag = sima->flag & ED_space_image_get_display_channel_mask(image_buffer);
if ((sima_flag & SI_USE_ALPHA) != 0) {
/* Show RGBA */
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHOW_ALPHA | IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ShowAlpha | ImageDrawFlags::ApplyAlpha;
}
else if ((sima_flag & SI_SHOW_ALPHA) != 0) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHUFFLING;
r_shader_parameters.flags |= ImageDrawFlags::Shuffling;
copy_v4_fl4(r_shader_parameters.shuffle, 0.0f, 0.0f, 0.0f, 1.0f);
}
else if ((sima_flag & SI_SHOW_ZBUF) != 0) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_DEPTH | IMAGE_DRAW_FLAG_SHUFFLING;
r_shader_parameters.flags |= ImageDrawFlags::Depth | ImageDrawFlags::Shuffling;
copy_v4_fl4(r_shader_parameters.shuffle, 1.0f, 0.0f, 0.0f, 0.0f);
}
else if ((sima_flag & SI_SHOW_R) != 0) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHUFFLING;
r_shader_parameters.flags |= ImageDrawFlags::Shuffling;
if (IMB_alpha_affects_rgb(image_buffer)) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ApplyAlpha;
}
copy_v4_fl4(r_shader_parameters.shuffle, 1.0f, 0.0f, 0.0f, 0.0f);
}
else if ((sima_flag & SI_SHOW_G) != 0) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHUFFLING;
r_shader_parameters.flags |= ImageDrawFlags::Shuffling;
if (IMB_alpha_affects_rgb(image_buffer)) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ApplyAlpha;
}
copy_v4_fl4(r_shader_parameters.shuffle, 0.0f, 1.0f, 0.0f, 0.0f);
}
else if ((sima_flag & SI_SHOW_B) != 0) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHUFFLING;
r_shader_parameters.flags |= ImageDrawFlags::Shuffling;
if (IMB_alpha_affects_rgb(image_buffer)) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ApplyAlpha;
}
copy_v4_fl4(r_shader_parameters.shuffle, 0.0f, 0.0f, 1.0f, 0.0f);
}
else /* RGB */ {
if (IMB_alpha_affects_rgb(image_buffer)) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ApplyAlpha;
}
}
}
void get_gpu_textures(Image *image,
ImageUser *iuser,
ImBuf *image_buffer,
GPUTexture **r_gpu_texture,
bool *r_owns_texture,
GPUTexture **r_tex_tile_data) override
{
if (image->rr != nullptr) {
/* Update multi-index and pass for the current eye. */
BKE_image_multilayer_index(image->rr, iuser);
}
else {
BKE_image_multiview_index(image, iuser);
}
if (image_buffer == nullptr) {
return;
}
if (image_buffer->rect == nullptr && image_buffer->rect_float == nullptr) {
/* This code-path is only supposed to happen when drawing a lazily-allocatable render result.
* In all the other cases the `ED_space_image_acquire_buffer()` is expected to return nullptr
* as an image buffer when it has no pixels. */
BLI_assert(image->type == IMA_TYPE_R_RESULT);
float zero[4] = {0, 0, 0, 0};
*r_gpu_texture = GPU_texture_create_2d(__func__, 1, 1, 0, GPU_RGBA16F, zero);
*r_owns_texture = true;
return;
}
const int sima_flag = sima->flag & ED_space_image_get_display_channel_mask(image_buffer);
if (sima_flag & SI_SHOW_ZBUF &&
(image_buffer->zbuf || image_buffer->zbuf_float || (image_buffer->channels == 1))) {
if (image_buffer->zbuf) {
BLI_assert_msg(0, "Integer based depth buffers not supported");
}
else if (image_buffer->zbuf_float) {
*r_gpu_texture = GPU_texture_create_2d(
__func__, image_buffer->x, image_buffer->y, 0, GPU_R16F, image_buffer->zbuf_float);
*r_owns_texture = true;
}
else if (image_buffer->rect_float && image_buffer->channels == 1) {
*r_gpu_texture = GPU_texture_create_2d(
__func__, image_buffer->x, image_buffer->y, 0, GPU_R16F, image_buffer->rect_float);
*r_owns_texture = true;
}
}
else if (image->source == IMA_SRC_TILED) {
*r_gpu_texture = BKE_image_get_gpu_tiles(image, iuser, image_buffer);
*r_tex_tile_data = BKE_image_get_gpu_tilemap(image, iuser, nullptr);
*r_owns_texture = false;
}
else {
*r_gpu_texture = BKE_image_get_gpu_texture(image, iuser, image_buffer);
*r_owns_texture = false;
}
}
bool use_tile_drawing() const override
{
return (sima->flag & SI_DRAW_TILE) != 0;

View File

@@ -43,52 +43,40 @@ class SpaceNodeAccessor : public AbstractSpaceAccessor {
{
if ((snode->flag & SNODE_USE_ALPHA) != 0) {
/* Show RGBA */
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHOW_ALPHA | IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ShowAlpha | ImageDrawFlags::ApplyAlpha;
}
else if ((snode->flag & SNODE_SHOW_ALPHA) != 0) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHUFFLING;
r_shader_parameters.flags |= ImageDrawFlags::Shuffling;
copy_v4_fl4(r_shader_parameters.shuffle, 0.0f, 0.0f, 0.0f, 1.0f);
}
else if ((snode->flag & SNODE_SHOW_R) != 0) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHUFFLING;
r_shader_parameters.flags |= ImageDrawFlags::Shuffling;
if (IMB_alpha_affects_rgb(ibuf)) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ApplyAlpha;
}
copy_v4_fl4(r_shader_parameters.shuffle, 1.0f, 0.0f, 0.0f, 0.0f);
}
else if ((snode->flag & SNODE_SHOW_G) != 0) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHUFFLING;
r_shader_parameters.flags |= ImageDrawFlags::Shuffling;
if (IMB_alpha_affects_rgb(ibuf)) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ApplyAlpha;
}
copy_v4_fl4(r_shader_parameters.shuffle, 0.0f, 1.0f, 0.0f, 0.0f);
}
else if ((snode->flag & SNODE_SHOW_B) != 0) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_SHUFFLING;
r_shader_parameters.flags |= ImageDrawFlags::Shuffling;
if (IMB_alpha_affects_rgb(ibuf)) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ApplyAlpha;
}
copy_v4_fl4(r_shader_parameters.shuffle, 0.0f, 0.0f, 1.0f, 0.0f);
}
else /* RGB */ {
if (IMB_alpha_affects_rgb(ibuf)) {
r_shader_parameters.flags |= IMAGE_DRAW_FLAG_APPLY_ALPHA;
r_shader_parameters.flags |= ImageDrawFlags::ApplyAlpha;
}
}
}
void get_gpu_textures(Image *image,
ImageUser *iuser,
ImBuf *ibuf,
GPUTexture **r_gpu_texture,
bool *r_owns_texture,
GPUTexture **r_tex_tile_data) override
{
*r_gpu_texture = BKE_image_get_gpu_texture(image, iuser, ibuf);
*r_owns_texture = false;
*r_tex_tile_data = nullptr;
}
bool use_tile_drawing() const override
{
return false;

View File

@@ -13,6 +13,8 @@
#include "GPU_batch.h"
#include "GPU_texture.h"
namespace blender::draw::image_engine {
struct TextureInfo {
/**
* \brief does this texture need a full update.
@@ -33,14 +35,14 @@ struct TextureInfo {
* `pos` (2xF32) is relative to the origin of the space.
* `uv` (2xF32) reflect the uv bounds.
*/
GPUBatch *batch;
GPUBatch *batch = nullptr;
/**
* \brief GPU Texture for a partial region of the image editor.
*/
GPUTexture *texture;
GPUTexture *texture = nullptr;
float2 last_viewport_size = float2(0.0f, 0.0f);
int2 last_texture_size = int2(0);
~TextureInfo()
{
@@ -69,7 +71,7 @@ struct TextureInfo {
/**
* \brief Update the region bounds from the uv bounds by applying the given transform matrix.
*/
void calc_region_bounds_from_uv_bounds(const float4x4 &uv_to_region)
void update_region_bounds_from_uv_bounds(const float4x4 &uv_to_region)
{
float3 bottom_left_uv = float3(clipping_uv_bounds.xmin, clipping_uv_bounds.ymin, 0.0f);
float3 top_right_uv = float3(clipping_uv_bounds.xmax, clipping_uv_bounds.ymax, 0.0f);
@@ -81,4 +83,28 @@ struct TextureInfo {
bottom_left_region.y,
top_right_region.y);
}
void ensure_gpu_texture(int2 texture_size)
{
const bool is_allocated = texture != nullptr;
const bool resolution_changed = assign_if_different(last_texture_size, texture_size);
const bool should_be_freed = is_allocated && resolution_changed;
const bool should_be_created = !is_allocated || resolution_changed;
if (should_be_freed) {
GPU_texture_free(texture);
texture = nullptr;
}
if (should_be_created) {
texture = DRW_texture_create_2d_ex(UNPACK2(texture_size),
GPU_RGBA16F,
GPU_TEXTURE_USAGE_GENERAL,
static_cast<DRWTextureFlag>(0),
nullptr);
}
need_full_update |= should_be_created;
}
};
} // namespace blender::draw::image_engine

View File

@@ -7,6 +7,8 @@
#pragma once
namespace blender::draw::image_engine {
/**
* ImageUsage contains data of the image and image user to identify changes that require rebuilding
* the texture slots.
@@ -47,3 +49,5 @@ struct ImageUsage {
return !(*this == other);
}
};
} // namespace blender::draw::image_engine

View File

@@ -80,7 +80,8 @@ static GPUTexture *edit_uv_mask_texture(
/* Free memory. */
BKE_maskrasterize_handle_free(handle);
GPUTexture *texture = GPU_texture_create_2d(mask->id.name, width, height, 1, GPU_R16F, buffer);
GPUTexture *texture = GPU_texture_create_2d_ex(
mask->id.name, width, height, 1, GPU_R16F, GPU_TEXTURE_USAGE_SHADER_READ, buffer);
MEM_freeN(buffer);
return texture;
}

View File

@@ -91,7 +91,8 @@ GPU_SHADER_CREATE_INFO(overlay_armature_shape_outline)
.additional_info("overlay_frag_output", "overlay_armature_common", "draw_globals");
GPU_SHADER_CREATE_INFO(overlay_armature_shape_outline_no_geom)
// .do_static_compilation(true) /* TODO fix on GL */
.metal_backend_only(true)
.do_static_compilation(true)
.vertex_in(0, Type::VEC3, "pos")
.vertex_in(1, Type::VEC3, "snor")
/* Per instance. */
@@ -107,7 +108,8 @@ GPU_SHADER_CREATE_INFO(overlay_armature_shape_outline_clipped)
.additional_info("overlay_armature_shape_outline", "drw_clipped");
GPU_SHADER_CREATE_INFO(overlay_armature_shape_outline_clipped_no_geom)
// .do_static_compilation(true) /* TODO fix on GL */
.metal_backend_only(true)
.do_static_compilation(true)
.additional_info("overlay_armature_shape_outline_no_geom", "drw_clipped");
GPU_SHADER_INTERFACE_INFO(overlay_armature_shape_solid_iface, "")

Some files were not shown because too many files have changed in this diff.