WIP: Functions: new local allocator for better memory reuse and performance #104630

Draft
Jacques Lucke wants to merge 44 commits from JacquesLucke/blender:local-allocator into main

292 changed files with 4099 additions and 2587 deletions
Showing only changes of commit e07b31effb

View File

@ -61,17 +61,17 @@ ContinuationIndentWidth: 4
# This tries to match Blender's style as much as possible.
BreakBeforeBraces: Custom
BraceWrapping: {
AfterClass: 'false'
AfterControlStatement: 'false'
AfterEnum : 'false'
AfterFunction : 'true'
AfterNamespace : 'false'
AfterStruct : 'false'
AfterUnion : 'false'
BeforeCatch : 'true'
BeforeElse : 'true'
IndentBraces : 'false'
AfterObjCDeclaration: 'true'
AfterClass: 'false',
AfterControlStatement: 'false',
AfterEnum : 'false',
AfterFunction : 'true',
AfterNamespace : 'false',
AfterStruct : 'false',
AfterUnion : 'false',
BeforeCatch : 'true',
BeforeElse : 'true',
IndentBraces : 'false',
AfterObjCDeclaration: 'true',
}
# For switch statements, indent the cases.

View File

@ -501,12 +501,14 @@ endif()
if(NOT APPLE)
option(WITH_CYCLES_DEVICE_ONEAPI "Enable Cycles oneAPI compute support" OFF)
option(WITH_CYCLES_ONEAPI_BINARIES "Enable Ahead-Of-Time compilation for Cycles oneAPI device" OFF)
option(WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION "Switch target of oneAPI implementation from SYCL devices to Host Task (single thread on CPU). This option is only for debugging purposes." OFF)
# https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html
# acm-g10 is the target for the first Intel Arc Alchemist GPUs.
set(CYCLES_ONEAPI_SPIR64_GEN_DEVICES "acm-g10" CACHE STRING "oneAPI Intel GPU architectures to build binaries for")
set(CYCLES_ONEAPI_SYCL_TARGETS spir64 spir64_gen CACHE STRING "oneAPI targets to build AOT binaries for")
mark_as_advanced(WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
mark_as_advanced(CYCLES_ONEAPI_SPIR64_GEN_DEVICES)
mark_as_advanced(CYCLES_ONEAPI_SYCL_TARGETS)
endif()
@ -830,27 +832,17 @@ endif()
# These features require Boost; if the user disabled Boost, disable them with a warning.
set_and_warn_dependency(WITH_BOOST WITH_CYCLES OFF)
set_and_warn_dependency(WITH_BOOST WITH_INTERNATIONAL OFF)
set_and_warn_dependency(WITH_BOOST WITH_OPENVDB OFF)
set_and_warn_dependency(WITH_BOOST WITH_OPENCOLORIO OFF)
set_and_warn_dependency(WITH_BOOST WITH_QUADRIFLOW OFF)
set_and_warn_dependency(WITH_BOOST WITH_USD OFF)
set_and_warn_dependency(WITH_BOOST WITH_ALEMBIC OFF)
if(WITH_CYCLES)
set_and_warn_dependency(WITH_BOOST WITH_CYCLES_OSL OFF)
set_and_warn_dependency(WITH_PUGIXML WITH_CYCLES_OSL OFF)
endif()
set_and_warn_dependency(WITH_PUGIXML WITH_OPENIMAGEIO OFF)
if(WITH_BOOST AND NOT (WITH_CYCLES OR WITH_OPENIMAGEIO OR WITH_INTERNATIONAL OR
WITH_OPENVDB OR WITH_OPENCOLORIO OR WITH_USD OR WITH_ALEMBIC))
message(STATUS "No dependencies need 'WITH_BOOST' forcing WITH_BOOST=OFF")
set(WITH_BOOST OFF)
endif()
set_and_warn_dependency(WITH_TBB WITH_CYCLES OFF)
set_and_warn_dependency(WITH_TBB WITH_USD OFF)
set_and_warn_dependency(WITH_TBB WITH_OPENIMAGEDENOISE OFF)
set_and_warn_dependency(WITH_TBB WITH_OPENVDB OFF)
set_and_warn_dependency(WITH_TBB WITH_MOD_FLUID OFF)
@ -859,14 +851,10 @@ set_and_warn_dependency(WITH_OPENVDB WITH_NANOVDB OFF)
# OpenVDB and OpenColorIO use the 'half' type from OpenEXR
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_OPENVDB OFF)
set_and_warn_dependency(WITH_IMAGE_OPENEXR WITH_OPENCOLORIO OFF)
# Haru needs `TIFFFaxBlackCodes` & `TIFFFaxWhiteCodes` symbols from TIFF.
set_and_warn_dependency(WITH_IMAGE_TIFF WITH_HARU OFF)
# USD needs OpenSubDiv, since that is used by the Cycles Hydra render delegate.
set_and_warn_dependency(WITH_OPENSUBDIV WITH_USD OFF)
# auto enable openimageio for cycles
if(WITH_CYCLES)
set(WITH_OPENIMAGEIO ON)
@ -880,17 +868,6 @@ else()
set(WITH_CYCLES_OSL OFF)
endif()
# auto enable openimageio linking dependencies
if(WITH_OPENIMAGEIO)
set(WITH_IMAGE_OPENEXR ON)
set(WITH_IMAGE_TIFF ON)
endif()
# auto enable alembic linking dependencies
if(WITH_ALEMBIC)
set(WITH_IMAGE_OPENEXR ON)
endif()
# don't store paths to libs for portable distribution
if(WITH_INSTALL_PORTABLE)
set(CMAKE_SKIP_BUILD_RPATH TRUE)
@ -1093,14 +1070,6 @@ if(WITH_CYCLES)
"Configure OIIO or disable WITH_CYCLES"
)
endif()
if(NOT WITH_BOOST)
message(
FATAL_ERROR
"Cycles requires WITH_BOOST, the library may not have been found. "
"Configure BOOST or disable WITH_CYCLES"
)
endif()
if(WITH_CYCLES_OSL)
if(NOT WITH_LLVM)
message(
@ -2007,24 +1976,6 @@ if(0)
print_all_vars()
endif()
set(LIBDIR_STALE)
if(UNIX AND NOT APPLE)
# Only search for the path if it's found on the system.
if(EXISTS "../lib/linux_centos7_x86_64")
set(LIBDIR_STALE "/lib/linux_centos7_x86_64/")
endif()
endif()
if(LIBDIR_STALE)
print_cached_vars_containing_value(
"${LIBDIR_STALE}"
"\nWARNING: found cached references to old library paths!\n"
"\nIt is *strongly* recommended to reference updated library paths!\n"
)
endif()
unset(LIBDIR_STALE)
# Should be the last step of configuration.
if(POSTCONFIGURE_SCRIPT)
include(${POSTCONFIGURE_SCRIPT})

View File

@ -1209,43 +1209,6 @@ function(print_all_vars)
endforeach()
endfunction()
# Print a list of all cached variables with values containing `contents`.
function(print_cached_vars_containing_value
contents
msg_header
msg_footer
)
set(_list_info)
set(_found)
get_cmake_property(_vars VARIABLES)
foreach(_var ${_vars})
if(DEFINED CACHE{${_var}})
# Skip "_" prefixed variables, these are used for internal book-keeping,
# not under user control.
string(FIND "${_var}" "_" _found)
if(NOT (_found EQUAL 0))
string(FIND "${${_var}}" "${contents}" _found)
if(NOT (_found EQUAL -1))
if(_found)
list(APPEND _list_info "${_var}=${${_var}}")
endif()
endif()
endif()
endif()
endforeach()
unset(_var)
unset(_vars)
unset(_found)
if(_list_info)
message(${msg_header})
foreach(_var ${_list_info})
message(" * ${_var}")
endforeach()
message(${msg_footer})
endif()
unset(_list_info)
endfunction()
macro(openmp_delayload
projectname
)

View File

@ -86,16 +86,14 @@ endif()
if(WITH_USD)
find_package(USD REQUIRED)
add_bundled_libraries(usd/lib)
endif()
add_bundled_libraries(usd/lib)
if(WITH_MATERIALX)
find_package(MaterialX)
set_and_warn_library_found("MaterialX" MaterialX_FOUND WITH_MATERIALX)
if(WITH_MATERIALX)
add_bundled_libraries(materialx/lib)
endif()
endif()
add_bundled_libraries(materialx/lib)
if(WITH_VULKAN_BACKEND)
find_package(MoltenVK REQUIRED)
@ -117,8 +115,8 @@ endif()
if(WITH_OPENSUBDIV)
find_package(OpenSubdiv)
add_bundled_libraries(opensubdiv/lib)
endif()
add_bundled_libraries(opensubdiv/lib)
if(WITH_CODEC_SNDFILE)
find_package(SndFile)
@ -156,9 +154,9 @@ list(APPEND FREETYPE_LIBRARIES
if(WITH_IMAGE_OPENEXR)
find_package(OpenEXR)
add_bundled_libraries(openexr/lib)
add_bundled_libraries(imath/lib)
endif()
add_bundled_libraries(openexr/lib)
add_bundled_libraries(imath/lib)
if(WITH_CODEC_FFMPEG)
set(FFMPEG_ROOT_DIR ${LIBDIR}/ffmpeg)
@ -270,12 +268,11 @@ if(WITH_BOOST)
set(BOOST_INCLUDE_DIR ${Boost_INCLUDE_DIRS})
set(BOOST_DEFINITIONS)
add_bundled_libraries(boost/lib)
mark_as_advanced(Boost_LIBRARIES)
mark_as_advanced(Boost_INCLUDE_DIRS)
unset(_boost_FIND_COMPONENTS)
endif()
add_bundled_libraries(boost/lib)
if(WITH_INTERNATIONAL OR WITH_CODEC_FFMPEG)
string(APPEND PLATFORM_LINKFLAGS " -liconv") # boost_locale and ffmpeg need it!
@ -297,13 +294,13 @@ if(WITH_OPENIMAGEIO)
)
set(OPENIMAGEIO_DEFINITIONS "-DOIIO_STATIC_BUILD")
set(OPENIMAGEIO_IDIFF "${LIBDIR}/openimageio/bin/idiff")
add_bundled_libraries(openimageio/lib)
endif()
add_bundled_libraries(openimageio/lib)
if(WITH_OPENCOLORIO)
find_package(OpenColorIO 2.0.0 REQUIRED)
add_bundled_libraries(opencolorio/lib)
endif()
add_bundled_libraries(opencolorio/lib)
if(WITH_OPENVDB)
find_package(OpenVDB)
@ -314,8 +311,8 @@ if(WITH_OPENVDB)
unset(BLOSC_LIBRARIES CACHE)
endif()
set(OPENVDB_DEFINITIONS)
add_bundled_libraries(openvdb/lib)
endif()
add_bundled_libraries(openvdb/lib)
if(WITH_NANOVDB)
find_package(NanoVDB)
@ -363,8 +360,8 @@ endif()
if(WITH_TBB)
find_package(TBB REQUIRED)
add_bundled_libraries(tbb/lib)
endif()
add_bundled_libraries(tbb/lib)
if(WITH_POTRACE)
find_package(Potrace REQUIRED)
@ -382,9 +379,9 @@ if(WITH_OPENMP)
set(OpenMP_LIBRARY_DIR "${LIBDIR}/openmp/lib/")
set(OpenMP_LINKER_FLAGS "-L'${OpenMP_LIBRARY_DIR}' -lomp")
set(OpenMP_LIBRARY "${OpenMP_LIBRARY_DIR}/libomp.dylib")
add_bundled_libraries(openmp/lib)
endif()
endif()
add_bundled_libraries(openmp/lib)
if(WITH_XR_OPENXR)
find_package(XR_OpenXR_SDK REQUIRED)

View File

@ -3,6 +3,7 @@
# Auto update existing CMake caches for new libraries
# Clear cached variables whose name matches `pattern`.
function(unset_cache_variables pattern)
get_cmake_property(_cache_variables CACHE_VARIABLES)
foreach(_cache_variable ${_cache_variables})
@ -12,6 +13,30 @@ function(unset_cache_variables pattern)
endforeach()
endfunction()
# Clear cached variables with values containing `contents`.
function(unset_cached_variables_containing contents msg)
get_cmake_property(_cache_variables CACHE_VARIABLES)
set(_found)
set(_print_msg)
foreach(_cache_variable ${_cache_variables})
# Skip "_" prefixed variables, these are used for internal book-keeping,
# not under user control.
string(FIND "${_cache_variable}" "_" _found)
if(NOT (_found EQUAL 0))
string(FIND "${${_cache_variable}}" "${contents}" _found)
if(NOT (_found EQUAL -1))
if(_found)
unset(${_cache_variable} CACHE)
set(_print_msg ON)
endif()
endif()
endif()
endforeach()
if(_print_msg)
message(STATUS ${msg})
endif()
endfunction()
# Detect update from 3.1 to 3.2 libs.
if(UNIX AND
DEFINED OPENEXR_VERSION AND
@ -63,3 +88,13 @@ if(UNIX AND
unset_cache_variables("^TBB")
unset_cache_variables("^USD")
endif()
if(UNIX AND (NOT APPLE) AND LIBDIR AND (EXISTS ${LIBDIR}))
# Only search for the path if it's found on the system.
set(_libdir_stale "/lib/linux_centos7_x86_64/")
unset_cached_variables_containing(
"${_libdir_stale}"
"Auto clearing old ${_libdir_stale} paths from CMake configuration"
)
unset(_libdir_stale)
endif()

View File

@ -166,11 +166,9 @@ endif()
if(WITH_IMAGE_OPENEXR)
find_package_wrapper(OpenEXR) # our own module
set_and_warn_library_found("OpenEXR" OPENEXR_FOUND WITH_IMAGE_OPENEXR)
if(WITH_IMAGE_OPENEXR)
add_bundled_libraries(openexr/lib)
add_bundled_libraries(imath/lib)
endif()
endif()
add_bundled_libraries(openexr/lib)
add_bundled_libraries(imath/lib)
if(WITH_IMAGE_OPENJPEG)
find_package_wrapper(OpenJPEG)
@ -328,11 +326,8 @@ endif()
if(WITH_OPENVDB)
find_package(OpenVDB)
set_and_warn_library_found("OpenVDB" OPENVDB_FOUND WITH_OPENVDB)
if(WITH_OPENVDB)
add_bundled_libraries(openvdb/lib)
endif()
endif()
add_bundled_libraries(openvdb/lib)
if(WITH_NANOVDB)
find_package_wrapper(NanoVDB)
@ -351,18 +346,14 @@ endif()
if(WITH_USD)
find_package_wrapper(USD)
set_and_warn_library_found("USD" USD_FOUND WITH_USD)
if(WITH_USD)
add_bundled_libraries(usd/lib)
endif()
endif()
add_bundled_libraries(usd/lib)
if(WITH_MATERIALX)
find_package_wrapper(MaterialX)
set_and_warn_library_found("MaterialX" MaterialX_FOUND WITH_MATERIALX)
if(WITH_MATERIALX)
add_bundled_libraries(materialx/lib)
endif()
endif()
add_bundled_libraries(materialx/lib)
if(WITH_BOOST)
# used in build instructions to override include and library variables
@ -418,9 +409,8 @@ if(WITH_BOOST)
find_package(IcuLinux)
list(APPEND BOOST_LIBRARIES ${ICU_LIBRARIES})
endif()
add_bundled_libraries(boost/lib)
endif()
add_bundled_libraries(boost/lib)
if(WITH_PUGIXML)
find_package_wrapper(PugiXML)
@ -455,21 +445,16 @@ if(WITH_OPENIMAGEIO)
endif()
set_and_warn_library_found("OPENIMAGEIO" OPENIMAGEIO_FOUND WITH_OPENIMAGEIO)
if(WITH_OPENIMAGEIO)
add_bundled_libraries(openimageio/lib)
endif()
endif()
add_bundled_libraries(openimageio/lib)
if(WITH_OPENCOLORIO)
find_package_wrapper(OpenColorIO 2.0.0)
set(OPENCOLORIO_DEFINITIONS)
set_and_warn_library_found("OpenColorIO" OPENCOLORIO_FOUND WITH_OPENCOLORIO)
if(WITH_OPENCOLORIO)
add_bundled_libraries(opencolorio/lib)
endif()
endif()
add_bundled_libraries(opencolorio/lib)
if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
find_package(Embree 3.8.0 REQUIRED)
@ -510,18 +495,14 @@ if(WITH_OPENSUBDIV)
set(OPENSUBDIV_LIBPATH) # TODO, remove and reference the absolute path everywhere
set_and_warn_library_found("OpenSubdiv" OPENSUBDIV_FOUND WITH_OPENSUBDIV)
if(WITH_OPENSUBDIV)
add_bundled_libraries(opensubdiv/lib)
endif()
endif()
add_bundled_libraries(opensubdiv/lib)
if(WITH_TBB)
find_package_wrapper(TBB)
set_and_warn_library_found("TBB" TBB_FOUND WITH_TBB)
if(WITH_TBB)
add_bundled_libraries(tbb/lib)
endif()
endif()
add_bundled_libraries(tbb/lib)
if(WITH_XR_OPENXR)
find_package(XR_OpenXR_SDK)
@ -1013,18 +994,6 @@ endfunction()
configure_atomic_lib_if_needed()
# Handle library inter-dependencies.
# FIXME: find a better place to handle inter-library dependencies.
# This is done near the end of the file to ensure bundled libraries are not added multiple times.
if(WITH_USD)
if(NOT WITH_OPENIMAGEIO)
add_bundled_libraries(openimageio/lib)
endif()
if(NOT WITH_OPENVDB)
add_bundled_libraries(openvdb/lib)
endif()
endif()
if(PLATFORM_BUNDLED_LIBRARIES)
# For the installed Python module and installed Blender executable, we set the
# rpath to the relative path where the install step will copy the shared libraries.

View File

@ -1,12 +1,6 @@
/* SPDX-License-Identifier: Apache-2.0
* Copyright 2021-2022 Blender Foundation */
#include "blender/display_driver.h"
#include "device/device.h"
#include "util/log.h"
#include "util/math.h"
#include "GPU_context.h"
#include "GPU_immediate.h"
#include "GPU_shader.h"
@ -15,6 +9,12 @@
#include "RE_engine.h"
#include "blender/display_driver.h"
#include "device/device.h"
#include "util/log.h"
#include "util/math.h"
CCL_NAMESPACE_BEGIN
/* --------------------------------------------------------------------

View File

@ -163,6 +163,9 @@ if(WITH_CYCLES_DEVICE_METAL)
endif()
if(WITH_CYCLES_DEVICE_ONEAPI)
if(WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
add_definitions(-DWITH_ONEAPI_SYCL_HOST_TASK)
endif()
if(WITH_CYCLES_ONEAPI_BINARIES)
set(cycles_kernel_oneapi_lib_suffix "_aot")
else()

View File

@ -167,6 +167,17 @@ class Device {
return true;
}
/* Request cancellation of any long-running work. */
virtual void cancel()
{
}
/* Return true if device is ready for rendering, or report status if not. */
virtual bool is_ready(string & /*status*/) const
{
return true;
}
/* GPU device only functions.
* These may not be used on CPU or multi-devices. */
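Note: together, cancel() and is_ready() let the session poll for kernel availability instead of blocking inside load_kernels(). A minimal sketch of the intended calling pattern, using only the standard library (the Device pointer and the cancel predicate are assumptions for illustration, not part of this diff; Cycles' string is std::string):

#include <chrono>
#include <functional>
#include <string>
#include <thread>

/* Poll until the device reports ready, aborting its background work if the
 * user cancels. Mirrors the session-side loop added later in this commit. */
void wait_until_device_ready(Device *device, const std::function<bool()> &is_cancelled)
{
  std::string status;
  while (!device->is_ready(status)) {
    if (is_cancelled()) {
      device->cancel(); /* Request cancellation of long-running work. */
      break;
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
  }
}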

View File

@ -76,7 +76,20 @@ class MetalDevice : public Device {
bool use_metalrt = false;
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
std::atomic_bool async_compile_and_load = false;
int device_id = 0;
static thread_mutex existing_devices_mutex;
static std::map<int, MetalDevice *> active_device_ids;
static bool is_device_cancelled(int device_id);
static MetalDevice *get_device_by_ID(int ID,
thread_scoped_lock &existing_devices_mutex_lock);
virtual bool is_ready(string &status) const override;
virtual void cancel() override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
@ -92,14 +105,12 @@ class MetalDevice : public Device {
bool use_adaptive_compilation();
bool make_source_and_check_if_compile_needed(MetalPipelineType pso_type);
void make_source(MetalPipelineType pso_type, const uint kernel_features);
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);
void init_host_memory();
void load_texture_info();
void erase_allocation(device_memory &mem);
@ -112,7 +123,7 @@ class MetalDevice : public Device {
virtual void optimize_for_scene(Scene *scene) override;
bool compile_and_load(MetalPipelineType pso_type);
static void compile_and_load(int device_id, MetalPipelineType pso_type);
/* ------------------------------------------------------------------ */
/* low-level memory management */

View File

@ -13,10 +13,32 @@
# include "util/path.h"
# include "util/time.h"
# include <crt_externs.h>
CCL_NAMESPACE_BEGIN
class MetalDevice;
thread_mutex MetalDevice::existing_devices_mutex;
std::map<int, MetalDevice *> MetalDevice::active_device_ids;
/* Thread-safe device access for async work. Calling code must pass an appropriately scoped lock
 * to existing_devices_mutex to safeguard against destruction of the returned instance. */
MetalDevice *MetalDevice::get_device_by_ID(int ID, thread_scoped_lock &existing_devices_mutex_lock)
{
auto it = active_device_ids.find(ID);
if (it != active_device_ids.end()) {
return it->second;
}
return nullptr;
}
bool MetalDevice::is_device_cancelled(int ID)
{
thread_scoped_lock lock(existing_devices_mutex);
return get_device_by_ID(ID, lock) == nullptr;
}
BVHLayoutMask MetalDevice::get_bvh_layout_mask() const
{
return use_metalrt ? BVH_LAYOUT_METAL : BVH_LAYOUT_BVH2;
@ -40,6 +62,15 @@ void MetalDevice::set_error(const string &error)
MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
: Device(info, stats, profiler), texture_info(this, "texture_info", MEM_GLOBAL)
{
{
/* Assign an ID for this device which we can use to query whether async shader compilation
* requests are still relevant. */
thread_scoped_lock lock(existing_devices_mutex);
static int existing_devices_counter = 1;
device_id = existing_devices_counter++;
active_device_ids[device_id] = this;
}
mtlDevId = info.num;
/* select chosen device */
@ -57,7 +88,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
if (@available(macos 11.0, *)) {
if ([mtlDevice hasUnifiedMemory]) {
default_storage_mode = MTLResourceStorageModeShared;
init_host_memory();
}
}
@ -181,6 +211,13 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
MetalDevice::~MetalDevice()
{
/* Cancel any async shader compilations that are in flight. */
cancel();
/* This lock safeguards against destruction during use (see other uses of
* existing_devices_mutex). */
thread_scoped_lock lock(existing_devices_mutex);
for (auto &tex : texture_slot_map) {
if (tex) {
[tex release];
@ -326,21 +363,66 @@ bool MetalDevice::load_kernels(const uint _kernel_features)
* active, but may still need to be rendered without motion blur if that isn't active as well. */
motion_blur = kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
bool result = compile_and_load(PSO_GENERIC);
/* Only request generic kernels if they aren't cached in memory. */
if (make_source_and_check_if_compile_needed(PSO_GENERIC)) {
/* If needed, load them asynchronously in order to responsively message progress to the user. */
int this_device_id = this->device_id;
auto compile_kernels_fn = ^() {
compile_and_load(this_device_id, PSO_GENERIC);
};
reserve_local_memory(kernel_features);
return result;
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
compile_kernels_fn);
}
return true;
}
bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
bool MetalDevice::make_source_and_check_if_compile_needed(MetalPipelineType pso_type)
{
make_source(pso_type, kernel_features);
if (!MetalDeviceKernels::should_load_kernels(this, pso_type)) {
/* We already have a full set of matching pipelines which are cached or queued. */
metal_printf("%s kernels already requested\n", kernel_type_as_string(pso_type));
return true;
if (this->source[pso_type].empty()) {
make_source(pso_type, kernel_features);
}
return MetalDeviceKernels::should_load_kernels(this, pso_type);
}
void MetalDevice::compile_and_load(int device_id, MetalPipelineType pso_type)
{
/* Thread-safe front-end compilation. Typically the MSL->AIR compilation can take a few seconds,
* so we avoid blocking device teardown if the user cancels a render immediately.
*/
id<MTLDevice> mtlDevice;
string source;
MetalGPUVendor device_vendor;
/* Safely gather any state required for the MSL->AIR compilation. */
{
thread_scoped_lock lock(existing_devices_mutex);
/* Check whether the device still exists. */
MetalDevice *instance = get_device_by_ID(device_id, lock);
if (!instance) {
metal_printf("Ignoring %s compilation request - device no longer exists\n",
kernel_type_as_string(pso_type));
return;
}
if (!instance->make_source_and_check_if_compile_needed(pso_type)) {
/* We already have a full set of matching pipelines which are cached or queued. Return early
* to avoid redundant MTLLibrary compilation. */
metal_printf("Ignoreing %s compilation request - kernels already requested\n",
kernel_type_as_string(pso_type));
return;
}
mtlDevice = instance->mtlDevice;
device_vendor = instance->device_vendor;
source = instance->source[pso_type];
}
/* Perform the actual compilation using our cached context. The MetalDevice can safely be
 * destroyed during this time. */
MTLCompileOptions *options = [[MTLCompileOptions alloc] init];
@ -359,20 +441,15 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
if (getenv("CYCLES_METAL_PROFILING") || getenv("CYCLES_METAL_DEBUG")) {
path_write_text(path_cache_get(string_printf("%s.metal", kernel_type_as_string(pso_type))),
source[pso_type]);
source);
}
const double starttime = time_dt();
NSError *error = NULL;
mtlLibrary[pso_type] = [mtlDevice newLibraryWithSource:@(source[pso_type].c_str())
options:options
error:&error];
if (!mtlLibrary[pso_type]) {
NSString *err = [error localizedDescription];
set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
id<MTLLibrary> mtlLibrary = [mtlDevice newLibraryWithSource:@(source.c_str())
options:options
error:&error];
metal_printf("Front-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
@ -380,17 +457,21 @@ bool MetalDevice::compile_and_load(MetalPipelineType pso_type)
[options release];
return MetalDeviceKernels::load(this, pso_type);
}
void MetalDevice::reserve_local_memory(const uint kernel_features)
{
/* METAL_WIP - implement this */
}
void MetalDevice::init_host_memory()
{
/* METAL_WIP - implement this */
/* Save the compiled MTLLibrary and trigger the AIR->PSO builds (if the MetalDevice still
* exists). */
{
thread_scoped_lock lock(existing_devices_mutex);
if (MetalDevice *instance = get_device_by_ID(device_id, lock)) {
if (mtlLibrary) {
instance->mtlLibrary[pso_type] = mtlLibrary;
MetalDeviceKernels::load(instance, pso_type);
}
else {
NSString *err = [error localizedDescription];
instance->set_error(string_printf("Failed to compile library:\n%s", [err UTF8String]));
}
}
}
}
void MetalDevice::load_texture_info()
@ -700,55 +781,74 @@ device_ptr MetalDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, siz
return 0;
}
void MetalDevice::cancel()
{
/* Remove this device's ID from the list of active devices. Any pending compilation requests
* originating from this session will be cancelled. */
thread_scoped_lock lock(existing_devices_mutex);
if (device_id) {
active_device_ids.erase(device_id);
device_id = 0;
}
}
bool MetalDevice::is_ready(string &status) const
{
int num_loaded = MetalDeviceKernels::get_loaded_kernel_count(this, PSO_GENERIC);
if (num_loaded < DEVICE_KERNEL_NUM) {
status = string_printf("%d / %d render kernels loaded (may take a few minutes the first time)",
num_loaded,
DEVICE_KERNEL_NUM);
return false;
}
metal_printf("MetalDevice::is_ready(...) --> true\n");
return true;
}
void MetalDevice::optimize_for_scene(Scene *scene)
{
MetalPipelineType specialization_level = kernel_specialization_level;
if (specialization_level < PSO_SPECIALIZED_INTERSECT) {
return;
}
/* PSO_SPECIALIZED_INTERSECT kernels are fast to specialize, so we always load them
* synchronously. */
compile_and_load(PSO_SPECIALIZED_INTERSECT);
if (specialization_level < PSO_SPECIALIZED_SHADE) {
return;
}
if (!scene->params.background) {
/* Don't load PSO_SPECIALIZED_SHADE kernels during viewport rendering as they are slower to
* build. */
return;
/* In live viewport, don't specialize beyond intersection kernels for responsiveness. */
specialization_level = (MetalPipelineType)min(specialization_level, PSO_SPECIALIZED_INTERSECT);
}
/* PSO_SPECIALIZED_SHADE kernels are slower to specialize, so we load them asynchronously, and
* only if there isn't an existing load in flight.
*/
auto specialize_shade_fn = ^() {
compile_and_load(PSO_SPECIALIZED_SHADE);
async_compile_and_load = false;
/* For responsive rendering, specialize the kernels in the background, and only if there isn't an
* existing "optimize_for_scene" request in flight. */
int this_device_id = this->device_id;
auto specialize_kernels_fn = ^() {
for (int level = 1; level <= int(specialization_level); level++) {
compile_and_load(this_device_id, MetalPipelineType(level));
}
};
bool async_specialize_shade = true;
/* In normal use, we always compile the specialized kernels in the background. */
bool specialize_in_background = true;
/* Block if per-kernel profiling is enabled (ensure steady rendering rate). */
if (getenv("CYCLES_METAL_PROFILING") != nullptr) {
async_specialize_shade = false;
specialize_in_background = false;
}
if (async_specialize_shade) {
if (!async_compile_and_load) {
async_compile_and_load = true;
/* Block during benchmark warm-up to ensure kernels are cached prior to the observed run. */
for (int i = 0; i < *_NSGetArgc(); i++) {
if (!strcmp((*_NSGetArgv())[i], "--warm-up")) {
specialize_in_background = false;
}
}
if (specialize_in_background) {
if (!MetalDeviceKernels::any_specialization_happening_now()) {
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0),
specialize_shade_fn);
specialize_kernels_fn);
}
else {
metal_printf(
"Async PSO_SPECIALIZED_SHADE load request already in progress - dropping request\n");
metal_printf("\"optimize_for_scene\" request already in flight - dropping request\n");
}
}
else {
specialize_shade_fn();
specialize_kernels_fn();
}
}
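Note: the thread-safety scheme in this file is worth calling out. Background compilation jobs never capture a MetalDevice pointer, only an integer device_id, and re-resolve the pointer under existing_devices_mutex before touching it; erasing the ID in cancel() makes pending jobs no-ops. A generic sketch of the pattern under illustrative names (none of these identifiers appear in the diff):

#include <map>
#include <mutex>

struct Instance; /* Stands in for MetalDevice. */

static std::mutex g_registry_mutex;
static std::map<int, Instance *> g_registry;
static int g_next_id = 1;

/* Constructor side: hand out an ID and remember the instance. */
int register_instance(Instance *instance)
{
  std::lock_guard<std::mutex> lock(g_registry_mutex);
  const int id = g_next_id++;
  g_registry[id] = instance;
  return id;
}

/* cancel()/destructor side: once erased, pending jobs become no-ops. */
void unregister_instance(const int id)
{
  std::lock_guard<std::mutex> lock(g_registry_mutex);
  g_registry.erase(id);
}

/* Job side: resolve the ID under the lock; skip the work if the instance died. */
template<typename Fn> bool with_instance(const int id, Fn &&fn)
{
  std::lock_guard<std::mutex> lock(g_registry_mutex);
  auto it = g_registry.find(id);
  if (it == g_registry.end()) {
    return false; /* Device no longer exists: drop the request. */
  }
  fn(*it->second);
  return true;
}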

View File

@ -64,6 +64,8 @@ struct MetalKernelPipeline {
void compile();
int originating_device_id;
id<MTLLibrary> mtlLibrary = nil;
MetalPipelineType pso_type;
string source_md5;
@ -94,7 +96,9 @@ struct MetalKernelPipeline {
/* Cache of Metal kernels for each DeviceKernel. */
namespace MetalDeviceKernels {
bool should_load_kernels(MetalDevice *device, MetalPipelineType pso_type);
bool any_specialization_happening_now();
int get_loaded_kernel_count(MetalDevice const *device, MetalPipelineType pso_type);
bool should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type);
bool load(MetalDevice *device, MetalPipelineType pso_type);
const MetalKernelPipeline *get_best_pipeline(const MetalDevice *device, DeviceKernel kernel);

View File

@ -86,23 +86,17 @@ struct ShaderCache {
void load_kernel(DeviceKernel kernel, MetalDevice *device, MetalPipelineType pso_type);
bool should_load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalDevice const *device,
MetalPipelineType pso_type);
void wait_for_all();
private:
friend ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice);
void compile_thread_func(int thread_index);
using PipelineCollection = std::vector<unique_ptr<MetalKernelPipeline>>;
struct PipelineRequest {
MetalKernelPipeline *pipeline = nullptr;
std::function<void(MetalKernelPipeline *)> completionHandler;
};
struct OccupancyTuningParameters {
int threads_per_threadgroup = 0;
int num_threads_per_block = 0;
@ -113,13 +107,15 @@ struct ShaderCache {
PipelineCollection pipelines[DEVICE_KERNEL_NUM];
id<MTLDevice> mtlDevice;
bool running = false;
static bool running;
std::condition_variable cond_var;
std::deque<PipelineRequest> request_queue;
std::deque<MetalKernelPipeline *> request_queue;
std::vector<std::thread> compile_threads;
std::atomic_int incomplete_requests = 0;
std::atomic_int incomplete_specialization_requests = 0;
};
bool ShaderCache::running = true;
std::mutex g_shaderCacheMutex;
std::map<id<MTLDevice>, unique_ptr<ShaderCache>> g_shaderCache;
@ -137,11 +133,25 @@ ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice)
ShaderCache::~ShaderCache()
{
metal_printf("ShaderCache shutting down with incomplete_requests = %d\n",
int(incomplete_requests));
running = false;
cond_var.notify_all();
int num_incomplete = int(incomplete_requests);
if (num_incomplete) {
/* Shutting down the app with incomplete shader compilation requests. Give 1 second's grace for
* clean shutdown. */
metal_printf("ShaderCache busy (incomplete_requests = %d)...\n", num_incomplete);
std::this_thread::sleep_for(std::chrono::seconds(1));
num_incomplete = int(incomplete_requests);
}
if (num_incomplete) {
metal_printf("ShaderCache still busy (incomplete_requests = %d). Terminating...\n",
num_incomplete);
std::terminate();
}
metal_printf("ShaderCache idle. Shutting down.\n");
for (auto &thread : compile_threads) {
thread.join();
}
@ -156,35 +166,69 @@ void ShaderCache::wait_for_all()
void ShaderCache::compile_thread_func(int thread_index)
{
while (1) {
while (running) {
/* wait for / acquire next request */
PipelineRequest request;
MetalKernelPipeline *pipeline;
{
thread_scoped_lock lock(cache_mutex);
cond_var.wait(lock, [&] { return !running || !request_queue.empty(); });
if (!running) {
break;
if (!running || request_queue.empty()) {
continue;
}
if (!request_queue.empty()) {
request = request_queue.front();
request_queue.pop_front();
}
pipeline = request_queue.front();
request_queue.pop_front();
}
/* service request */
if (request.pipeline) {
request.pipeline->compile();
incomplete_requests--;
/* Service the request. */
DeviceKernel device_kernel = pipeline->device_kernel;
MetalPipelineType pso_type = pipeline->pso_type;
if (MetalDevice::is_device_cancelled(pipeline->originating_device_id)) {
/* The originating MetalDevice is no longer active, so this request is obsolete. */
metal_printf("Cancelling compilation of %s (%s)\n",
device_kernel_as_string(device_kernel),
kernel_type_as_string(pso_type));
}
else {
/* Do the actual compilation. */
pipeline->compile();
thread_scoped_lock lock(cache_mutex);
auto &collection = pipelines[device_kernel];
/* Cache up to 3 kernel variants with the same pso_type in memory, purging oldest first. */
int max_entries_of_same_pso_type = 3;
for (int i = (int)collection.size() - 1; i >= 0; i--) {
if (collection[i]->pso_type == pso_type) {
max_entries_of_same_pso_type -= 1;
if (max_entries_of_same_pso_type == 0) {
metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
kernel_type_as_string(pso_type),
device_kernel_as_string(device_kernel));
collection.erase(collection.begin() + i);
break;
}
}
}
collection.push_back(unique_ptr<MetalKernelPipeline>(pipeline));
}
incomplete_requests--;
if (pso_type != PSO_GENERIC) {
incomplete_specialization_requests--;
}
}
}
bool ShaderCache::should_load_kernel(DeviceKernel device_kernel,
MetalDevice *device,
MetalDevice const *device,
MetalPipelineType pso_type)
{
if (!running) {
return false;
}
if (device_kernel == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
/* Skip megakernel. */
return false;
@ -240,7 +284,6 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/* create compiler threads on first run */
thread_scoped_lock lock(cache_mutex);
if (compile_threads.empty()) {
running = true;
for (int i = 0; i < max_mtlcompiler_threads; i++) {
compile_threads.push_back(std::thread([&] { compile_thread_func(i); }));
}
@ -252,53 +295,39 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
}
incomplete_requests++;
if (pso_type != PSO_GENERIC) {
incomplete_specialization_requests++;
}
PipelineRequest request;
request.pipeline = new MetalKernelPipeline;
memcpy(&request.pipeline->kernel_data_,
&device->launch_params.data,
sizeof(request.pipeline->kernel_data_));
request.pipeline->pso_type = pso_type;
request.pipeline->mtlDevice = mtlDevice;
request.pipeline->source_md5 = device->source_md5[pso_type];
request.pipeline->mtlLibrary = device->mtlLibrary[pso_type];
request.pipeline->device_kernel = device_kernel;
request.pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
MetalKernelPipeline *pipeline = new MetalKernelPipeline;
/* Keep track of the originating device's ID so that we can cancel requests if the device ceases
* to be active. */
pipeline->originating_device_id = device->device_id;
memcpy(&pipeline->kernel_data_, &device->launch_params.data, sizeof(pipeline->kernel_data_));
pipeline->pso_type = pso_type;
pipeline->mtlDevice = mtlDevice;
pipeline->source_md5 = device->source_md5[pso_type];
pipeline->mtlLibrary = device->mtlLibrary[pso_type];
pipeline->device_kernel = device_kernel;
pipeline->threads_per_threadgroup = device->max_threads_per_threadgroup;
if (occupancy_tuning[device_kernel].threads_per_threadgroup) {
request.pipeline->threads_per_threadgroup =
pipeline->threads_per_threadgroup =
occupancy_tuning[device_kernel].threads_per_threadgroup;
request.pipeline->num_threads_per_block =
pipeline->num_threads_per_block =
occupancy_tuning[device_kernel].num_threads_per_block;
}
/* metalrt options */
request.pipeline->use_metalrt = device->use_metalrt;
request.pipeline->metalrt_features = device->use_metalrt ?
(device->kernel_features & METALRT_FEATURE_MASK) :
0;
pipeline->use_metalrt = device->use_metalrt;
pipeline->metalrt_features = device->use_metalrt ?
(device->kernel_features & METALRT_FEATURE_MASK) :
0;
{
thread_scoped_lock lock(cache_mutex);
auto &collection = pipelines[device_kernel];
/* Cache up to 3 kernel variants with the same pso_type, purging oldest first. */
int max_entries_of_same_pso_type = 3;
for (int i = (int)collection.size() - 1; i >= 0; i--) {
if (collection[i]->pso_type == pso_type) {
max_entries_of_same_pso_type -= 1;
if (max_entries_of_same_pso_type == 0) {
metal_printf("Purging oldest %s:%s kernel from ShaderCache\n",
kernel_type_as_string(pso_type),
device_kernel_as_string(device_kernel));
collection.erase(collection.begin() + i);
break;
}
}
}
collection.push_back(unique_ptr<MetalKernelPipeline>(request.pipeline));
request_queue.push_back(request);
request_queue.push_back(pipeline);
}
cond_var.notify_one();
}
@ -664,51 +693,61 @@ void MetalKernelPipeline::compile()
double starttime = time_dt();
MTLNewComputePipelineStateWithReflectionCompletionHandler completionHandler = ^(
id<MTLComputePipelineState> computePipelineState,
MTLComputePipelineReflection *reflection,
NSError *error) {
bool recreate_archive = false;
if (computePipelineState == nil && archive) {
/* Block on load to ensure we continue with a valid kernel function */
if (creating_new_archive) {
starttime = time_dt();
NSError *error;
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
error:&error]) {
NSString *errStr = [error localizedDescription];
metal_printf(
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
"(error: %s)\n",
device_kernel_as_string((DeviceKernel)device_kernel),
errStr ? [errStr UTF8String] : "nil");
computePipelineState = [mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:MTLPipelineOptionNone
reflection:nullptr
error:&error];
recreate_archive = true;
metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil");
}
}
double duration = time_dt() - starttime;
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:pipelineOptions
reflection:nullptr
error:&error];
if (computePipelineState == nil) {
NSString *errStr = [error localizedDescription];
error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n",
device_kernel_as_string((DeviceKernel)device_kernel));
error_str += (errStr ? [errStr UTF8String] : "nil");
metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n",
kernel_type_as_string(pso_type),
device_kernel,
device_kernel_as_string((DeviceKernel)device_kernel),
duration);
return;
}
bool recreate_archive = false;
if (pipeline == nil && archive) {
NSString *errStr = [error localizedDescription];
metal_printf(
"Failed to create compute pipeline state \"%s\" from archive - attempting recreation... "
"(error: %s)\n",
device_kernel_as_string((DeviceKernel)device_kernel),
errStr ? [errStr UTF8String] : "nil");
pipeline = [mtlDevice newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:MTLPipelineOptionNone
reflection:nullptr
error:&error];
recreate_archive = true;
}
if (!num_threads_per_block) {
num_threads_per_block = round_down(computePipelineState.maxTotalThreadsPerThreadgroup,
computePipelineState.threadExecutionWidth);
num_threads_per_block = std::max(num_threads_per_block,
(int)computePipelineState.threadExecutionWidth);
}
double duration = time_dt() - starttime;
this->pipeline = computePipelineState;
if (pipeline == nil) {
NSString *errStr = [error localizedDescription];
error_str = string_printf("Failed to create compute pipeline state \"%s\", error: \n",
device_kernel_as_string((DeviceKernel)device_kernel));
error_str += (errStr ? [errStr UTF8String] : "nil");
metal_printf("%16s | %2d | %-55s | %7.2fs | FAILED!\n",
kernel_type_as_string(pso_type),
device_kernel,
device_kernel_as_string((DeviceKernel)device_kernel),
duration);
return;
}
if (@available(macOS 11.0, *)) {
if (!num_threads_per_block) {
num_threads_per_block = round_down(pipeline.maxTotalThreadsPerThreadgroup,
pipeline.threadExecutionWidth);
num_threads_per_block = std::max(num_threads_per_block,
(int)pipeline.threadExecutionWidth);
}
if (@available(macOS 11.0, *)) {
if (ShaderCache::running) {
if (creating_new_archive || recreate_archive) {
if (![archive serializeToURL:[NSURL fileURLWithPath:@(metalbin_path.c_str())]
error:&error]) {
@ -720,24 +759,7 @@ void MetalKernelPipeline::compile()
}
}
}
};
/* Block on load to ensure we continue with a valid kernel function */
if (creating_new_archive) {
starttime = time_dt();
NSError *error;
if (![archive addComputePipelineFunctionsWithDescriptor:computePipelineStateDescriptor
error:&error]) {
NSString *errStr = [error localizedDescription];
metal_printf("Failed to add PSO to archive:\n%s\n", errStr ? [errStr UTF8String] : "nil");
}
}
id<MTLComputePipelineState> pipeline = [mtlDevice
newComputePipelineStateWithDescriptor:computePipelineStateDescriptor
options:pipelineOptions
reflection:nullptr
error:&error];
completionHandler(pipeline, nullptr, error);
this->loaded = true;
[computePipelineStateDescriptor release];
@ -763,8 +785,6 @@ void MetalKernelPipeline::compile()
}
}
double duration = time_dt() - starttime;
if (!use_binary_archive) {
metal_printf("%16s | %2d | %-55s | %7.2fs\n",
kernel_type_as_string(pso_type),
@ -791,24 +811,46 @@ bool MetalDeviceKernels::load(MetalDevice *device, MetalPipelineType pso_type)
shader_cache->load_kernel((DeviceKernel)i, device, pso_type);
}
shader_cache->wait_for_all();
metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
kernel_type_as_string(pso_type));
if (getenv("CYCLES_METAL_PROFILING")) {
shader_cache->wait_for_all();
metal_printf("Back-end compilation finished in %.1f seconds (%s)\n",
time_dt() - starttime,
kernel_type_as_string(pso_type));
}
return true;
}
bool MetalDeviceKernels::should_load_kernels(MetalDevice *device, MetalPipelineType pso_type)
bool MetalDeviceKernels::any_specialization_happening_now()
{
auto shader_cache = get_shader_cache(device->mtlDevice);
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
if (shader_cache->should_load_kernel((DeviceKernel)i, device, pso_type)) {
/* Return true if any ShaderCaches have ongoing specialization requests (typically there will be
* only 1). */
thread_scoped_lock lock(g_shaderCacheMutex);
for (auto &it : g_shaderCache) {
if (it.second->incomplete_specialization_requests > 0) {
return true;
}
}
return false;
}
int MetalDeviceKernels::get_loaded_kernel_count(MetalDevice const *device,
MetalPipelineType pso_type)
{
auto shader_cache = get_shader_cache(device->mtlDevice);
int loaded_count = DEVICE_KERNEL_NUM;
for (int i = 0; i < DEVICE_KERNEL_NUM; i++) {
if (shader_cache->should_load_kernel((DeviceKernel)i, device, pso_type)) {
loaded_count -= 1;
}
}
return loaded_count;
}
bool MetalDeviceKernels::should_load_kernels(MetalDevice const *device, MetalPipelineType pso_type)
{
return get_loaded_kernel_count(device, pso_type) != DEVICE_KERNEL_NUM;
}
const MetalKernelPipeline *MetalDeviceKernels::get_best_pipeline(const MetalDevice *device,
DeviceKernel kernel)
{
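Note on the cache bound in compile_thread_func above: each DeviceKernel keeps at most 3 pipelines of the same pso_type, evicting the oldest match before appending the new one. A standalone sketch of that policy (type and names illustrative only):

#include <memory>
#include <vector>

struct Pipeline {
  int pso_type; /* Stands in for MetalPipelineType. */
};

/* Append `entry`, first purging the oldest entry of the same type if the
 * per-type cap would be exceeded. Matches the loop in the diff above. */
void insert_with_cap(std::vector<std::unique_ptr<Pipeline>> &collection,
                     std::unique_ptr<Pipeline> entry,
                     const int max_entries_of_same_pso_type = 3)
{
  int remaining = max_entries_of_same_pso_type;
  for (int i = int(collection.size()) - 1; i >= 0; i--) {
    if (collection[i]->pso_type == entry->pso_type) {
      if (--remaining == 0) {
        collection.erase(collection.begin() + i); /* Oldest matching entry. */
        break;
      }
    }
  }
  collection.push_back(std::move(entry));
}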

View File

@ -702,6 +702,10 @@ bool MetalDeviceQueue::synchronize()
void MetalDeviceQueue::zero_to_device(device_memory &mem)
{
if (metal_device_->have_error()) {
return;
}
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
@ -729,6 +733,10 @@ void MetalDeviceQueue::zero_to_device(device_memory &mem)
void MetalDeviceQueue::copy_to_device(device_memory &mem)
{
if (metal_device_->have_error()) {
return;
}
if (mem.memory_size() == 0) {
return;
}
@ -771,6 +779,10 @@ void MetalDeviceQueue::copy_to_device(device_memory &mem)
void MetalDeviceQueue::copy_from_device(device_memory &mem)
{
if (metal_device_->have_error()) {
return;
}
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {

View File

@ -429,7 +429,12 @@ void OneapiDevice::check_usm(SyclQueue *queue_, const void *usm_ptr, bool allow_
queue->get_device().get_info<sycl::info::device::device_type>();
sycl::usm::alloc usm_type = get_pointer_type(usm_ptr, queue->get_context());
(void)usm_type;
assert(usm_type == sycl::usm::alloc::device ||
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
const sycl::usm::alloc main_memory_type = sycl::usm::alloc::device;
# else
const sycl::usm::alloc main_memory_type = sycl::usm::alloc::host;
# endif
assert(usm_type == main_memory_type ||
(usm_type == sycl::usm::alloc::host &&
(allow_host || device_type == sycl::info::device_type::cpu)) ||
usm_type == sycl::usm::alloc::unknown);
@ -478,7 +483,11 @@ void *OneapiDevice::usm_alloc_device(SyclQueue *queue_, size_t memory_size)
{
assert(queue_);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
return sycl::malloc_device(memory_size, *queue);
# else
return sycl::malloc_host(memory_size, *queue);
# endif
}
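Note: the WITH_ONEAPI_SYCL_HOST_TASK switch re-routes every "device" allocation to host USM so that the debug host-task path can dereference the pointers on the CPU. A condensed sketch of the idea (the function name is illustrative; the SYCL calls are real):

#include <sycl/sycl.hpp>

void *alloc_main_memory(sycl::queue &queue, const size_t size)
{
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
  return sycl::malloc_device(size, queue); /* Normal GPU execution. */
#else
  return sycl::malloc_host(size, queue); /* Host Task debugging: CPU-visible. */
#endif
}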
void OneapiDevice::usm_free(SyclQueue *queue_, void *usm_ptr)
@ -736,7 +745,11 @@ char *OneapiDevice::device_capabilities()
const std::vector<sycl::device> &oneapi_devices = available_devices();
for (const sycl::device &device : oneapi_devices) {
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
const std::string &name = device.get_info<sycl::info::device::name>();
# else
const std::string &name = "SYCL Host Task (Debug)";
# endif
capabilities << std::string("\t") << name << "\n";
# define WRITE_ATTR(attribute_name, attribute_variable) \
@ -813,7 +826,11 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
for (sycl::device &device : devices) {
const std::string &platform_name =
device.get_platform().get_info<sycl::info::platform::name>();
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
std::string name = device.get_info<sycl::info::device::name>();
# else
std::string name = "SYCL Host Task (Debug)";
# endif
std::string id = "ONEAPI_" + platform_name + "_" + name;
if (device.has(sycl::aspect::ext_intel_pci_address)) {
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());

View File

@ -390,6 +390,9 @@ void PathTrace::path_trace(RenderWork &render_work)
const int num_samples = render_work.path_trace.num_samples;
PathTraceWork *path_trace_work = path_trace_works_[i].get();
if (path_trace_work->get_device()->have_error()) {
return;
}
PathTraceWork::RenderStatistics statistics;
path_trace_work->render_samples(statistics,

View File

@ -752,6 +752,10 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
${SYCL_CPP_FLAGS}
)
if (WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_TASK)
endif()
# Set defaults for spir64 and spir64_gen options
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64 "-options '-ze-opt-large-register-file -ze-opt-regular-grf-kernel integrator_intersect'")
@ -763,7 +767,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ")
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ")
if(WITH_CYCLES_ONEAPI_BINARIES)
# Host execution won't use GPU binaries, no need to compile them.
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
# AoT binaries aren't currently reused when calling sycl::build.
list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
# Iterate over all targets and their options

View File

@ -30,6 +30,16 @@ void gpu_parallel_active_index_array_impl(const uint num_states,
ccl_global int *ccl_restrict num_indices,
IsActiveOp is_active_op)
{
# ifdef WITH_ONEAPI_SYCL_HOST_TASK
int write_index = 0;
for (int state_index = 0; state_index < num_states; state_index++) {
if (is_active_op(state_index))
indices[write_index++] = state_index;
}
*num_indices = write_index;
return;
# endif /* WITH_ONEAPI_SYCL_HOST_TASK */
const sycl::nd_item<1> &item_id = sycl::ext::oneapi::experimental::this_nd_item<1>();
const uint blocksize = item_id.get_local_range(0);

View File

@ -56,7 +56,8 @@
#define ccl_gpu_kernel(block_num_threads, thread_num_registers)
#define ccl_gpu_kernel_threads(block_num_threads)
#define ccl_gpu_kernel_signature(name, ...) \
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
# define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
@ -67,9 +68,37 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
sycl::nd_range<1>(kernel_global_size, kernel_local_size), \
[=](sycl::nd_item<1> item) {
#define ccl_gpu_kernel_postfix \
# define ccl_gpu_kernel_postfix \
}); \
}
#else
/* Additional anonymous lambda is required to handle all "return" statements in the kernel code */
# define ccl_gpu_kernel_signature(name, ...) \
void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
size_t kernel_global_size, \
size_t kernel_local_size, \
sycl::handler &cgh, \
__VA_ARGS__) { \
(kg); \
(kernel_local_size); \
cgh.host_task( \
[=]() {\
for (size_t gid = (size_t)0; gid < kernel_global_size; gid++) { \
kg->nd_item_local_id_0 = 0; \
kg->nd_item_local_range_0 = 1; \
kg->nd_item_group_id_0 = gid; \
kg->nd_item_group_range_0 = kernel_global_size; \
kg->nd_item_global_id_0 = gid; \
kg->nd_item_global_range_0 = kernel_global_size; \
auto kernel = [=]() {
# define ccl_gpu_kernel_postfix \
}; \
kernel(); \
} \
}); \
}
#endif
#define ccl_gpu_kernel_call(x) ((ONEAPIKernelContext*)kg)->x
@ -83,23 +112,40 @@ void oneapi_kernel_##name(KernelGlobalsGPU *ccl_restrict kg, \
} ccl_gpu_kernel_lambda_pass((ONEAPIKernelContext *)kg)
/* GPU thread, block, grid size and index */
#define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
#define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
#define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
#define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
#define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
#define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
#define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
#define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
#ifndef WITH_ONEAPI_SYCL_HOST_TASK
# define ccl_gpu_thread_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_id(0))
# define ccl_gpu_block_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_local_range(0))
# define ccl_gpu_block_idx_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group(0))
# define ccl_gpu_grid_dim_x (sycl::ext::oneapi::experimental::this_nd_item<1>().get_group_range(0))
# define ccl_gpu_warp_size (sycl::ext::oneapi::experimental::this_sub_group().get_local_range()[0])
# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
# define ccl_gpu_global_id_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_id(0))
# define ccl_gpu_global_size_x() (sycl::ext::oneapi::experimental::this_nd_item<1>().get_global_range(0))
/* GPU warp synchronization */
#define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier()
#define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space)
#ifdef __SYCL_DEVICE_ONLY__
#define ccl_gpu_ballot(predicate) (sycl::ext::oneapi::group_ballot(sycl::ext::oneapi::experimental::this_sub_group(), predicate).count())
# define ccl_gpu_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier()
# define ccl_gpu_local_syncthreads() sycl::ext::oneapi::experimental::this_nd_item<1>().barrier(sycl::access::fence_space::local_space)
# ifdef __SYCL_DEVICE_ONLY__
# define ccl_gpu_ballot(predicate) (sycl::ext::oneapi::group_ballot(sycl::ext::oneapi::experimental::this_sub_group(), predicate).count())
# else
# define ccl_gpu_ballot(predicate) (predicate ? 1 : 0)
# endif
#else
#define ccl_gpu_ballot(predicate) (predicate ? 1 : 0)
# define ccl_gpu_thread_idx_x (kg->nd_item_local_id_0)
# define ccl_gpu_block_dim_x (kg->nd_item_local_range_0)
# define ccl_gpu_block_idx_x (kg->nd_item_group_id_0)
# define ccl_gpu_grid_dim_x (kg->nd_item_group_range_0)
# define ccl_gpu_warp_size (1)
# define ccl_gpu_thread_mask(thread_warp) uint(0xFFFFFFFF >> (ccl_gpu_warp_size - thread_warp))
# define ccl_gpu_global_id_x() (kg->nd_item_global_id_0)
# define ccl_gpu_global_size_x() (kg->nd_item_global_range_0)
# define ccl_gpu_syncthreads()
# define ccl_gpu_local_syncthreads()
# define ccl_gpu_ballot(predicate) (predicate ? 1 : 0)
#endif
/* Debug defines */
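Note on the "additional anonymous lambda" comment above: in the host-task macro, a `return` inside the kernel body must end only the current work item, not the serial loop over the whole global range, so the body is wrapped in an inner lambda invoked once per iteration. A runnable illustration of the trick:

#include <cstdio>

int main()
{
  const size_t kernel_global_size = 4;
  for (size_t gid = 0; gid < kernel_global_size; gid++) {
    auto kernel = [=]() {
      if (gid == 1) {
        return; /* Ends only this "work item". */
      }
      std::printf("work item %zu ran\n", gid);
    };
    kernel(); /* The loop continues even after an early return above. */
  }
  return 0;
}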

View File

@ -23,6 +23,15 @@ typedef struct KernelGlobalsGPU {
#undef KERNEL_DATA_ARRAY
IntegratorStateGPU *integrator_state;
const KernelData *__data;
#ifdef WITH_ONEAPI_SYCL_HOST_TASK
size_t nd_item_local_id_0;
size_t nd_item_local_range_0;
size_t nd_item_group_id_0;
size_t nd_item_group_range_0;
size_t nd_item_global_id_0;
size_t nd_item_global_range_0;
#endif
} KernelGlobalsGPU;
typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;

View File

@ -230,6 +230,12 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
/* NOTE(@nsirgien): For now, non-uniform work-groups don't work on most oneAPI devices,
 * so we extend the work size to fit uniformity requirements. */
global_size = groups_count * local_size;
# ifdef WITH_ONEAPI_SYCL_HOST_TASK
/* Path array implementation is serial in case of SYCL Host Task execution. */
global_size = 1;
local_size = 1;
# endif
}
/* Let the compiler throw an error if there are any kernels missing in this implementation. */

View File

@ -1532,7 +1532,7 @@ ccl_device_extern void osl_texture_set_missingcolor_alpha(ccl_private OSLTexture
ccl_device_extern bool osl_texture(ccl_private ShaderGlobals *sg,
DeviceString filename,
ccl_private void *texture_handle,
OSLTextureOptions *opt,
ccl_private OSLTextureOptions *opt,
float s,
float t,
float dsdx,
@ -1557,13 +1557,14 @@ ccl_device_extern bool osl_texture(ccl_private ShaderGlobals *sg,
const float4 rgba = kernel_tex_image_interp(nullptr, id, s, 1.0f - t);
result[0] = rgba.x;
if (nchannels > 0)
result[0] = rgba.x;
if (nchannels > 1)
result[1] = rgba.y;
if (nchannels > 2)
result[2] = rgba.z;
if (nchannels > 3)
result[3] = rgba.w;
if (alpha)
*alpha = rgba.w;
return true;
}
@ -1571,7 +1572,7 @@ ccl_device_extern bool osl_texture(ccl_private ShaderGlobals *sg,
ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg,
DeviceString filename,
ccl_private void *texture_handle,
OSLTextureOptions *opt,
ccl_private OSLTextureOptions *opt,
ccl_private const float3 *P,
ccl_private const float3 *dPdx,
ccl_private const float3 *dPdy,
@ -1594,13 +1595,14 @@ ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg,
const float4 rgba = kernel_tex_image_interp_3d(nullptr, id, *P, INTERPOLATION_NONE);
result[0] = rgba.x;
if (nchannels > 0)
result[0] = rgba.x;
if (nchannels > 1)
result[1] = rgba.y;
if (nchannels > 2)
result[2] = rgba.z;
if (nchannels > 3)
result[3] = rgba.w;
if (alpha)
*alpha = rgba.w;
return true;
}
@ -1608,7 +1610,7 @@ ccl_device_extern bool osl_texture3d(ccl_private ShaderGlobals *sg,
ccl_device_extern bool osl_environment(ccl_private ShaderGlobals *sg,
DeviceString filename,
ccl_private void *texture_handle,
OSLTextureOptions *opt,
ccl_private OSLTextureOptions *opt,
ccl_private const float3 *R,
ccl_private const float3 *dRdx,
ccl_private const float3 *dRdy,
@ -1621,13 +1623,14 @@ ccl_device_extern bool osl_environment(ccl_private ShaderGlobals *sg,
ccl_private float *dalphay,
ccl_private void *errormessage)
{
result[0] = 1.0f;
if (nchannels > 0)
result[0] = 1.0f;
if (nchannels > 1)
result[1] = 0.0f;
if (nchannels > 2)
result[2] = 1.0f;
if (nchannels > 3)
result[3] = 1.0f;
if (alpha)
*alpha = 1.0f;
return false;
}

View File

@ -113,14 +113,18 @@ static void oiio_load_pixels(const ImageMetaData &metadata,
if (depth <= 1) {
size_t scanlinesize = width * components * sizeof(StorageType);
in->read_image(FileFormat,
in->read_image(0,
0,
0,
components,
FileFormat,
(uchar *)readpixels + (height - 1) * scanlinesize,
AutoStride,
-scanlinesize,
AutoStride);
}
else {
in->read_image(FileFormat, (uchar *)readpixels);
in->read_image(0, 0, 0, components, FileFormat, (uchar *)readpixels);
}
if (components > 4) {
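Note: these call sites are updated for the OpenImageIO 2.4 ImageInput::read_image signature, which takes explicit subimage, miplevel and channel-range arguments before the type and buffer. A minimal sketch of the new form (assuming an already-opened ImageInput and a suitably sized buffer):

#include <OpenImageIO/imageio.h>

bool read_all_channels(OIIO::ImageInput &in, float *pixels)
{
  const int num_channels = in.spec().nchannels;
  /* subimage 0, miplevel 0, channels [0, num_channels). */
  return in.read_image(0, 0, 0, num_channels, OIIO::TypeDesc::FLOAT, pixels);
}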

View File

@ -439,9 +439,12 @@ bool DenoiseImage::read_previous_pixels(const DenoiseImageLayer &layer,
{
/* Load pixels from neighboring frames, and copy them into device buffer
* with channels reshuffled. */
size_t num_pixels = (size_t)width * (size_t)height;
const size_t num_pixels = (size_t)width * (size_t)height;
const int num_channels = in_previous->spec().nchannels;
array<float> neighbor_pixels(num_pixels * num_channels);
if (!in_previous->read_image(TypeDesc::FLOAT, neighbor_pixels.data())) {
if (!in_previous->read_image(0, 0, 0, num_channels, TypeDesc::FLOAT, neighbor_pixels.data())) {
return false;
}
@ -491,7 +494,7 @@ bool DenoiseImage::load(const string &in_filepath, string &error)
/* Read all channels into buffer. Reading all channels at once is faster
* than individually due to interleaved EXR channel storage. */
if (!in->read_image(TypeDesc::FLOAT, pixels.data())) {
if (!in->read_image(0, 0, 0, num_channels, TypeDesc::FLOAT, pixels.data())) {
error = "Failed to read image: " + in_filepath;
return false;
}

View File

@ -401,8 +401,8 @@ static bool merge_pixels(const vector<MergeImage> &images,
* faster than individually due to interleaved EXR channel storage. */
array<float> pixels;
alloc_pixels(image.in->spec(), pixels);
if (!image.in->read_image(TypeDesc::FLOAT, pixels.data())) {
const int num_channels = image.in->spec().nchannels;
if (!image.in->read_image(0, 0, 0, num_channels, TypeDesc::FLOAT, pixels.data())) {
error = "Failed to read image: " + image.filepath;
return false;
}
@ -538,6 +538,7 @@ static void read_layer_samples(vector<MergeImage> &images,
/* Load the "Debug Sample Count" pass and add the samples to the layer's sample count. */
array<float> sample_count_buffer;
sample_count_buffer.resize(in_spec.width * in_spec.height);
image.in->read_image(0,
0,
layer.sample_pass_offset,

View File

@ -113,6 +113,9 @@ void Session::start()
void Session::cancel(bool quick)
{
/* Cancel any long running device operations (e.g. shader compilations). */
device->cancel();
/* Check if session thread is rendering. */
const bool rendering = is_session_thread_rendering();
@ -401,6 +404,16 @@ RenderWork Session::run_update_for_next_iteration()
path_trace_->load_kernels();
path_trace_->alloc_work_memory();
/* Wait for device to be ready (e.g. finish any background compilations). */
string device_status;
while (!device->is_ready(device_status)) {
progress.set_status(device_status);
if (progress.get_cancel()) {
break;
}
std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
progress.add_skip_time(update_timer, params.background);
}

View File

@ -646,7 +646,8 @@ bool TileManager::read_full_buffer_from_disk(const string_view filename,
return false;
}
if (!in->read_image(TypeDesc::FLOAT, buffers->buffer.data())) {
const int num_channels = in->spec().nchannels;
if (!in->read_image(0, 0, 0, num_channels, TypeDesc::FLOAT, buffers->buffer.data())) {
LOG(ERROR) << "Error reading pixels from the tile file " << in->geterror();
return false;
}

View File

@ -1223,13 +1223,12 @@ static void gwl_registry_entry_update_all(GWL_Display *display, const int interf
continue;
}
GWL_RegisteryUpdate_Params params = {
.name = reg->name,
.interface_slot = reg->interface_slot,
.version = reg->version,
.user_data = reg->user_data,
};
GWL_RegisteryUpdate_Params params{};
params.name = reg->name;
params.interface_slot = reg->interface_slot;
params.version = reg->version;
params.user_data = reg->user_data;
handler->update_fn(display, &params);
}
}
@ -4535,18 +4534,7 @@ static void output_handle_scale(void *data, struct wl_output * /*wl_output*/, co
CLOG_INFO(LOG, 2, "scale");
GWL_Output *output = static_cast<GWL_Output *>(data);
output->scale = factor;
GHOST_WindowManager *window_manager = output->system->getWindowManager();
if (window_manager) {
for (GHOST_IWindow *iwin : window_manager->getWindows()) {
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(iwin);
const std::vector<GWL_Output *> &outputs = win->outputs();
if (std::find(outputs.begin(), outputs.end(), output) == outputs.cend()) {
continue;
}
win->outputs_changed_update_scale();
}
}
output->system->output_scale_update_maybe_leave(output, false);
}
static const struct wl_output_listener output_listener = {
@ -4736,11 +4724,24 @@ static void gwl_registry_wl_output_update(GWL_Display *display,
}
static void gwl_registry_wl_output_remove(GWL_Display *display,
void *user_data,
const bool /*on_exit*/)
const bool on_exit)
{
/* While windows & cursors hold references to outputs, there is no need to manually remove
* these references as the compositor will remove references via #wl_surface_listener.leave. */
* these references as the compositor will remove references via #wl_surface_listener.leave.
*
* WARNING: this is not the case for WLROOTS-based compositors, which have a (bug?)
* where surface leave events don't run. So `system->output_leave(..)` is needed
* until the issue is resolved in WLROOTS. */
GWL_Output *output = static_cast<GWL_Output *>(user_data);
if (!on_exit) {
/* Needed for WLROOTS, does nothing if surface leave callbacks have already run. */
output->system->output_scale_update_maybe_leave(output, true);
}
if (output->xdg_output) {
zxdg_output_v1_destroy(output->xdg_output);
}
wl_output_destroy(output->wl_output);
std::vector<GWL_Output *>::iterator iter = std::find(
display->outputs.begin(), display->outputs.end(), output);
@ -5176,11 +5177,10 @@ static void global_handle_add(void *data,
const GWL_RegistryEntry *registry_entry_prev = display->registry_entry;
/* The interface name that is ensured not to be freed. */
GWL_RegisteryAdd_Params params = {
.name = name,
.interface_slot = interface_slot,
.version = version,
};
GWL_RegisteryAdd_Params params{};
params.name = name;
params.interface_slot = interface_slot;
params.version = version;
handler->add_fn(display, &params);
@ -6762,6 +6762,49 @@ void GHOST_SystemWayland::window_surface_unref(const wl_surface *wl_surface)
#undef SURFACE_CLEAR_PTR
}
void GHOST_SystemWayland::output_scale_update_maybe_leave(GWL_Output *output, bool leave)
{
/* Update scale, optionally leaving the outputs beforehand. */
GHOST_WindowManager *window_manager = output->system->getWindowManager();
if (window_manager) {
for (GHOST_IWindow *iwin : window_manager->getWindows()) {
GHOST_WindowWayland *win = static_cast<GHOST_WindowWayland *>(iwin);
const std::vector<GWL_Output *> &outputs = win->outputs();
bool found = leave ? win->outputs_leave(output) :
!(std::find(outputs.begin(), outputs.end(), output) == outputs.cend());
if (found) {
win->outputs_changed_update_scale();
}
}
}
for (GWL_Seat *seat : display_->seats) {
bool found;
found = leave ? seat->pointer.outputs.erase(output) : seat->pointer.outputs.count(output);
if (found) {
if (seat->cursor.wl_surface_cursor != nullptr) {
update_cursor_scale(
seat->cursor, seat->system->wl_shm(), &seat->pointer, seat->cursor.wl_surface_cursor);
}
}
found = leave ? seat->tablet.outputs.erase(output) : seat->tablet.outputs.count(output);
if (found) {
for (struct zwp_tablet_tool_v2 *zwp_tablet_tool_v2 : seat->tablet_tools) {
GWL_TabletTool *tablet_tool = static_cast<GWL_TabletTool *>(
zwp_tablet_tool_v2_get_user_data(zwp_tablet_tool_v2));
if (tablet_tool->wl_surface_cursor != nullptr) {
update_cursor_scale(seat->cursor,
seat->system->wl_shm(),
&seat->pointer,
tablet_tool->wl_surface_cursor);
}
}
}
}
}
bool GHOST_SystemWayland::window_cursor_grab_set(const GHOST_TGrabCursorMode mode,
const GHOST_TGrabCursorMode mode_current,
int32_t init_grab_xy[2],

View File

@ -194,6 +194,8 @@ class GHOST_SystemWayland : public GHOST_System {
/** Set this seat to be active. */
void seat_active_set(const struct GWL_Seat *seat);
void output_scale_update_maybe_leave(GWL_Output *output, bool leave);
/** Clear all references to this surface to prevent accessing NULL pointers. */
void window_surface_unref(const wl_surface *wl_surface);

View File

@ -1361,9 +1361,6 @@ GHOST_TSuccess GHOST_WindowWayland::notify_size()
* Functionality only used for the WAYLAND implementation.
* \{ */
/**
* Return true when the windows scale or DPI changes.
*/
bool GHOST_WindowWayland::outputs_changed_update_scale()
{
#ifdef USE_EVENT_BACKGROUND_THREAD

View File

@ -156,6 +156,9 @@ class GHOST_WindowWayland : public GHOST_Window {
bool outputs_enter(GWL_Output *output);
bool outputs_leave(GWL_Output *output);
/**
* Return true when the window's scale or DPI changes.
*/
bool outputs_changed_update_scale();
#ifdef USE_EVENT_BACKGROUND_THREAD

View File

@ -20,6 +20,7 @@ set(SRC
./intern/mallocn.c
./intern/mallocn_guarded_impl.c
./intern/mallocn_lockfree_impl.c
./intern/memory_usage.cc
MEM_guardedalloc.h
./intern/mallocn_inline.h

View File

@ -53,6 +53,9 @@ class MemLeakPrinter {
void MEM_init_memleak_detection()
{
/* Calling this ensures that the memory usage counters outlive the memory leak detection. */
memory_usage_init();
/**
* This variable is constructed when this function is first called. This should happen as soon as
* possible when the program starts.

View File

@ -89,6 +89,14 @@ void aligned_free(void *ptr);
extern bool leak_detector_has_run;
extern char free_after_leak_detection_message[];
void memory_usage_init(void);
void memory_usage_block_alloc(size_t size);
void memory_usage_block_free(size_t size);
size_t memory_usage_block_num(void);
size_t memory_usage_current(void);
size_t memory_usage_peak(void);
void memory_usage_peak_reset(void);
/* Prototypes for counted allocator functions */
size_t MEM_lockfree_allocN_len(const void *vmemh) ATTR_WARN_UNUSED_RESULT;
void MEM_lockfree_freeN(void *vmemh);

View File

@ -30,8 +30,6 @@ typedef struct MemHeadAligned {
size_t len;
} MemHeadAligned;
static unsigned int totblock = 0;
static size_t mem_in_use = 0, peak_mem = 0;
static bool malloc_debug_memset = false;
static void (*error_callback)(const char *) = NULL;
@ -46,18 +44,6 @@ enum {
#define MEMHEAD_IS_ALIGNED(memhead) ((memhead)->len & (size_t)MEMHEAD_ALIGN_FLAG)
#define MEMHEAD_LEN(memhead) ((memhead)->len & ~((size_t)(MEMHEAD_ALIGN_FLAG)))
/* Uncomment this to have proper peak counter. */
#define USE_ATOMIC_MAX
MEM_INLINE void update_maximum(size_t *maximum_value, size_t value)
{
#ifdef USE_ATOMIC_MAX
atomic_fetch_and_update_max_z(maximum_value, value);
#else
*maximum_value = value > *maximum_value ? value : *maximum_value;
#endif
}
#ifdef __GNUC__
__attribute__((format(printf, 1, 2)))
#endif
@ -103,8 +89,7 @@ void MEM_lockfree_freeN(void *vmemh)
MemHead *memh = MEMHEAD_FROM_PTR(vmemh);
size_t len = MEMHEAD_LEN(memh);
atomic_sub_and_fetch_u(&totblock, 1);
atomic_sub_and_fetch_z(&mem_in_use, len);
memory_usage_block_free(len);
if (UNLIKELY(malloc_debug_memset && len)) {
memset(memh + 1, 255, len);
@ -224,16 +209,14 @@ void *MEM_lockfree_callocN(size_t len, const char *str)
if (LIKELY(memh)) {
memh->len = len;
atomic_add_and_fetch_u(&totblock, 1);
atomic_add_and_fetch_z(&mem_in_use, len);
update_maximum(&peak_mem, mem_in_use);
memory_usage_block_alloc(len);
return PTR_FROM_MEMHEAD(memh);
}
print_error("Calloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len),
str,
(uint)mem_in_use);
(uint)memory_usage_current());
return NULL;
}
@ -247,7 +230,7 @@ void *MEM_lockfree_calloc_arrayN(size_t len, size_t size, const char *str)
SIZET_ARG(len),
SIZET_ARG(size),
str,
(unsigned int)mem_in_use);
(unsigned int)memory_usage_current());
abort();
return NULL;
}
@ -269,16 +252,14 @@ void *MEM_lockfree_mallocN(size_t len, const char *str)
}
memh->len = len;
atomic_add_and_fetch_u(&totblock, 1);
atomic_add_and_fetch_z(&mem_in_use, len);
update_maximum(&peak_mem, mem_in_use);
memory_usage_block_alloc(len);
return PTR_FROM_MEMHEAD(memh);
}
print_error("Malloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len),
str,
(uint)mem_in_use);
(uint)memory_usage_current());
return NULL;
}
@ -292,7 +273,7 @@ void *MEM_lockfree_malloc_arrayN(size_t len, size_t size, const char *str)
SIZET_ARG(len),
SIZET_ARG(size),
str,
(uint)mem_in_use);
(uint)memory_usage_current());
abort();
return NULL;
}
@ -340,16 +321,14 @@ void *MEM_lockfree_mallocN_aligned(size_t len, size_t alignment, const char *str
memh->len = len | (size_t)MEMHEAD_ALIGN_FLAG;
memh->alignment = (short)alignment;
atomic_add_and_fetch_u(&totblock, 1);
atomic_add_and_fetch_z(&mem_in_use, len);
update_maximum(&peak_mem, mem_in_use);
memory_usage_block_alloc(len);
return PTR_FROM_MEMHEAD(memh);
}
print_error("Malloc returns null: len=" SIZET_FORMAT " in %s, total %u\n",
SIZET_ARG(len),
str,
(uint)mem_in_use);
(uint)memory_usage_current());
return NULL;
}
@ -369,8 +348,8 @@ void MEM_lockfree_callbackmemlist(void (*func)(void *))
void MEM_lockfree_printmemlist_stats(void)
{
printf("\ntotal memory len: %.3f MB\n", (double)mem_in_use / (double)(1024 * 1024));
printf("peak memory len: %.3f MB\n", (double)peak_mem / (double)(1024 * 1024));
printf("\ntotal memory len: %.3f MB\n", (double)memory_usage_current() / (double)(1024 * 1024));
printf("peak memory len: %.3f MB\n", (double)memory_usage_peak() / (double)(1024 * 1024));
printf(
"\nFor more detailed per-block statistics run Blender with memory debugging command line "
"argument.\n");
@ -398,23 +377,23 @@ void MEM_lockfree_set_memory_debug(void)
size_t MEM_lockfree_get_memory_in_use(void)
{
return mem_in_use;
return memory_usage_current();
}
uint MEM_lockfree_get_memory_blocks_in_use(void)
{
return totblock;
return (uint)memory_usage_block_num();
}
/* dummy */
void MEM_lockfree_reset_peak_memory(void)
{
peak_mem = mem_in_use;
memory_usage_peak_reset();
}
size_t MEM_lockfree_get_peak_memory(void)
{
return peak_mem;
return memory_usage_peak();
}
#ifndef NDEBUG
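
The removed `update_maximum` helper implemented a lock-free maximum via `atomic_fetch_and_update_max_z`. For reference, a sketch of the equivalent compare-and-swap loop (an assumption based on the removed code; the actual atomic helper lives elsewhere in Blender's atomics library). The new `memory_usage.cc` below avoids paying this cost on every allocation by keeping per-thread counters and folding them into the global peak only after a threshold of newly allocated bytes:

#include <atomic>
#include <cstddef>

/* Raise `maximum_value` to `value` if `value` is larger, without locks. The
 * weak CAS refreshes `prev` on failure, so the loop exits either when this
 * thread stores the new maximum or another thread has stored a larger one. */
static void update_maximum(std::atomic<size_t> &maximum_value, size_t value)
{
  size_t prev = maximum_value.load(std::memory_order_relaxed);
  while (prev < value &&
         !maximum_value.compare_exchange_weak(
             prev, value, std::memory_order_relaxed, std::memory_order_relaxed))
  {
    /* Retry with the refreshed `prev`. */
  }
}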

View File

@ -0,0 +1,258 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include <algorithm>
#include <atomic>
#include <cassert>
#include <iostream>
#include <mutex>
#include <vector>
#include "MEM_guardedalloc.h"
#include "mallocn_intern.h"
#include "../../source/blender/blenlib/BLI_strict_flags.h"
namespace {
/**
* This is stored per thread. Align to cache line size to avoid false sharing.
*/
struct alignas(64) Local {
/** Helps to find bugs during program shutdown. */
bool destructed = false;
/**
* This is the first #Local that is created, and it lives on the main thread. When the main
* thread's local data is destructed, we know that Blender is quitting and that we can no
* longer rely on thread locals being available.
*/
bool is_main = false;
/**
* Number of bytes. This can be negative when e.g. one thread allocates a lot of memory, and
* another frees it. It has to be an atomic, because it may be accessed by other threads when the
* total memory usage is counted.
*/
std::atomic<int64_t> mem_in_use = 0;
/**
* Number of allocated blocks. Can be negative and is atomic for the same reason as above.
*/
std::atomic<int64_t> blocks_num = 0;
/**
* Amount of memory used when the peak was last updated. This is used so that we don't have to
* update the peak memory usage after every memory allocation. Instead it's only updated when "a
* lot" of new memory has been allocated. This makes the peak memory usage a little bit less
* accurate, but it's still good enough for practical purposes.
*/
std::atomic<int64_t> mem_in_use_during_peak_update = 0;
Local();
~Local();
};
/**
* This is a singleton that stores global data.
*/
struct Global {
/**
* Mutex that protects the vector below.
*/
std::mutex locals_mutex;
/**
* All currently constructed #Local. This must only be accessed when the mutex above is
* locked. Individual threads insert and remove themselves here.
*/
std::vector<Local *> locals;
/**
* Number of bytes that are not tracked by #Local. This is necessary because when a thread exits,
* its #Local data is freed. The memory counts stored there would be lost. The memory counts may
* be non-zero during thread destruction, if the thread did an unequal number of allocations and
* frees (which is perfectly valid behavior as long as other threads have the responsibility to
* free any memory that the thread allocated).
*
* To solve this, the memory counts are added to these global counters when the thread
* exits. The global counters are also used when the entire process starts to exit, because the
* #Local data of the main thread is already destructed when the leak detection happens (during
* destruction of static variables which happens after destruction of threadlocals).
*/
std::atomic<int64_t> mem_in_use_outside_locals = 0;
/**
* Number of blocks that are not tracked by #Local, for the same reason as above.
*/
std::atomic<int64_t> blocks_num_outside_locals = 0;
/**
* Peak memory usage since the last reset.
*/
std::atomic<size_t> peak = 0;
};
} // namespace
/**
* This is true for most of the lifetime of the program. Only when the program starts exiting
* does this become false, indicating that the global counters should be used for correctness.
*/
static std::atomic<bool> use_local_counters = true;
/**
* When a thread has allocated this amount of memory, the peak memory usage is updated. An alternative
* would be to update the global peak memory after every allocation, but that would cause much more
* overhead with little benefit.
*/
static constexpr int64_t peak_update_threshold = 1024 * 1024;
static Global &get_global()
{
static Global global;
return global;
}
static Local &get_local_data()
{
static thread_local Local local;
assert(!local.destructed);
return local;
}
Local::Local()
{
Global &global = get_global();
std::lock_guard lock{global.locals_mutex};
if (global.locals.empty()) {
/* This is the first thread creating a #Local; it is therefore the main thread, because the
* first #Local is created through #memory_usage_init. */
this->is_main = true;
}
/* Register self in the global list. */
global.locals.push_back(this);
}
Local::~Local()
{
Global &global = get_global();
std::lock_guard lock{global.locals_mutex};
/* Unregister self from the global list. */
global.locals.erase(std::find(global.locals.begin(), global.locals.end(), this));
/* Don't forget the memory counts stored locally. */
global.blocks_num_outside_locals.fetch_add(this->blocks_num, std::memory_order_relaxed);
global.mem_in_use_outside_locals.fetch_add(this->mem_in_use, std::memory_order_relaxed);
if (this->is_main) {
/* The main thread started shutting down. Use global counters from now on to avoid accessing
* threadlocals after they have been destructed. */
use_local_counters.store(false, std::memory_order_relaxed);
}
/* Helps to detect when thread locals are accidentally accessed after destruction. */
this->destructed = true;
}
/** Check if the current memory usage is higher than the peak and update it if yes. */
static void update_global_peak()
{
Global &global = get_global();
/* Update peak. */
global.peak = std::max<size_t>(global.peak, memory_usage_current());
std::lock_guard lock{global.locals_mutex};
for (Local *local : global.locals) {
assert(!local->destructed);
/* Updating this makes sure that the peak is not updated too often, which would degrade
* performance. */
local->mem_in_use_during_peak_update = local->mem_in_use.load(std::memory_order_relaxed);
}
}
void memory_usage_init()
{
/* Makes sure that the static and threadlocal variables on the main thread are initialized. */
get_local_data();
}
void memory_usage_block_alloc(const size_t size)
{
if (LIKELY(use_local_counters.load(std::memory_order_relaxed))) {
Local &local = get_local_data();
/* Increase local memory counts. This does not cause thread synchronization in the majority of
* cases, because each thread has these counters on a separate cache line. It may only cause
* synchronization if another thread is computing the total current memory usage at the same
* time, which is very rare compared to doing allocations. */
local.blocks_num.fetch_add(1, std::memory_order_relaxed);
local.mem_in_use.fetch_add(int64_t(size), std::memory_order_relaxed);
/* If a certain amount of new memory has been allocated, update the peak. */
if (local.mem_in_use - local.mem_in_use_during_peak_update > peak_update_threshold) {
update_global_peak();
}
}
else {
Global &global = get_global();
/* Increase global memory counts. */
global.blocks_num_outside_locals.fetch_add(1, std::memory_order_relaxed);
global.mem_in_use_outside_locals.fetch_add(int64_t(size), std::memory_order_relaxed);
}
}
void memory_usage_block_free(const size_t size)
{
if (LIKELY(use_local_counters)) {
/* Decrease local memory counts. See comment in #memory_usage_block_alloc for details regarding
* thread synchronization. */
Local &local = get_local_data();
local.mem_in_use.fetch_sub(int64_t(size), std::memory_order_relaxed);
local.blocks_num.fetch_sub(1, std::memory_order_relaxed);
}
else {
Global &global = get_global();
/* Decrease global memory counts. */
global.blocks_num_outside_locals.fetch_sub(1, std::memory_order_relaxed);
global.mem_in_use_outside_locals.fetch_sub(int64_t(size), std::memory_order_relaxed);
}
}
size_t memory_usage_block_num()
{
Global &global = get_global();
std::lock_guard lock{global.locals_mutex};
/* Count the number of active blocks. */
int64_t blocks_num = global.blocks_num_outside_locals;
for (Local *local : global.locals) {
blocks_num += local->blocks_num;
}
return size_t(blocks_num);
}
size_t memory_usage_current()
{
Global &global = get_global();
std::lock_guard lock{global.locals_mutex};
/* Count the memory that's currently in use. */
int64_t mem_in_use = global.mem_in_use_outside_locals;
for (Local *local : global.locals) {
mem_in_use += local->mem_in_use;
}
return size_t(mem_in_use);
}
/**
* Get the approximate peak memory usage since the last call to #memory_usage_peak_reset.
* This is approximate, because the peak usage is not updated after every allocation (see
* #peak_update_threshold).
*
* In the worst case, the peak memory usage is underestimated by
* `peak_update_threshold * #threads`. After large allocations (larger than the threshold), the
* peak usage is always updated so those allocations will always be taken into account.
*/
size_t memory_usage_peak()
{
update_global_peak();
Global &global = get_global();
return global.peak;
}
void memory_usage_peak_reset()
{
Global &global = get_global();
global.peak = memory_usage_current();
}
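
A minimal usage sketch of the counters above, with hypothetical counted wrappers around `malloc`/`free` standing in for the real callers (the `MEM_lockfree_*` functions earlier in this change). Note that `memory_usage_peak()` folds the current usage into the peak before returning, so freeing memory does not erase the recorded high-water mark:

#include <cstdio>
#include <cstdlib>

#include "mallocn_intern.h" /* Declares the memory_usage_* functions. */

static void *counted_malloc(const size_t size)
{
  void *ptr = malloc(size);
  if (ptr != nullptr) {
    memory_usage_block_alloc(size);
  }
  return ptr;
}

static void counted_free(void *ptr, const size_t size)
{
  memory_usage_block_free(size);
  free(ptr);
}

int main()
{
  memory_usage_init(); /* Ensures the main thread owns the first #Local. */
  void *buffer = counted_malloc(4 * 1024 * 1024);
  printf("in use: %zu bytes in %zu blocks\n", memory_usage_current(), memory_usage_block_num());
  counted_free(buffer, 4 * 1024 * 1024);
  printf("peak: %zu bytes\n", memory_usage_peak()); /* Still reports >= 4 MiB. */
  return 0;
}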

View File

@ -6,6 +6,7 @@
*/
#include <boost/locale.hpp>
#include <iostream>
#include <stdio.h>
#include "boost_locale_wrapper.h"

View File

@ -5050,30 +5050,33 @@ def km_sculpt(params):
# Expand
("sculpt.expand", {"type": 'A', "value": 'PRESS', "shift": True},
{"properties": [
("target", "MASK"),
("falloff_type", "GEODESIC"),
("invert", True),
("use_auto_mask", True),
("use_mask_preserve" , True)]}),
("target", "MASK"),
("falloff_type", "GEODESIC"),
("invert", True),
("use_auto_mask", True),
("use_mask_preserve", True),
]}),
("sculpt.expand", {"type": 'A', "value": 'PRESS', "shift": True, "alt": True},
{"properties": [
("target", "MASK"),
("falloff_type", "NORMALS"),
("invert", False),
("use_mask_preserve" , True)]}),
("target", "MASK"),
("falloff_type", "NORMALS"),
("invert", False),
("use_mask_preserve", True),
]}),
("sculpt.expand", {"type": 'W', "value": 'PRESS', "shift": True},
{"properties": [
("target", "FACE_SETS"),
("falloff_type", "GEODESIC"),
("invert", False),
("use_mask_preserve" , False),
("use_modify_active", False)]}),
("use_mask_preserve", False),
("use_modify_active", False),
]}),
("sculpt.expand", {"type": 'W', "value": 'PRESS', "shift": True, "alt": True},
{"properties": [
("target", "FACE_SETS"),
("falloff_type", "BOUNDARY_FACE_SET"),
("invert", False),
("use_mask_preserve" , False),
("use_mask_preserve", False),
("use_modify_active", True),
]}),
# Partial Visibility Show/hide

View File

@ -540,7 +540,7 @@ class CLIP_OT_setup_tracking_scene(Operator):
sc = context.space_data
if sc and sc.type == 'CLIP_EDITOR':
clip = sc.clip
if clip and clip.tracking.reconstruction.is_valid:
if clip and clip.tracking.objects.active.reconstruction.is_valid:
return True
return False

View File

@ -57,19 +57,19 @@ class MotionPathButtonsPanel:
# Update Selected.
col = layout.column(align=True)
row = col.row(align=True)
row.operator(f"{op_category}.paths_update", text="Update Path", icon=icon)
row.operator(f"{op_category}.paths_clear", text="", icon='X').only_selected = True
row.operator(op_category + ".paths_update", text="Update Path", icon=icon)
row.operator(op_category + ".paths_clear", text="", icon='X').only_selected = True
else:
# Calculate.
col = layout.column(align=True)
col.label(text="Nothing to show yet...", icon='ERROR')
col.operator(f"{op_category}.paths_calculate", text="Calculate...", icon=icon)
col.operator(op_category + ".paths_calculate", text="Calculate...", icon=icon)
# Update All & Clear All.
# Note that 'col' is from inside the preceding `if` or `else` block.
row = col.row(align=True)
row.operator("object.paths_update_visible", text="Update All Paths", icon='WORLD')
row.operator(f"{op_category}.paths_clear", text="", icon='X').only_selected = False
row.operator(op_category + ".paths_clear", text="", icon='X').only_selected = False
class MotionPathButtonsPanel_display:

View File

@ -326,7 +326,7 @@ class GPENCIL_MT_cleanup(Menu):
layout.separator()
layout.operator("gpencil.frame_clean_duplicate", text="Delete Duplicated Frames")
layout.operator("gpencil.frame_clean_duplicate", text="Delete Duplicate Frames")
layout.operator("gpencil.recalc_geometry", text="Recalculate Geometry")
if ob.mode != 'PAINT_GPENCIL':
layout.operator("gpencil.reproject")

View File

@ -238,8 +238,7 @@ class DOPESHEET_HT_editor_buttons:
# Layer management
if st.mode == 'GPENCIL':
ob = context.active_object
selected = st.dopesheet.show_only_selected
enable_but = selected and ob is not None and ob.type == 'GPENCIL'
enable_but = ob is not None and ob.type == 'GPENCIL'
row = layout.row(align=True)
row.enabled = enable_but

View File

@ -318,7 +318,9 @@ class NODE_MT_node(Menu):
layout.separator()
layout.operator("node.clipboard_copy", text="Copy")
layout.operator("node.clipboard_paste", text="Paste")
row = layout.row()
row.operator_context = 'EXEC_DEFAULT'
row.operator("node.clipboard_paste", text="Paste")
layout.operator("node.duplicate_move")
layout.operator("node.duplicate_move_linked")
layout.operator("node.delete")

View File

@ -723,8 +723,18 @@ class VIEW3D_HT_header(Header):
row = layout.row(align=True)
domain = curves.selection_domain
row.operator("curves.set_selection_domain", text="", icon='CURVE_BEZCIRCLE', depress=(domain == 'POINT')).domain = 'POINT'
row.operator("curves.set_selection_domain", text="", icon='CURVE_PATH', depress=(domain == 'CURVE')).domain = 'CURVE'
row.operator(
"curves.set_selection_domain",
text="",
icon='CURVE_BEZCIRCLE',
depress=(domain == 'POINT'),
).domain = 'POINT'
row.operator(
"curves.set_selection_domain",
text="",
icon='CURVE_PATH',
depress=(domain == 'CURVE'),
).domain = 'CURVE'
# Grease Pencil
if obj and obj.type == 'GPENCIL' and context.gpencil_data:

View File

@ -25,7 +25,7 @@ extern "C" {
/* Blender file format version. */
#define BLENDER_FILE_VERSION BLENDER_VERSION
#define BLENDER_FILE_SUBVERSION 6
#define BLENDER_FILE_SUBVERSION 7
/* Minimum Blender version that supports reading file written with the current
* version. Older Blender versions will test this and show a warning if the file

View File

@ -287,11 +287,6 @@ class CurvesGeometry : public ::CurvesGeometry {
Span<float2> surface_uv_coords() const;
MutableSpan<float2> surface_uv_coords_for_write();
VArray<float> selection_point_float() const;
MutableSpan<float> selection_point_float_for_write();
VArray<float> selection_curve_float() const;
MutableSpan<float> selection_curve_float_for_write();
/**
* Calculate the largest and smallest position values, only including control points
* (rather than evaluated points). The existing values of `min` and `max` are taken into account.

View File

@ -713,6 +713,11 @@ bNode *node_copy_with_mapping(bNodeTree *dst_tree,
bNode *node_copy(bNodeTree *dst_tree, const bNode &src_node, int flag, bool use_unique);
/**
* Free the node itself.
*
* \note ID user reference-counting and changing the `nodes_by_id` vector are up to the caller.
*/
void node_free_node(bNodeTree *tree, bNode *node);
} // namespace blender::bke

View File

@ -169,7 +169,10 @@ class bNodeSocketRuntime : NonCopyable, NonMovable {
float locx = 0;
float locy = 0;
/* Runtime-only cache of the number of input links, for multi-input sockets. */
/**
* Runtime-only cache of the number of input links, for multi-input sockets,
* including dragged node links that aren't actually in the tree.
*/
short total_inputs = 0;
/** Only valid when #topology_cache_is_dirty is false. */
@ -652,6 +655,11 @@ inline bool bNodeLink::is_available() const
return this->fromsock->is_available() && this->tosock->is_available();
}
inline bool bNodeLink::is_used() const
{
return !this->is_muted() && this->is_available();
}
/** \} */
/* -------------------------------------------------------------------- */
@ -670,6 +678,20 @@ inline int bNodeSocket::index_in_tree() const
return this->runtime->index_in_all_sockets;
}
inline int bNodeSocket::index_in_all_inputs() const
{
BLI_assert(blender::bke::node_tree_runtime::topology_cache_is_available(*this));
BLI_assert(this->is_input());
return this->runtime->index_in_inout_sockets;
}
inline int bNodeSocket::index_in_all_outputs() const
{
BLI_assert(blender::bke::node_tree_runtime::topology_cache_is_available(*this));
BLI_assert(this->is_output());
return this->runtime->index_in_inout_sockets;
}
inline bool bNodeSocket::is_hidden() const
{
return (this->flag & SOCK_HIDDEN) != 0;

View File

@ -0,0 +1,66 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup bke
*
* Pose Backups can be created from the current pose, and later restored. The
* backup is restricted to those bones animated by a given Action, so that
* operations are as fast as possible.
*/
#pragma once
#include <stdbool.h>
#include "BLI_listbase.h"
#ifdef __cplusplus
extern "C" {
#endif
struct PoseBackup;
/**
* Create a backup of those bones that are selected AND animated in the given action.
*
* The backup is owned by the caller, and should be freed with `BKE_pose_backup_free()`.
*/
struct PoseBackup *BKE_pose_backup_create_selected_bones(
const struct Object *ob, const struct bAction *action) ATTR_WARN_UNUSED_RESULT;
/**
* Create a backup of those bones that are animated in the given action.
*
* The backup is owned by the caller, and should be freed with `BKE_pose_backup_free()`.
*/
struct PoseBackup *BKE_pose_backup_create_all_bones(
const struct Object *ob, const struct bAction *action) ATTR_WARN_UNUSED_RESULT;
bool BKE_pose_backup_is_selection_relevant(const struct PoseBackup *pose_backup);
void BKE_pose_backup_restore(const struct PoseBackup *pbd);
void BKE_pose_backup_free(struct PoseBackup *pbd);
/**
* Create a backup of those bones that are animated in the given action.
*
* The backup is owned by the Object, and there can be only one backup at a time.
* It should be freed with `BKE_pose_backup_clear(ob)`.
*/
void BKE_pose_backup_create_on_object(struct Object *ob, const struct bAction *action);
/**
* Restore the pose backup owned by this Object.
*
* \return true on success, false if there was no pose backup to restore.
*
* \see #BKE_pose_backup_create_on_object
*/
bool BKE_pose_backup_restore_on_object(struct Object *ob);
/**
* Free the pose backup that was stored on this object's runtime data.
*/
void BKE_pose_backup_clear(struct Object *ob);
#ifdef __cplusplus
}
#endif
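
A sketch of the intended call sequence for the object-owned backup; the operator names are hypothetical (e.g. a pose-library preview), and `ob`/`action` are assumed to be valid:

#include "BKE_pose_backup.h"

/* Before applying a temporary pose: snapshot the bones animated by `action`. */
static void preview_begin(struct Object *ob, const struct bAction *action)
{
  BKE_pose_backup_create_on_object(ob, action);
}

/* On cancel: put the original pose back, then drop the backup. */
static void preview_cancel(struct Object *ob)
{
  if (BKE_pose_backup_restore_on_object(ob)) {
    BKE_pose_backup_clear(ob);
  }
}

/* On confirm: keep the new pose and only free the backup. */
static void preview_confirm(struct Object *ob)
{
  BKE_pose_backup_clear(ob);
}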

View File

@ -254,6 +254,7 @@ set(SRC
intern/pbvh_uv_islands.cc
intern/pointcache.c
intern/pointcloud.cc
intern/pose_backup.cc
intern/preferences.c
intern/report.c
intern/rigidbody.c
@ -451,6 +452,7 @@ set(SRC
BKE_pbvh_pixels.hh
BKE_pointcache.h
BKE_pointcloud.h
BKE_pose_backup.h
BKE_preferences.h
BKE_report.h
BKE_rigidbody.h

View File

@ -108,7 +108,7 @@ static void action_copy_data(Main *UNUSED(bmain), ID *id_dst, const ID *id_src,
/* Duplicate F-Curve. */
/* XXX TODO: pass subdata flag?
* But surprisingly does not seem to be doing any ID refcounting... */
* But surprisingly does not seem to be doing any ID reference-counting. */
fcurve_dst = BKE_fcurve_copy(fcurve_src);
BLI_addtail(&action_dst->curves, fcurve_dst);

View File

@ -416,9 +416,9 @@ static void setup_app_data(bContext *C,
* means that we do not reset their user count, however we do increase that one when doing
* lib_link on local IDs using linked ones.
* There is no real way to predict amount of changes here, so we have to fully redo
* refcounting.
* Now that we re-use (and do not liblink in readfile.c) most local datablocks as well, we have
* to recompute refcount for all local IDs too. */
* reference-counting.
* Now that we re-use (and do not liblink in readfile.c) most local data-blocks as well,
* we have to recompute reference-counts for all local IDs too. */
BKE_main_id_refcount_recompute(bmain, false);
}

View File

@ -1460,9 +1460,9 @@ static bool cloth_build_springs(ClothModifierData *clmd, Mesh *mesh)
Cloth *cloth = clmd->clothObject;
ClothSpring *spring = nullptr, *tspring = nullptr, *tspring2 = nullptr;
uint struct_springs = 0, shear_springs = 0, bend_springs = 0, struct_springs_real = 0;
uint mvert_num = (uint)mesh->totvert;
uint mvert_num = uint(mesh->totvert);
uint numedges = uint(mesh->totedge);
uint numpolys = (uint)mesh->totpoly;
uint numpolys = uint(mesh->totpoly);
float shrink_factor;
const MEdge *medge = BKE_mesh_edges(mesh);
const MPoly *mpoly = BKE_mesh_polys(mesh);
@ -1647,7 +1647,7 @@ static bool cloth_build_springs(ClothModifierData *clmd, Mesh *mesh)
for (int i = 0; i < mvert_num; i++) {
if (cloth->verts[i].spring_count > 0) {
cloth->verts[i].avg_spring_len = cloth->verts[i].avg_spring_len * 0.49f /
(float(cloth->verts[i].spring_count));
float(cloth->verts[i].spring_count);
}
}

View File

@ -1511,8 +1511,8 @@ Depsgraph *CTX_data_expect_evaluated_depsgraph(const bContext *C)
{
Depsgraph *depsgraph = CTX_data_depsgraph_pointer(C);
/* TODO(sergey): Assert that the dependency graph is fully evaluated.
* Note that first the depsgraph and scene post-eval hooks needs to run extra round of updates
* first to make check here really reliable. */
* Note that the depsgraph and scene post-evaluation hooks need to run an extra round of
* updates first to make the check here really reliable. */
return depsgraph;
}

View File

@ -38,8 +38,6 @@ static const std::string ATTR_HANDLE_POSITION_RIGHT = "handle_right";
static const std::string ATTR_NURBS_ORDER = "nurbs_order";
static const std::string ATTR_NURBS_WEIGHT = "nurbs_weight";
static const std::string ATTR_NURBS_KNOTS_MODE = "knots_mode";
static const std::string ATTR_SELECTION_POINT_FLOAT = ".selection_point_float";
static const std::string ATTR_SELECTION_CURVE_FLOAT = ".selection_curve_float";
static const std::string ATTR_SURFACE_UV_COORDINATE = "surface_uv_coordinate";
/* -------------------------------------------------------------------- */
@ -433,26 +431,6 @@ MutableSpan<float2> CurvesGeometry::surface_uv_coords_for_write()
return get_mutable_attribute<float2>(*this, ATTR_DOMAIN_CURVE, ATTR_SURFACE_UV_COORDINATE);
}
VArray<float> CurvesGeometry::selection_point_float() const
{
return get_varray_attribute<float>(*this, ATTR_DOMAIN_POINT, ATTR_SELECTION_POINT_FLOAT, 1.0f);
}
MutableSpan<float> CurvesGeometry::selection_point_float_for_write()
{
return get_mutable_attribute<float>(*this, ATTR_DOMAIN_POINT, ATTR_SELECTION_POINT_FLOAT, 1.0f);
}
VArray<float> CurvesGeometry::selection_curve_float() const
{
return get_varray_attribute<float>(*this, ATTR_DOMAIN_CURVE, ATTR_SELECTION_CURVE_FLOAT, 1.0f);
}
MutableSpan<float> CurvesGeometry::selection_curve_float_for_write()
{
return get_mutable_attribute<float>(*this, ATTR_DOMAIN_CURVE, ATTR_SELECTION_CURVE_FLOAT, 1.0f);
}
/** \} */
/* -------------------------------------------------------------------- */

View File

@ -1117,15 +1117,18 @@ class VertexGroupsAttributeProvider final : public DynamicAttributesProvider {
return true;
}
for (MDeformVert &dvert : mesh->deform_verts_for_write()) {
MDeformWeight *weight = BKE_defvert_find_index(&dvert, index);
BKE_defvert_remove_group(&dvert, weight);
for (MDeformWeight &weight : MutableSpan(dvert.dw, dvert.totweight)) {
if (weight.def_nr > index) {
weight.def_nr--;
MutableSpan<MDeformVert> dverts = mesh->deform_verts_for_write();
threading::parallel_for(dverts.index_range(), 1024, [&](IndexRange range) {
for (MDeformVert &dvert : dverts.slice(range)) {
MDeformWeight *weight = BKE_defvert_find_index(&dvert, index);
BKE_defvert_remove_group(&dvert, weight);
for (MDeformWeight &weight : MutableSpan(dvert.dw, dvert.totweight)) {
if (weight.def_nr > index) {
weight.def_nr--;
}
}
}
}
});
return true;
}

View File

@ -732,7 +732,7 @@ static void cp_key(const int start,
if (flagflo) {
ktot += start * kd;
a = (int)floor(ktot);
a = int(floor(ktot));
if (a) {
ktot -= a;
k1 += a * key->elemsize;
@ -1078,7 +1078,7 @@ static void do_key(const int start,
if (flagdo & 1) {
if (flagflo & 1) {
k1tot += start * k1d;
a = (int)floor(k1tot);
a = int(floor(k1tot));
if (a) {
k1tot -= a;
k1 += a * key->elemsize;
@ -1091,7 +1091,7 @@ static void do_key(const int start,
if (flagdo & 2) {
if (flagflo & 2) {
k2tot += start * k2d;
a = (int)floor(k2tot);
a = int(floor(k2tot));
if (a) {
k2tot -= a;
k2 += a * key->elemsize;
@ -1104,7 +1104,7 @@ static void do_key(const int start,
if (flagdo & 4) {
if (flagflo & 4) {
k3tot += start * k3d;
a = (int)floor(k3tot);
a = int(floor(k3tot));
if (a) {
k3tot -= a;
k3 += a * key->elemsize;
@ -1117,7 +1117,7 @@ static void do_key(const int start,
if (flagdo & 8) {
if (flagflo & 8) {
k4tot += start * k4d;
a = (int)floor(k4tot);
a = int(floor(k4tot));
if (a) {
k4tot -= a;
k4 += a * key->elemsize;
@ -1661,7 +1661,7 @@ int BKE_keyblock_element_count(const Key *key)
size_t BKE_keyblock_element_calc_size_from_shape(const Key *key, const int shape_index)
{
return (size_t)BKE_keyblock_element_count_from_shape(key, shape_index) * key->elemsize;
return size_t(BKE_keyblock_element_count_from_shape(key, shape_index)) * key->elemsize;
}
size_t BKE_keyblock_element_calc_size(const Key *key)

View File

@ -1057,7 +1057,7 @@ static void layer_collection_objects_sync(ViewLayer *view_layer,
}
/* Holdout and indirect only */
if ((layer->flag & LAYER_COLLECTION_HOLDOUT)) {
if (layer->flag & LAYER_COLLECTION_HOLDOUT) {
base->flag_from_collection |= BASE_HOLDOUT;
}
if (layer->flag & LAYER_COLLECTION_INDIRECT_ONLY) {

View File

@ -322,8 +322,8 @@ void id_us_min(ID *id)
if (id->us <= limit) {
if (!ID_TYPE_IS_DEPRECATED(GS(id->name))) {
/* Do not assert on deprecated ID types, we cannot really ensure that their ID refcounting
* is valid... */
/* Do not assert on deprecated ID types, we cannot really ensure that their ID
* reference-counting is valid. */
CLOG_ERROR(&LOG,
"ID user decrement error: %s (from '%s'): %d <= %d",
id->name,

View File

@ -261,7 +261,7 @@ static bool library_foreach_ID_link(Main *bmain,
* (the node tree), but re-use those generated for the 'owner' ID (the material). */
if (inherit_data == NULL) {
data.cb_flag = ID_IS_LINKED(id) ? IDWALK_CB_INDIRECT_USAGE : 0;
/* When an ID is defined as not refcounting its ID usages, it should never do it. */
/* When an ID is defined as not reference-counting its ID usages, it should never do it. */
data.cb_flag_clear = (id->tag & LIB_TAG_NO_USER_REFCOUNT) ?
IDWALK_CB_USER | IDWALK_CB_USER_ONE :
0;

View File

@ -143,9 +143,7 @@ static void make_edges_mdata_extend(Mesh &mesh)
static Mesh *mesh_nurbs_displist_to_mesh(const Curve *cu, const ListBase *dispbase)
{
using namespace blender::bke;
const float *data;
int a, b, ofs, vertcount, startvert, totvert = 0, totedge = 0, totloop = 0, totpoly = 0;
int p1, p2, p3, p4, *index;
int a, b, ofs;
const bool conv_polys = (
/* 2D polys are filled with #DispList.type == #DL_INDEX3. */
(CU_DO_2DFILL(cu) == false) ||
@ -153,6 +151,10 @@ static Mesh *mesh_nurbs_displist_to_mesh(const Curve *cu, const ListBase *dispba
BKE_curve_type_get(cu) == OB_SURF);
/* count */
int totvert = 0;
int totedge = 0;
int totpoly = 0;
int totloop = 0;
LISTBASE_FOREACH (const DispList *, dl, dispbase) {
if (dl->type == DL_SEGM) {
totvert += dl->parts * dl->nr;
@ -193,117 +195,110 @@ static Mesh *mesh_nurbs_displist_to_mesh(const Curve *cu, const ListBase *dispba
MutableSpan<MPoly> polys = mesh->polys_for_write();
MutableSpan<MLoop> loops = mesh->loops_for_write();
MVert *mvert = verts.data();
MEdge *medge = edges.data();
MPoly *mpoly = polys.data();
MLoop *mloop = loops.data();
MutableAttributeAccessor attributes = mesh->attributes_for_write();
SpanAttributeWriter<int> material_indices = attributes.lookup_or_add_for_write_only_span<int>(
"material_index", ATTR_DOMAIN_FACE);
MLoopUV *mloopuv = static_cast<MLoopUV *>(CustomData_add_layer_named(
&mesh->ldata, CD_MLOOPUV, CD_SET_DEFAULT, nullptr, mesh->totloop, DATA_("UVMap")));
/* verts and faces */
vertcount = 0;
int dst_vert = 0;
int dst_edge = 0;
int dst_poly = 0;
int dst_loop = 0;
LISTBASE_FOREACH (const DispList *, dl, dispbase) {
const bool is_smooth = (dl->rt & CU_SMOOTH) != 0;
if (dl->type == DL_SEGM) {
startvert = vertcount;
const int startvert = dst_vert;
a = dl->parts * dl->nr;
data = dl->verts;
const float *data = dl->verts;
while (a--) {
copy_v3_v3(mvert->co, data);
copy_v3_v3(verts[dst_vert].co, data);
data += 3;
vertcount++;
mvert++;
dst_vert++;
}
for (a = 0; a < dl->parts; a++) {
ofs = a * dl->nr;
for (b = 1; b < dl->nr; b++) {
medge->v1 = startvert + ofs + b - 1;
medge->v2 = startvert + ofs + b;
medge->flag = ME_EDGEDRAW;
edges[dst_edge].v1 = startvert + ofs + b - 1;
edges[dst_edge].v2 = startvert + ofs + b;
edges[dst_edge].flag = ME_EDGEDRAW;
medge++;
dst_edge++;
}
}
}
else if (dl->type == DL_POLY) {
if (conv_polys) {
startvert = vertcount;
const int startvert = dst_vert;
a = dl->parts * dl->nr;
data = dl->verts;
const float *data = dl->verts;
while (a--) {
copy_v3_v3(mvert->co, data);
copy_v3_v3(verts[dst_vert].co, data);
data += 3;
vertcount++;
mvert++;
dst_vert++;
}
for (a = 0; a < dl->parts; a++) {
ofs = a * dl->nr;
for (b = 0; b < dl->nr; b++) {
medge->v1 = startvert + ofs + b;
edges[dst_edge].v1 = startvert + ofs + b;
if (b == dl->nr - 1) {
medge->v2 = startvert + ofs;
edges[dst_edge].v2 = startvert + ofs;
}
else {
medge->v2 = startvert + ofs + b + 1;
edges[dst_edge].v2 = startvert + ofs + b + 1;
}
medge->flag = ME_EDGEDRAW;
medge++;
edges[dst_edge].flag = ME_EDGEDRAW;
dst_edge++;
}
}
}
}
else if (dl->type == DL_INDEX3) {
startvert = vertcount;
const int startvert = dst_vert;
a = dl->nr;
data = dl->verts;
const float *data = dl->verts;
while (a--) {
copy_v3_v3(mvert->co, data);
copy_v3_v3(verts[dst_vert].co, data);
data += 3;
vertcount++;
mvert++;
dst_vert++;
}
a = dl->parts;
index = dl->index;
const int *index = dl->index;
while (a--) {
mloop[0].v = startvert + index[0];
mloop[1].v = startvert + index[2];
mloop[2].v = startvert + index[1];
mpoly->loopstart = int(mloop - loops.data());
mpoly->totloop = 3;
material_indices.span[mpoly - polys.data()] = dl->col;
loops[dst_loop + 0].v = startvert + index[0];
loops[dst_loop + 1].v = startvert + index[2];
loops[dst_loop + 2].v = startvert + index[1];
polys[dst_poly].loopstart = dst_loop;
polys[dst_poly].totloop = 3;
material_indices.span[dst_poly] = dl->col;
if (mloopuv) {
for (int i = 0; i < 3; i++, mloopuv++) {
mloopuv->uv[0] = (mloop[i].v - startvert) / float(dl->nr - 1);
mloopuv->uv[0] = (loops[dst_loop + i].v - startvert) / float(dl->nr - 1);
mloopuv->uv[1] = 0.0f;
}
}
if (is_smooth) {
mpoly->flag |= ME_SMOOTH;
polys[dst_poly].flag |= ME_SMOOTH;
}
mpoly++;
mloop += 3;
dst_poly++;
dst_loop += 3;
index += 3;
}
}
else if (dl->type == DL_SURF) {
startvert = vertcount;
const int startvert = dst_vert;
a = dl->parts * dl->nr;
data = dl->verts;
const float *data = dl->verts;
while (a--) {
copy_v3_v3(mvert->co, data);
copy_v3_v3(verts[dst_vert].co, data);
data += 3;
vertcount++;
mvert++;
dst_vert++;
}
for (a = 0; a < dl->parts; a++) {
@ -312,6 +307,7 @@ static Mesh *mesh_nurbs_displist_to_mesh(const Curve *cu, const ListBase *dispba
break;
}
int p1, p2, p3, p4;
if (dl->flag & DL_CYCL_U) { /* p2 -> p1 -> */
p1 = startvert + dl->nr * a; /* p4 -> p3 -> */
p2 = p1 + dl->nr - 1; /* -----> next row */
@ -332,13 +328,13 @@ static Mesh *mesh_nurbs_displist_to_mesh(const Curve *cu, const ListBase *dispba
}
for (; b < dl->nr; b++) {
mloop[0].v = p1;
mloop[1].v = p3;
mloop[2].v = p4;
mloop[3].v = p2;
mpoly->loopstart = int(mloop - loops.data());
mpoly->totloop = 4;
material_indices.span[mpoly - polys.data()] = dl->col;
loops[dst_loop + 0].v = p1;
loops[dst_loop + 1].v = p3;
loops[dst_loop + 2].v = p4;
loops[dst_loop + 3].v = p2;
polys[dst_poly].loopstart = dst_loop;
polys[dst_poly].totloop = 4;
material_indices.span[dst_poly] = dl->col;
if (mloopuv) {
int orco_sizeu = dl->nr - 1;
@ -357,7 +353,7 @@ static Mesh *mesh_nurbs_displist_to_mesh(const Curve *cu, const ListBase *dispba
for (int i = 0; i < 4; i++, mloopuv++) {
/* find uv based on vertex index into grid array */
int v = mloop[i].v - startvert;
int v = loops[dst_loop + i].v - startvert;
mloopuv->uv[0] = (v / dl->nr) / float(orco_sizev);
mloopuv->uv[1] = (v % dl->nr) / float(orco_sizeu);
@ -373,10 +369,10 @@ static Mesh *mesh_nurbs_displist_to_mesh(const Curve *cu, const ListBase *dispba
}
if (is_smooth) {
mpoly->flag |= ME_SMOOTH;
polys[dst_poly].flag |= ME_SMOOTH;
}
mpoly++;
mloop += 4;
dst_poly++;
dst_loop += 4;
p4 = p3;
p3++;
@ -1066,7 +1062,7 @@ Mesh *BKE_mesh_new_from_object_to_bmain(Main *bmain,
* everything is only allowed to reference original data-blocks.
*
* Note that user-count updates has to be done *after* mesh has been transferred to Main database
* (since doing refcounting on non-Main IDs is forbidden). */
* (since doing reference-counting on non-Main IDs is forbidden). */
BKE_library_foreach_ID_link(
nullptr, &mesh->id, foreach_libblock_make_original_callback, nullptr, IDWALK_NOP);

View File

@ -972,7 +972,7 @@ static void loop_manifold_fan_around_vert_next(const Span<MLoop> loops,
const uint vert_fan_next = loops[*r_mlfan_curr_index].v;
const MPoly &mpfan_next = polys[*r_mpfan_curr_index];
if ((vert_fan_orig == vert_fan_next && vert_fan_orig == mv_pivot_index) ||
(vert_fan_orig != vert_fan_next && vert_fan_orig != mv_pivot_index)) {
(!ELEM(vert_fan_orig, vert_fan_next, mv_pivot_index))) {
/* We need the previous loop, but current one is our vertex's loop. */
*r_mlfan_vert_index = *r_mlfan_curr_index;
if (--(*r_mlfan_curr_index) < mpfan_next.loopstart) {

View File

@ -1592,7 +1592,7 @@ void BKE_mesh_remap_calc_loops_from_mesh(const int mode,
}
}
if ((size_t)mp_dst->totloop > islands_res_buff_size) {
if (size_t(mp_dst->totloop) > islands_res_buff_size) {
islands_res_buff_size = size_t(mp_dst->totloop) + MREMAP_DEFAULT_BUFSIZE;
for (tindex = 0; tindex < num_trees; tindex++) {
islands_res[tindex] = static_cast<IslandResult *>(
@ -2257,7 +2257,7 @@ void BKE_mesh_remap_calc_polys_from_mesh(const int mode,
*/
RNG *rng = BLI_rng_new(0);
const size_t numpolys_src = (size_t)me_src->totpoly;
const size_t numpolys_src = size_t(me_src->totpoly);
/* Here it's simpler to just allocate for all polys :/ */
int *indices = static_cast<int *>(MEM_mallocN(sizeof(*indices) * numpolys_src, __func__));

View File

@ -2952,11 +2952,6 @@ void nodeRebuildIDVector(bNodeTree *node_tree)
namespace blender::bke {
/**
* Free the node itself.
*
* \note: ID user refcounting and changing the `nodes_by_id` vector are up to the caller.
*/
void node_free_node(bNodeTree *ntree, bNode *node)
{
/* since it is called while free database, node->id is undefined */
@ -3031,7 +3026,7 @@ void ntreeFreeLocalNode(bNodeTree *ntree, bNode *node)
void nodeRemoveNode(Main *bmain, bNodeTree *ntree, bNode *node, bool do_id_user)
{
/* This function is not for localized node trees, we do not want
* do to ID user refcounting and removal of animdation data then. */
* to do ID user reference-counting and removal of animation data then. */
BLI_assert((ntree->id.tag & LIB_TAG_LOCALIZED) == 0);
bool node_has_id = false;
@ -3558,8 +3553,6 @@ void nodeSetActive(bNodeTree *ntree, bNode *node)
node->flag |= flags_to_set;
}
void nodeSetSocketAvailability(bNodeTree *ntree, bNodeSocket *sock, bool is_available)
{
const bool was_available = (sock->flag & SOCK_UNAVAIL) == 0;

View File

@ -51,6 +51,10 @@ static void update_link_vector(const bNodeTree &ntree)
bNodeTreeRuntime &tree_runtime = *ntree.runtime;
tree_runtime.links.clear();
LISTBASE_FOREACH (bNodeLink *, link, &ntree.links) {
/* Check that the link connects nodes within this tree. */
BLI_assert(tree_runtime.nodes_by_id.contains(link->fromnode));
BLI_assert(tree_runtime.nodes_by_id.contains(link->tonode));
tree_runtime.links.append(link);
}
}

View File

@ -117,6 +117,7 @@
#include "BKE_pbvh.h"
#include "BKE_pointcache.h"
#include "BKE_pointcloud.h"
#include "BKE_pose_backup.h"
#include "BKE_rigidbody.h"
#include "BKE_scene.h"
#include "BKE_shader_fx.h"
@ -1814,6 +1815,7 @@ void BKE_object_free_derived_caches(Object *ob)
}
BKE_object_to_mesh_clear(ob);
BKE_pose_backup_clear(ob);
BKE_object_to_curve_clear(ob);
BKE_object_free_curve_cache(ob);
@ -2889,6 +2891,7 @@ void BKE_object_obdata_size_init(struct Object *ob, const float size)
case OB_LAMP: {
Light *lamp = (Light *)ob->data;
lamp->dist *= size;
lamp->radius *= size;
lamp->area_size *= size;
lamp->area_sizey *= size;
lamp->area_sizez *= size;
@ -5132,6 +5135,7 @@ void BKE_object_runtime_reset_on_copy(Object *object, const int /*flag*/)
runtime->mesh_deform_eval = nullptr;
runtime->curve_cache = nullptr;
runtime->object_as_temp_mesh = nullptr;
runtime->pose_backup = nullptr;
runtime->object_as_temp_curve = nullptr;
runtime->geometry_set_eval = nullptr;

View File

@ -36,7 +36,7 @@ MeshUVVert *MeshPrimitive::get_other_uv_vertex(const MeshVertex *v1, const MeshV
BLI_assert(vertices[0].vertex == v1 || vertices[1].vertex == v1 || vertices[2].vertex == v1);
BLI_assert(vertices[0].vertex == v2 || vertices[1].vertex == v2 || vertices[2].vertex == v2);
for (MeshUVVert &uv_vertex : vertices) {
if (uv_vertex.vertex != v1 && uv_vertex.vertex != v2) {
if (!ELEM(uv_vertex.vertex, v1, v2)) {
return &uv_vertex;
}
}

View File

@ -1,10 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup edarmature
* \ingroup bke
*/
#include "ED_armature.h"
#include "BKE_pose_backup.h"
#include <cstring>
@ -38,6 +38,14 @@ struct PoseBackup {
ListBase /* PoseChannelBackup* */ backups;
};
/**
* Create a backup of the pose, for only those bones that are animated in the
* given Action. If `selected_bone_names` is not empty, the set of bones to back
* up is intersected with these bone names such that only the selected subset is
* backed up.
*
* The returned pointer is owned by the caller.
*/
static PoseBackup *pose_backup_create(const Object *ob,
const bAction *action,
const BoneNameSet &selected_bone_names)
@ -86,24 +94,24 @@ static PoseBackup *pose_backup_create(const Object *ob,
return pose_backup;
}
PoseBackup *ED_pose_backup_create_all_bones(const Object *ob, const bAction *action)
PoseBackup *BKE_pose_backup_create_all_bones(const Object *ob, const bAction *action)
{
return pose_backup_create(ob, action, BoneNameSet());
}
PoseBackup *ED_pose_backup_create_selected_bones(const Object *ob, const bAction *action)
PoseBackup *BKE_pose_backup_create_selected_bones(const Object *ob, const bAction *action)
{
const bArmature *armature = static_cast<const bArmature *>(ob->data);
const BoneNameSet selected_bone_names = BKE_armature_find_selected_bone_names(armature);
return pose_backup_create(ob, action, selected_bone_names);
}
bool ED_pose_backup_is_selection_relevant(const struct PoseBackup *pose_backup)
bool BKE_pose_backup_is_selection_relevant(const struct PoseBackup *pose_backup)
{
return pose_backup->is_bone_selection_relevant;
}
void ED_pose_backup_restore(const PoseBackup *pbd)
void BKE_pose_backup_restore(const PoseBackup *pbd)
{
LISTBASE_FOREACH (PoseChannelBackup *, chan_bak, &pbd->backups) {
memcpy(chan_bak->pchan, &chan_bak->olddata, sizeof(chan_bak->olddata));
@ -117,7 +125,7 @@ void ED_pose_backup_restore(const PoseBackup *pbd)
}
}
void ED_pose_backup_free(PoseBackup *pbd)
void BKE_pose_backup_free(PoseBackup *pbd)
{
LISTBASE_FOREACH_MUTABLE (PoseChannelBackup *, chan_bak, &pbd->backups) {
if (chan_bak->oldprops) {
@ -127,3 +135,29 @@ void ED_pose_backup_free(PoseBackup *pbd)
}
MEM_freeN(pbd);
}
void BKE_pose_backup_create_on_object(Object *ob, const bAction *action)
{
BKE_pose_backup_clear(ob);
PoseBackup *pose_backup = BKE_pose_backup_create_all_bones(ob, action);
ob->runtime.pose_backup = pose_backup;
}
bool BKE_pose_backup_restore_on_object(struct Object *ob)
{
if (ob->runtime.pose_backup == nullptr) {
return false;
}
BKE_pose_backup_restore(ob->runtime.pose_backup);
return true;
}
void BKE_pose_backup_clear(Object *ob)
{
if (ob->runtime.pose_backup == nullptr) {
return;
}
BKE_pose_backup_free(ob->runtime.pose_backup);
ob->runtime.pose_backup = nullptr;
}

View File

@ -112,4 +112,6 @@ inline void gather(const VArray<T> &src,
});
}
void invert_booleans(MutableSpan<bool> span);
} // namespace blender::array_utils

View File

@ -33,4 +33,13 @@ void gather(const GSpan src, const IndexMask indices, GMutableSpan dst, const in
gather(GVArray::ForSpan(src), indices, dst, grain_size);
}
void invert_booleans(MutableSpan<bool> span)
{
threading::parallel_for(span.index_range(), 4096, [&](IndexRange range) {
for (const int i : range) {
span[i] = !span[i];
}
});
}
} // namespace blender::array_utils
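
A small usage sketch for the new utility; the `Array` construction is illustrative, not code from the patch:

#include "BLI_array.hh"
#include "BLI_array_utils.hh"

static void example()
{
  blender::Array<bool> selection = {true, false, true, true};
  /* Flips every element in place; large spans are processed in parallel. */
  blender::array_utils::invert_booleans(selection);
  /* `selection` is now {false, true, false, false}. */
}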

View File

@ -1782,12 +1782,6 @@ void blo_do_versions_280(FileData *fd, Library *UNUSED(lib), Main *bmain)
}
if (!MAIN_VERSION_ATLEAST(bmain, 280, 1)) {
if (!DNA_struct_elem_find(fd->filesdna, "Lamp", "float", "bleedexp")) {
for (Light *la = bmain->lights.first; la; la = la->id.next) {
la->bleedexp = 2.5f;
}
}
if (!DNA_struct_elem_find(fd->filesdna, "GPUDOFSettings", "float", "ratio")) {
for (Camera *ca = bmain->cameras.first; ca; ca = ca->id.next) {
ca->gpu_dof.ratio = 1.0f;
@ -1820,7 +1814,6 @@ void blo_do_versions_280(FileData *fd, Library *UNUSED(lib), Main *bmain)
for (Light *la = bmain->lights.first; la; la = la->id.next) {
la->contact_dist = 0.2f;
la->contact_bias = 0.03f;
la->contact_spread = 0.2f;
la->contact_thickness = 0.2f;
}
}

View File

@ -28,6 +28,7 @@
#include "DNA_curves_types.h"
#include "DNA_genfile.h"
#include "DNA_gpencil_modifier_types.h"
#include "DNA_light_types.h"
#include "DNA_lineart_types.h"
#include "DNA_listBase.h"
#include "DNA_mask_types.h"
@ -50,6 +51,7 @@
#include "BKE_collection.h"
#include "BKE_colortools.h"
#include "BKE_curve.h"
#include "BKE_curves.hh"
#include "BKE_data_transfer.h"
#include "BKE_deform.h"
#include "BKE_fcurve.h"
@ -3789,7 +3791,7 @@ void blo_do_versions_300(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (MovieClip *, clip, &bmain->movieclips) {
MovieTracking *tracking = &clip->tracking;
const float frame_center_x = (float(clip->lastsize[0])) / 2;
const float frame_center_x = float(clip->lastsize[0]) / 2;
const float frame_center_y = float(clip->lastsize[1]) / 2;
tracking->camera.principal_point[0] = (tracking->camera.principal_legacy[0] -
@ -3828,13 +3830,20 @@ void blo_do_versions_300(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (SpaceLink *, sl, &area->spacedata) {
if (sl->spacetype == SPACE_VIEW3D) {
View3D *v3d = (View3D *)sl;
v3d->overlay.flag |= (int)(V3D_OVERLAY_SCULPT_SHOW_MASK |
V3D_OVERLAY_SCULPT_SHOW_FACE_SETS);
v3d->overlay.flag |= int(V3D_OVERLAY_SCULPT_SHOW_MASK |
V3D_OVERLAY_SCULPT_SHOW_FACE_SETS);
}
}
}
}
}
if (!MAIN_VERSION_ATLEAST(bmain, 305, 7)) {
LISTBASE_FOREACH (Light *, light, &bmain->lights) {
light->radius = light->area_size;
}
}
/**
* Versioning code until next subversion bump goes here.
*
@ -3850,5 +3859,9 @@ void blo_do_versions_300(FileData *fd, Library * /*lib*/, Main *bmain)
LISTBASE_FOREACH (Curves *, curves_id, &bmain->hair_curves) {
curves_id->flag &= ~CV_SCULPT_SELECTION_ENABLED;
}
LISTBASE_FOREACH (Curves *, curves_id, &bmain->hair_curves) {
BKE_id_attribute_rename(&curves_id->id, ".selection_point_float", ".selection", nullptr);
BKE_id_attribute_rename(&curves_id->id, ".selection_curve_float", ".selection", nullptr);
}
}
}

View File

@ -88,8 +88,8 @@ struct bNodeSocket *version_node_add_socket_if_not_exist(struct bNodeTree *ntree
const char *name);
/**
* The versioning code generally expects `SOCK_IS_LINKED` to be set correctly. This function updates
* the flag on all sockets after changes to the node tree.
* The versioning code generally expects `SOCK_IS_LINKED` to be set correctly. This function
* updates the flag on all sockets after changes to the node tree.
*/
void version_socket_update_is_used(bNodeTree *ntree);
ARegion *do_versions_add_region(int regiontype, const char *name);

View File

@ -919,7 +919,7 @@ static void bm_mesh_loops_calc_normals_for_vert_with_clnors(BMesh *bm,
BLI_linklist_prepend_alloca(&loops_of_vert, l_curr);
loops_of_vert_count += 1;
const uint index_test = (uint)BM_elem_index_get(l_curr);
const uint index_test = uint(BM_elem_index_get(l_curr));
if (index_best > index_test) {
index_best = index_test;
link_best = loops_of_vert;

View File

@ -104,7 +104,13 @@ void OutputFileNode::convert_to_operations(NodeConverter &converter,
char path[FILE_MAX];
/* combine file path for the input */
BLI_path_join(path, FILE_MAX, storage->base_path, sockdata->path);
if (sockdata->path[0]) {
BLI_path_join(path, FILE_MAX, storage->base_path, sockdata->path);
}
else {
BLI_strncpy(path, storage->base_path, FILE_MAX);
BLI_path_slash_ensure(path, FILE_MAX);
}
NodeOperation *output_operation = nullptr;

View File

@ -1675,8 +1675,11 @@ void DepsgraphRelationBuilder::build_driver_data(ID *id, FCurve *fcu)
continue;
}
OperationCode target_op = driver_targets_bbone ? OperationCode::BONE_SEGMENTS :
OperationCode::BONE_LOCAL;
OperationCode target_op = OperationCode::BONE_LOCAL;
if (driver_targets_bbone) {
target_op = check_pchan_has_bbone_segments(object, pchan) ? OperationCode::BONE_SEGMENTS :
OperationCode::BONE_DONE;
}
OperationKey bone_key(&object->id, NodeType::BONE, pchan->name, target_op);
add_relation(driver_key, bone_key, "Arm Bone -> Driver -> Bone");
}

View File

@ -105,8 +105,7 @@ void evaluate_node(const DepsgraphEvalState *state, OperationNode *operation_nod
* times.
* This is a thread-safe modification as the node's flags are only read for non-scheduled nodes
* and this node has been scheduled. */
operation_node->flag &= ~(DEPSOP_FLAG_DIRECTLY_MODIFIED | DEPSOP_FLAG_NEEDS_UPDATE |
DEPSOP_FLAG_USER_MODIFIED);
operation_node->flag &= ~DEPSOP_FLAG_CLEAR_ON_EVAL;
}
void deg_task_run_func(TaskPool *pool, void *taskdata)
@ -270,6 +269,10 @@ void schedule_node(DepsgraphEvalState *state,
bool is_scheduled = atomic_fetch_and_or_uint8((uint8_t *)&node->scheduled, uint8_t(true));
if (!is_scheduled) {
if (node->is_noop()) {
/* Clear flags to avoid affecting subsequent update propagation.
* For normal nodes these are cleared when the node is evaluated. */
node->flag &= ~DEPSOP_FLAG_CLEAR_ON_EVAL;
/* skip NOOP node, schedule children right away */
schedule_children(state, node, schedule_fn);
}

View File

@ -224,6 +224,10 @@ enum OperationFlag {
/* Set of flags which gets flushed along the relations. */
DEPSOP_FLAG_FLUSH = (DEPSOP_FLAG_USER_MODIFIED),
/* Set of flags which get cleared upon evaluation. */
DEPSOP_FLAG_CLEAR_ON_EVAL = (DEPSOP_FLAG_DIRECTLY_MODIFIED | DEPSOP_FLAG_NEEDS_UPDATE |
DEPSOP_FLAG_USER_MODIFIED),
};
/* Atomic Operation - Base type for all operations */

View File

@ -45,7 +45,7 @@ static void light_shape_parameters_set(EEVEE_Light *evli, const Light *la, const
evli->sizey = scale[1] / scale[2];
evli->spotsize = cosf(la->spotsize * 0.5f);
evli->spotblend = (1.0f - evli->spotsize) * la->spotblend;
evli->radius = max_ff(0.001f, la->area_size);
evli->radius = max_ff(0.001f, la->radius);
}
else if (la->type == LA_AREA) {
evli->sizex = max_ff(0.003f, la->area_size * scale[0] * 0.5f);
@ -62,7 +62,7 @@ static void light_shape_parameters_set(EEVEE_Light *evli, const Light *la, const
evli->radius = max_ff(0.001f, tanf(min_ff(la->sun_angle, DEG2RADF(179.9f)) / 2.0f));
}
else {
evli->radius = max_ff(0.001f, la->area_size);
evli->radius = max_ff(0.001f, la->radius);
}
}

View File

@ -178,7 +178,7 @@ void Light::shape_parameters_set(const ::Light *la, const float scale[3])
_area_size_x = tanf(min_ff(la->sun_angle, DEG2RADF(179.9f)) / 2.0f);
}
else {
_area_size_x = la->area_size;
_area_size_x = la->radius;
}
_area_size_y = _area_size_x = max_ff(0.001f, _area_size_x);
radius_squared = square_f(_area_size_x);

View File

@ -92,10 +92,10 @@ static void OVERLAY_engine_init(void *vedata)
}
if (ts->sculpt) {
if (!(v3d->overlay.flag & (int)V3D_OVERLAY_SCULPT_SHOW_FACE_SETS)) {
if (!(v3d->overlay.flag & int(V3D_OVERLAY_SCULPT_SHOW_FACE_SETS))) {
pd->overlay.sculpt_mode_face_sets_opacity = 0.0f;
}
if (!(v3d->overlay.flag & (int)V3D_OVERLAY_SCULPT_SHOW_MASK)) {
if (!(v3d->overlay.flag & int(V3D_OVERLAY_SCULPT_SHOW_MASK))) {
pd->overlay.sculpt_mode_mask_opacity = 0.0f;
}
}

View File

@ -637,7 +637,7 @@ void OVERLAY_light_cache_populate(OVERLAY_Data *vedata, Object *ob)
DRW_buffer_add_entry(cb->groundline, instdata.pos);
if (la->type == LA_LOCAL) {
instdata.area_size_x = instdata.area_size_y = la->area_size;
instdata.area_size_x = instdata.area_size_y = la->radius;
DRW_buffer_add_entry(cb->light_point, color, &instdata);
}
else if (la->type == LA_SUN) {
@ -661,7 +661,7 @@ void OVERLAY_light_cache_populate(OVERLAY_Data *vedata, Object *ob)
instdata.spot_blend = sqrtf((-a - c * a) / (c - c * a));
instdata.spot_cosine = a;
/* HACK: We pack the area size in alpha color. This is decoded by the shader. */
color[3] = -max_ff(la->area_size, FLT_MIN);
color[3] = -max_ff(la->radius, FLT_MIN);
DRW_buffer_add_entry(cb->light_spot, color, &instdata);
if ((la->mode & LA_SHOW_CONE) && !DRW_state_is_select()) {

View File

@ -10,6 +10,7 @@
#include "draw_cache_impl.h"
#include "overlay_private.hh"
#include "BKE_attribute.hh"
#include "BKE_curves.hh"
void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata)
@ -31,18 +32,11 @@ void OVERLAY_sculpt_curves_cache_init(OVERLAY_Data *vedata)
static bool everything_selected(const Curves &curves_id)
{
const blender::bke::CurvesGeometry &curves = blender::bke::CurvesGeometry::wrap(
curves_id.geometry);
blender::VArray<float> selection;
switch (curves_id.selection_domain) {
case ATTR_DOMAIN_POINT:
selection = curves.selection_point_float();
break;
case ATTR_DOMAIN_CURVE:
selection = curves.selection_curve_float();
break;
}
return selection.is_single() && selection.get_internal_single() == 1.0f;
using namespace blender;
const bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry);
const VArray<bool> selection = curves.attributes().lookup_or_default<bool>(
".selection", ATTR_DOMAIN_POINT, true);
return selection.is_single() && selection.get_internal_single();
}
void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *object)
@ -56,12 +50,9 @@ void OVERLAY_sculpt_curves_cache_populate(OVERLAY_Data *vedata, Object *object)
}
/* Retrieve the location of the texture. */
const char *name = curves->selection_domain == ATTR_DOMAIN_POINT ? ".selection_point_float" :
".selection_curve_float";
bool is_point_domain;
GPUVertBuf **texture = DRW_curves_texture_for_evaluated_attribute(
curves, name, &is_point_domain);
curves, ".selection", &is_point_domain);
if (texture == nullptr) {
return;
}

View File

@ -11,6 +11,7 @@
#include "MEM_guardedalloc.h"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_listbase.h"
#include "BLI_math_base.h"
#include "BLI_math_vec_types.hh"
@ -334,17 +335,16 @@ static void curves_batch_cache_ensure_edit_points_data(const Curves &curves_id,
GPU_vertbuf_init_with_format(cache.edit_points_data, &format_data);
GPU_vertbuf_data_alloc(cache.edit_points_data, curves.points_num());
VArray<float> selection;
const VArray<bool> selection = curves.attributes().lookup_or_default<bool>(
".selection", eAttrDomain(curves_id.selection_domain), true);
switch (curves_id.selection_domain) {
case ATTR_DOMAIN_POINT:
selection = curves.selection_point_float();
for (const int point_i : selection.index_range()) {
const float point_selection = (selection[point_i] > 0.0f) ? 1.0f : 0.0f;
GPU_vertbuf_attr_set(cache.edit_points_data, color, point_i, &point_selection);
}
break;
case ATTR_DOMAIN_CURVE:
selection = curves.selection_curve_float();
for (const int curve_i : curves.curves_range()) {
const float curve_selection = (selection[curve_i] > 0.0f) ? 1.0f : 0.0f;
const IndexRange points = curves.points_for_curve(curve_i);

View File

@ -276,7 +276,7 @@ BLI_INLINE int32_t pack_rotation_aspect_hardness(float rot, float asp, float har
int32_t packed = 0;
/* Aspect uses 9 bits */
float asp_normalized = (asp > 1.0f) ? (1.0f / asp) : asp;
packed |= (int32_t)unit_float_to_uchar_clamp(asp_normalized);
packed |= int32_t(unit_float_to_uchar_clamp(asp_normalized));
/* Store whether the aspect was inverted in the 9th bit. */
if (asp > 1.0f) {
packed |= 1 << 8;
@ -284,13 +284,13 @@ BLI_INLINE int32_t pack_rotation_aspect_hardness(float rot, float asp, float har
/* Rotation uses 9 bits */
/* Rotations are in the [-90°..90°] range, so we can encode the sign of the angle + the cosine
* because the cosine will always be positive. */
packed |= (int32_t)unit_float_to_uchar_clamp(cosf(rot)) << 9;
packed |= int32_t(unit_float_to_uchar_clamp(cosf(rot))) << 9;
/* Store sine sign in 9th bit. */
if (rot < 0.0f) {
packed |= 1 << 17;
}
/* Hardness uses 8 bits */
packed |= (int32_t)unit_float_to_uchar_clamp(hard) << 18;
packed |= int32_t(unit_float_to_uchar_clamp(hard)) << 18;
return packed;
}
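
The resulting layout is: bits 0-7 aspect byte, bit 8 aspect-inversion flag, bits 9-16 rotation-cosine byte, bit 17 rotation-sign flag, bits 18-25 hardness byte. A standalone encode/inspect sketch of the same layout (unit_to_uchar is an illustrative stand-in for unit_float_to_uchar_clamp):

#include <cmath>
#include <cstdint>
#include <cstdio>

/* Illustrative stand-in for unit_float_to_uchar_clamp(). */
static int32_t unit_to_uchar(const float f)
{
  if (f <= 0.0f) {
    return 0;
  }
  if (f >= 1.0f) {
    return 255;
  }
  return int32_t(f * 255.0f);
}

static int32_t pack(const float rot, const float asp, const float hard)
{
  int32_t packed = 0;
  const float asp_normalized = (asp > 1.0f) ? (1.0f / asp) : asp;
  packed |= unit_to_uchar(asp_normalized); /* Bits 0-7: aspect byte. */
  if (asp > 1.0f) {
    packed |= 1 << 8; /* Bit 8: aspect was inverted. */
  }
  packed |= unit_to_uchar(cosf(rot)) << 9; /* Bits 9-16: cosine byte. */
  if (rot < 0.0f) {
    packed |= 1 << 17; /* Bit 17: sine sign. */
  }
  packed |= unit_to_uchar(hard) << 18; /* Bits 18-25: hardness byte. */
  return packed;
}

int main()
{
  const int32_t packed = pack(-0.5f, 2.0f, 0.75f);
  std::printf("aspect=%d inverted=%d\n", packed & 0xFF, (packed >> 8) & 1);
  std::printf("cos=%d negative=%d\n", (packed >> 9) & 0xFF, (packed >> 17) & 1);
  std::printf("hardness=%d\n", (packed >> 18) & 0xFF);
  return 0;
}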
@ -315,7 +315,7 @@ static void gpencil_buffer_add_point(GPUIndexBufBuilder *ibo,
/* Encode fill opacity defined by opacity modifier in vertex color alpha. If
* no opacity modifier, the value will always be 1.0f. The opacity factor can be any
* value between 0.0f and 2.0f. */
col->fcol[3] = ((int)(col->fcol[3] * 10000.0f) * 10.0f) + gps->fill_opacity_fac;
col->fcol[3] = (int(col->fcol[3] * 10000.0f) * 10.0f) + gps->fill_opacity_fac;
vert->strength = (round_cap0) ? pt->strength : -pt->strength;
vert->u_stroke = pt->uv_fac;
@ -579,7 +579,7 @@ bGPDstroke *DRW_cache_gpencil_sbuffer_stroke_data_get(Object *ob)
gps->runtime.stroke_start = 0;
copy_v4_v4(gps->vert_color_fill, gpd->runtime.vert_color_fill);
/* Caps. */
gps->caps[0] = gps->caps[1] = (short)brush->gpencil_settings->caps_type;
gps->caps[0] = gps->caps[1] = short(brush->gpencil_settings->caps_type);
gpd->runtime.sbuffer_gps = gps;
}

View File

@ -30,7 +30,6 @@ set(SRC
armature_utils.c
editarmature_undo.c
meshlaplacian.c
pose_backup.cc
pose_edit.c
pose_group.c
pose_lib_2.c

View File

@ -24,6 +24,7 @@
#include "BKE_context.h"
#include "BKE_lib_id.h"
#include "BKE_object.h"
#include "BKE_pose_backup.h"
#include "BKE_report.h"
#include "DEG_depsgraph.h"
@ -37,7 +38,6 @@
#include "UI_interface.h"
#include "ED_armature.h"
#include "ED_asset.h"
#include "ED_keyframing.h"
#include "ED_screen.h"
@ -66,13 +66,14 @@ typedef struct PoseBlendData {
/* For temp-loading the Action from the pose library. */
AssetTempIDConsumer *temp_id_consumer;
/* Blend factor, interval [0, 1] for interpolating between current and given pose. */
/* Blend factor, interval [-1, 1] for interpolating between current and given pose.
* Positive factors will blend in `act`, whereas negative factors will blend in `act_flipped`. */
float blend_factor;
struct PoseBackup *pose_backup;
Object *ob; /* Object to work on. */
bAction *act; /* Pose to blend into the current pose. */
bool free_action;
Object *ob; /* Object to work on. */
bAction *act; /* Pose to blend into the current pose. */
bAction *act_flipped; /* Flipped copy of `act`. */
Scene *scene; /* For auto-keying. */
ScrArea *area; /* For drawing status text. */
@ -83,10 +84,19 @@ typedef struct PoseBlendData {
char headerstr[UI_MAX_DRAW_STR];
} PoseBlendData;
/** Return the bAction that should be blended.
* This is either pbd->act or pbd->act_flipped, depending on the sign of the blend factor.
*/
static bAction *poselib_action_to_blend(PoseBlendData *pbd)
{
return (pbd->blend_factor >= 0) ? pbd->act : pbd->act_flipped;
}
/* Makes a copy of the current pose for restoration purposes - doesn't do constraints currently */
static void poselib_backup_posecopy(PoseBlendData *pbd)
{
pbd->pose_backup = ED_pose_backup_create_selected_bones(pbd->ob, pbd->act);
const bAction *action = poselib_action_to_blend(pbd);
pbd->pose_backup = BKE_pose_backup_create_selected_bones(pbd->ob, action);
if (pbd->state == POSE_BLEND_INIT) {
/* Ready for blending now. */
@ -125,7 +135,7 @@ static void poselib_keytag_pose(bContext *C, Scene *scene, PoseBlendData *pbd)
continue;
}
if (ED_pose_backup_is_selection_relevant(pbd->pose_backup) &&
if (BKE_pose_backup_is_selection_relevant(pbd->pose_backup) &&
!PBONE_SELECTED(armature, pchan->bone)) {
continue;
}
@ -152,7 +162,7 @@ static void poselib_blend_apply(bContext *C, wmOperator *op)
}
pbd->needs_redraw = false;
ED_pose_backup_restore(pbd->pose_backup);
BKE_pose_backup_restore(pbd->pose_backup);
/* The pose needs updating, whether it's for restoring the original pose or for showing the
* result of the blend. */
@ -166,15 +176,28 @@ static void poselib_blend_apply(bContext *C, wmOperator *op)
/* Perform the actual blending. */
struct Depsgraph *depsgraph = CTX_data_depsgraph_pointer(C);
AnimationEvalContext anim_eval_context = BKE_animsys_eval_context_construct(depsgraph, 0.0f);
BKE_pose_apply_action_blend(pbd->ob, pbd->act, &anim_eval_context, pbd->blend_factor);
bAction *to_blend = poselib_action_to_blend(pbd);
BKE_pose_apply_action_blend(pbd->ob, to_blend, &anim_eval_context, fabs(pbd->blend_factor));
}
/* ---------------------------- */
static void poselib_blend_set_factor(PoseBlendData *pbd, const float new_factor)
{
pbd->blend_factor = CLAMPIS(new_factor, 0.0f, 1.0f);
const bool sign_changed = signf(new_factor) != signf(pbd->blend_factor);
if (sign_changed) {
/* The zero point was crossed, meaning that the pose will be flipped. This means the pose
* backup has to change, as it only contains the bones for one side. */
BKE_pose_backup_restore(pbd->pose_backup);
BKE_pose_backup_free(pbd->pose_backup);
}
pbd->blend_factor = CLAMPIS(new_factor, -1.0f, 1.0f);
pbd->needs_redraw = true;
if (sign_changed) {
poselib_backup_posecopy(pbd);
}
}
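
The backup swap is needed because the pose backup only covers the bones touched by the currently blended (possibly flipped) action; crossing zero switches actions and invalidates it. A standalone sketch of the clamp-and-detect-crossing step (signf is modeled after Blender's -1/+1 convention):

#include <algorithm>
#include <cstdio>

/* Illustrative model of Blender's signf(): maps everything to -1 or +1. */
static float signf(const float f)
{
  return (f < 0.0f) ? -1.0f : 1.0f;
}

int main()
{
  float blend_factor = 0.4f;
  const float new_factor = -0.2f; /* Slider dragged past zero. */

  const bool sign_changed = signf(new_factor) != signf(blend_factor);
  if (sign_changed) {
    /* In the operator: restore and free the old backup here... */
  }
  blend_factor = std::clamp(new_factor, -1.0f, 1.0f); /* CLAMPIS equivalent. */
  if (sign_changed) {
    /* ...and create a fresh backup for the other (flipped) action. */
  }
  std::printf("sign_changed=%d factor=%.2f\n", int(sign_changed), double(blend_factor));
  return 0;
}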
/* Return operator return value. */
@ -224,8 +247,6 @@ static int poselib_blend_handle_event(bContext *UNUSED(C), wmOperator *op, const
pbd->state = pbd->state == POSE_BLEND_BLENDING ? POSE_BLEND_ORIGINAL : POSE_BLEND_BLENDING;
pbd->needs_redraw = true;
break;
/* TODO(Sybren): use better UI for slider. */
}
return OPERATOR_RUNNING_MODAL;
@ -292,18 +313,21 @@ static bool poselib_blend_init_data(bContext *C, wmOperator *op, const wmEvent *
PoseBlendData *pbd;
op->customdata = pbd = MEM_callocN(sizeof(PoseBlendData), "PoseLib Preview Data");
bAction *action = poselib_blend_init_get_action(C, op);
if (action == NULL) {
pbd->act = poselib_blend_init_get_action(C, op);
if (pbd->act == NULL) {
return false;
}
/* Maybe flip the Action. */
/* Passing `flipped=True` is the same as flipping the sign of the blend factor. */
const bool apply_flipped = RNA_boolean_get(op->ptr, "flipped");
if (apply_flipped) {
action = flip_pose(C, ob, action);
pbd->free_action = true;
const float multiply_factor = apply_flipped ? -1.0f : 1.0f;
pbd->blend_factor = multiply_factor * RNA_float_get(op->ptr, "blend_factor");
/* Only construct the flipped pose if there is a chance it's actually needed. */
const bool is_interactive = (event != NULL);
if (is_interactive || pbd->blend_factor < 0) {
pbd->act_flipped = flip_pose(C, ob, pbd->act);
}
pbd->act = action;
/* Get the basic data. */
pbd->ob = ob;
@ -314,12 +338,12 @@ static bool poselib_blend_init_data(bContext *C, wmOperator *op, const wmEvent *
pbd->state = POSE_BLEND_INIT;
pbd->needs_redraw = true;
pbd->blend_factor = RNA_float_get(op->ptr, "blend_factor");
/* Just to avoid a clang-analyzer warning (false positive), it's set properly below. */
pbd->release_confirm_info.use_release_confirm = false;
/* Release confirm data. Only available if there's an event to work with. */
if (event != NULL) {
if (is_interactive) {
PropertyRNA *release_confirm_prop = RNA_struct_find_property(op->ptr, "release_confirm");
pbd->release_confirm_info.use_release_confirm = (release_confirm_prop != NULL) &&
RNA_property_boolean_get(op->ptr,
@ -328,10 +352,11 @@ static bool poselib_blend_init_data(bContext *C, wmOperator *op, const wmEvent *
ED_slider_init(pbd->slider, event);
ED_slider_factor_set(pbd->slider, pbd->blend_factor);
ED_slider_allow_overshoot_set(pbd->slider, false);
ED_slider_is_bidirectional_set(pbd->slider, true);
}
if (pbd->release_confirm_info.use_release_confirm) {
BLI_assert(event != NULL);
BLI_assert(is_interactive);
pbd->release_confirm_info.init_event_type = WM_userdef_event_type_from_keymap_type(
event->type);
}
@ -369,7 +394,8 @@ static void poselib_blend_cleanup(bContext *C, wmOperator *op)
poselib_keytag_pose(C, scene, pbd);
/* Ensure the redo panel has the actually-used value, instead of the initial value. */
RNA_float_set(op->ptr, "blend_factor", pbd->blend_factor);
RNA_float_set(op->ptr, "blend_factor", fabs(pbd->blend_factor));
RNA_boolean_set(op->ptr, "flipped", pbd->blend_factor < 0);
break;
}
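
Splitting the signed factor into a magnitude plus a flipped flag keeps the redo panel round-trippable: re-running the operator reconstructs the same signed factor. A quick standalone check of that round trip:

#include <cmath>
#include <cstdio>

int main()
{
  const float blend_factor = -0.75f; /* Signed factor at the end of the modal run. */

  /* What the redo panel stores. */
  const float stored_factor = std::fabs(blend_factor);
  const bool flipped = blend_factor < 0.0f;

  /* What a re-run of the operator reconstructs. */
  const float reconstructed = (flipped ? -1.0f : 1.0f) * stored_factor;
  std::printf("stored=%.2f flipped=%d reconstructed=%.2f\n",
              double(stored_factor), int(flipped), double(reconstructed));
  return 0;
}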
@ -381,7 +407,7 @@ static void poselib_blend_cleanup(bContext *C, wmOperator *op)
BKE_report(op->reports, RPT_ERROR, "Internal pose library error, canceling operator");
ATTR_FALLTHROUGH;
case POSE_BLEND_CANCEL:
ED_pose_backup_restore(pbd->pose_backup);
BKE_pose_backup_restore(pbd->pose_backup);
break;
}
@ -398,15 +424,13 @@ static void poselib_blend_free(wmOperator *op)
return;
}
if (pbd->free_action) {
/* Run before #poselib_tempload_exit to avoid any problems from indirectly
* referenced ID pointers. */
BKE_id_free(NULL, pbd->act);
if (pbd->act_flipped) {
BKE_id_free(NULL, pbd->act_flipped);
}
poselib_tempload_exit(pbd);
/* Free temp data for operator */
ED_pose_backup_free(pbd->pose_backup);
BKE_pose_backup_free(pbd->pose_backup);
pbd->pose_backup = NULL;
MEM_SAFE_FREE(op->customdata);
@ -526,6 +550,8 @@ static bool poselib_blend_poll(bContext *C)
void POSELIB_OT_apply_pose_asset(wmOperatorType *ot)
{
PropertyRNA *prop;
/* Identifiers: */
ot->name = "Apply Pose Asset";
ot->idname = "POSELIB_OT_apply_pose_asset";
@ -542,17 +568,21 @@ void POSELIB_OT_apply_pose_asset(wmOperatorType *ot)
RNA_def_float_factor(ot->srna,
"blend_factor",
1.0f,
0.0f,
-1.0f,
1.0f,
"Blend Factor",
"Amount that the pose is applied on top of the existing poses",
0.0f,
"Amount that the pose is applied on top of the existing poses. A negative "
"value will apply the pose flipped over the X-axis",
-1.0f,
1.0f);
RNA_def_boolean(ot->srna,
"flipped",
false,
"Apply Flipped",
"When enabled, applies the pose flipped over the X-axis");
prop = RNA_def_boolean(
ot->srna,
"flipped",
false,
"Apply Flipped",
"When enabled, applies the pose flipped over the X-axis. This is the same as "
"passing a negative `blend_factor`");
RNA_def_property_flag(prop, PROP_SKIP_SAVE);
}
void POSELIB_OT_blend_pose_asset(wmOperatorType *ot)
@ -578,22 +608,26 @@ void POSELIB_OT_blend_pose_asset(wmOperatorType *ot)
prop = RNA_def_float_factor(ot->srna,
"blend_factor",
0.0f,
0.0f,
-1.0f,
1.0f,
"Blend Factor",
"Amount that the pose is applied on top of the existing poses",
0.0f,
"Amount that the pose is applied on top of the existing poses. A "
"negative value will apply the pose flipped over the X-axis",
-1.0f,
1.0f);
/* Blending should always start at 0%, and not at whatever percentage was last used. This RNA
* property just exists for symmetry with the Apply operator (and thus simplicity of the rest of
* the code, which can assume this property exists). */
RNA_def_property_flag(prop, PROP_SKIP_SAVE);
RNA_def_boolean(ot->srna,
"flipped",
false,
"Apply Flipped",
"When enabled, applies the pose flipped over the X-axis");
prop = RNA_def_boolean(ot->srna,
"flipped",
false,
"Apply Flipped",
"When enabled, applies the pose flipped over the X-axis. This is the "
"same as passing a negative `blend_factor`");
RNA_def_property_flag(prop, PROP_SKIP_SAVE);
prop = RNA_def_boolean(ot->srna,
"release_confirm",
false,

View File

@ -22,6 +22,7 @@ set(INC
set(SRC
intern/curves_add.cc
intern/curves_ops.cc
intern/curves_selection.cc
)
set(LIB

View File

@ -6,7 +6,9 @@
#include <atomic>
#include "BLI_array_utils.hh"
#include "BLI_devirtualize_parameters.hh"
#include "BLI_index_mask_ops.hh"
#include "BLI_utildefines.h"
#include "BLI_vector_set.hh"
@ -748,7 +750,6 @@ static int curves_set_selection_domain_exec(bContext *C, wmOperator *op)
continue;
}
const eAttrDomain old_domain = eAttrDomain(curves_id->selection_domain);
curves_id->selection_domain = domain;
CurvesGeometry &curves = CurvesGeometry::wrap(curves_id->geometry);
@ -756,18 +757,21 @@ static int curves_set_selection_domain_exec(bContext *C, wmOperator *op)
if (curves.points_num() == 0) {
continue;
}
if (old_domain == ATTR_DOMAIN_POINT && domain == ATTR_DOMAIN_CURVE) {
VArray<float> curve_selection = curves.adapt_domain(
curves.selection_point_float(), ATTR_DOMAIN_POINT, ATTR_DOMAIN_CURVE);
curve_selection.materialize(curves.selection_curve_float_for_write());
attributes.remove(".selection_point_float");
const GVArray src = attributes.lookup(".selection", domain);
if (src.is_empty()) {
continue;
}
else if (old_domain == ATTR_DOMAIN_CURVE && domain == ATTR_DOMAIN_POINT) {
VArray<float> point_selection = curves.adapt_domain(
curves.selection_curve_float(), ATTR_DOMAIN_CURVE, ATTR_DOMAIN_POINT);
point_selection.materialize(curves.selection_point_float_for_write());
attributes.remove(".selection_curve_float");
const CPPType &type = src.type();
void *dst = MEM_malloc_arrayN(attributes.domain_size(domain), type.size(), __func__);
src.materialize(dst);
attributes.remove(".selection");
if (!attributes.add(".selection",
domain,
bke::cpp_type_to_custom_data_type(type),
bke::AttributeInitMoveArray(dst))) {
MEM_freeN(dst);
}
/* Use #ID_RECALC_GEOMETRY instead of #ID_RECALC_SELECT because it is handled as a generic
@ -801,46 +805,54 @@ static void CURVES_OT_set_selection_domain(wmOperatorType *ot)
RNA_def_property_flag(prop, (PropertyFlag)(PROP_HIDDEN | PROP_SKIP_SAVE));
}
static bool varray_contains_nonzero(const VArray<float> &data)
static bool contains(const VArray<bool> &varray, const bool value)
{
bool contains_nonzero = false;
devirtualize_varray(data, [&](const auto array) {
for (const int i : data.index_range()) {
if (array[i] != 0.0f) {
contains_nonzero = true;
break;
}
}
});
return contains_nonzero;
const CommonVArrayInfo info = varray.common_info();
if (info.type == CommonVArrayInfo::Type::Single) {
return *static_cast<const bool *>(info.data) == value;
}
if (info.type == CommonVArrayInfo::Type::Span) {
const Span<bool> span(static_cast<const bool *>(info.data), varray.size());
return threading::parallel_reduce(
span.index_range(),
4096,
false,
[&](const IndexRange range, const bool init) {
return init || span.slice(range).contains(value);
},
[&](const bool a, const bool b) { return a || b; });
}
return threading::parallel_reduce(
varray.index_range(),
2048,
false,
[&](const IndexRange range, const bool init) {
if (init) {
return init;
}
/* Alternatively, this could use #materialize to retrieve many values at once. */
for (const int64_t i : range) {
if (varray[i] == value) {
return true;
}
}
return false;
},
[&](const bool a, const bool b) { return a || b; });
}
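
threading::parallel_reduce above effectively computes a parallel OR with a per-chunk early-out: a chunk returns immediately once `init` is true, and chunk results combine with `||`. A standalone model of the same reduction using plain std::async chunks (illustrative; Blender's BLI threading API is not used here):

#include <algorithm>
#include <cstdio>
#include <future>
#include <vector>

/* Chunked "does the array contain `value`" check, modeled on the
 * parallel_reduce call above: per-chunk early-out, `||` as the combiner. */
static bool contains_parallel(const std::vector<bool> &data, const bool value)
{
  const size_t grain = 4096;
  std::vector<std::future<bool>> chunks;
  for (size_t start = 0; start < data.size(); start += grain) {
    const size_t end = std::min(start + grain, data.size());
    chunks.push_back(std::async(std::launch::async, [&data, value, start, end]() {
      for (size_t i = start; i < end; i++) {
        if (data[i] == value) {
          return true; /* Early-out within the chunk. */
        }
      }
      return false;
    }));
  }
  bool found = false;
  for (std::future<bool> &chunk : chunks) {
    found = chunk.get() || found; /* The `a || b` combiner. */
  }
  return found;
}

int main()
{
  std::vector<bool> selection(100000, false);
  selection[98765] = true;
  std::printf("contains true: %d\n", int(contains_parallel(selection, true)));
  return 0;
}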
bool has_anything_selected(const Curves &curves_id)
{
const CurvesGeometry &curves = CurvesGeometry::wrap(curves_id.geometry);
switch (curves_id.selection_domain) {
case ATTR_DOMAIN_POINT:
return varray_contains_nonzero(curves.selection_point_float());
case ATTR_DOMAIN_CURVE:
return varray_contains_nonzero(curves.selection_curve_float());
}
BLI_assert_unreachable();
return false;
const VArray<bool> selection = curves.attributes().lookup<bool>(".selection");
return !selection || contains(selection, true);
}
static bool any_point_selected(const CurvesGeometry &curves)
static bool has_anything_selected(const Span<Curves *> curves_ids)
{
return varray_contains_nonzero(curves.selection_point_float());
}
static bool any_point_selected(const Span<Curves *> curves_ids)
{
for (const Curves *curves_id : curves_ids) {
if (any_point_selected(CurvesGeometry::wrap(curves_id->geometry))) {
return true;
}
}
return false;
return std::any_of(curves_ids.begin(), curves_ids.end(), [](const Curves *curves_id) {
return has_anything_selected(*curves_id);
});
}
namespace select_all {
@ -854,6 +866,16 @@ static void invert_selection(MutableSpan<float> selection)
});
}
static void invert_selection(GMutableSpan selection)
{
if (selection.type().is<bool>()) {
array_utils::invert_booleans(selection.typed<bool>());
}
else if (selection.type().is<float>()) {
invert_selection(selection.typed<float>());
}
}
static int select_all_exec(bContext *C, wmOperator *op)
{
int action = RNA_enum_get(op->ptr, "action");
@ -861,27 +883,34 @@ static int select_all_exec(bContext *C, wmOperator *op)
VectorSet<Curves *> unique_curves = get_unique_editable_curves(*C);
if (action == SEL_TOGGLE) {
action = any_point_selected(unique_curves) ? SEL_DESELECT : SEL_SELECT;
action = has_anything_selected(unique_curves) ? SEL_DESELECT : SEL_SELECT;
}
for (Curves *curves_id : unique_curves) {
CurvesGeometry &curves = CurvesGeometry::wrap(curves_id->geometry);
bke::MutableAttributeAccessor attributes = curves.attributes_for_write();
if (action == SEL_SELECT) {
/* As an optimization, just remove the selection attributes when everything is selected. */
bke::MutableAttributeAccessor attributes = curves.attributes_for_write();
attributes.remove(".selection_point_float");
attributes.remove(".selection_curve_float");
attributes.remove(".selection");
}
else if (!attributes.contains(".selection")) {
BLI_assert(ELEM(action, SEL_INVERT, SEL_DESELECT));
/* If the attribute doesn't exist and the action is deselect or invert, create it with
* nothing selected: everything was selected before, so either action ends with nothing selected. */
attributes.add(".selection",
eAttrDomain(curves_id->selection_domain),
CD_PROP_BOOL,
bke::AttributeInitDefaultValue());
}
else {
MutableSpan<float> selection = curves_id->selection_domain == ATTR_DOMAIN_POINT ?
curves.selection_point_float_for_write() :
curves.selection_curve_float_for_write();
bke::GSpanAttributeWriter selection = attributes.lookup_for_write_span(".selection");
if (action == SEL_DESELECT) {
selection.fill(0.0f);
fill_selection_false(selection.span);
}
else if (action == SEL_INVERT) {
invert_selection(selection);
invert_selection(selection.span);
}
selection.finish();
}
/* Use #ID_RECALC_GEOMETRY instead of #ID_RECALC_SELECT because it is handled as a generic
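
The convention behind these branches: a missing `.selection` attribute means everything is selected, so select-all simply removes it, while deselect and invert of an all-selected state both end with nothing selected, which is exactly what creating the attribute with its default (false) value produces. A compact standalone model of that logic, with std::optional standing in for the attribute lookup:

#include <cstddef>
#include <cstdio>
#include <optional>
#include <vector>

enum class Action { Select, Deselect, Invert };

/* std::nullopt models "attribute missing", which means everything is selected. */
static void apply(std::optional<std::vector<bool>> &selection, const size_t size,
                  const Action action)
{
  if (action == Action::Select) {
    selection.reset(); /* Removing the attribute selects everything. */
  }
  else if (!selection) {
    /* Everything was selected, so deselect and invert both yield all-false;
     * creating the attribute with its default (false) is already the result. */
    selection = std::vector<bool>(size, false);
  }
  else {
    for (size_t i = 0; i < selection->size(); i++) {
      (*selection)[i] = (action == Action::Invert) ? !(*selection)[i] : false;
    }
  }
}

int main()
{
  std::optional<std::vector<bool>> selection; /* Missing: all selected. */
  apply(selection, 4, Action::Invert);
  std::printf("size=%d first=%d\n", int(selection->size()), int((*selection)[0]));
  return 0;
}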

View File

@ -0,0 +1,117 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup edcurves
*/
#include "BLI_index_mask_ops.hh"
#include "BKE_attribute.hh"
#include "BKE_curves.hh"
#include "ED_curves.h"
#include "ED_object.h"
namespace blender::ed::curves {
static IndexMask retrieve_selected_curves(const bke::CurvesGeometry &curves,
Vector<int64_t> &r_indices)
{
const IndexRange curves_range = curves.curves_range();
const bke::AttributeAccessor attributes = curves.attributes();
/* Interpolate from points to curves manually as a performance improvement, since we are only
* interested in whether any point in each curve is selected. Retrieve meta data since
* #lookup_or_default from the attribute API doesn't give the domain of the attribute. */
std::optional<bke::AttributeMetaData> meta_data = attributes.lookup_meta_data(".selection");
if (meta_data && meta_data->domain == ATTR_DOMAIN_POINT) {
/* Avoid interpolating the attribute to the curve domain by retrieving
* the point domain values directly. */
const VArray<bool> selection = attributes.lookup_or_default<bool>(
".selection", ATTR_DOMAIN_POINT, true);
if (selection.is_single()) {
return selection.get_internal_single() ? IndexMask(curves_range) : IndexMask();
}
return index_mask_ops::find_indices_based_on_predicate(
curves_range, 512, r_indices, [&](const int64_t curve_i) {
const IndexRange points = curves.points_for_curve(curve_i);
/* The curve is selected if any of its points are selected. */
Array<bool, 32> point_selection(points.size());
selection.materialize_compressed(points, point_selection);
return point_selection.as_span().contains(true);
});
}
const VArray<bool> selection = attributes.lookup_or_default<bool>(
".selection", ATTR_DOMAIN_CURVE, true);
return index_mask_ops::find_indices_from_virtual_array(curves_range, selection, 2048, r_indices);
}
IndexMask retrieve_selected_curves(const Curves &curves_id, Vector<int64_t> &r_indices)
{
const bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry);
return retrieve_selected_curves(curves, r_indices);
}
static IndexMask retrieve_selected_points(const bke::CurvesGeometry &curves,
Vector<int64_t> &r_indices)
{
return index_mask_ops::find_indices_from_virtual_array(
curves.points_range(),
curves.attributes().lookup_or_default<bool>(".selection", ATTR_DOMAIN_POINT, true),
2048,
r_indices);
}
IndexMask retrieve_selected_points(const Curves &curves_id, Vector<int64_t> &r_indices)
{
const bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry);
return retrieve_selected_points(curves, r_indices);
}
void ensure_selection_attribute(Curves &curves_id, const eCustomDataType create_type)
{
bke::CurvesGeometry &curves = bke::CurvesGeometry::wrap(curves_id.geometry);
bke::MutableAttributeAccessor attributes = curves.attributes_for_write();
if (attributes.contains(".selection")) {
return;
}
const eAttrDomain domain = eAttrDomain(curves_id.selection_domain);
const int domain_size = attributes.domain_size(domain);
switch (create_type) {
case CD_PROP_BOOL:
attributes.add(".selection",
domain,
CD_PROP_BOOL,
bke::AttributeInitVArray(VArray<bool>::ForSingle(true, domain_size)));
break;
case CD_PROP_FLOAT:
attributes.add(".selection",
domain,
CD_PROP_FLOAT,
bke::AttributeInitVArray(VArray<float>::ForSingle(1.0f, domain_size)));
break;
default:
BLI_assert_unreachable();
}
}
void fill_selection_false(GMutableSpan selection)
{
if (selection.type().is<bool>()) {
selection.typed<bool>().fill(false);
}
else if (selection.type().is<float>()) {
selection.typed<float>().fill(0.0f);
}
}
void fill_selection_true(GMutableSpan selection)
{
if (selection.type().is<bool>()) {
selection.typed<bool>().fill(true);
}
else if (selection.type().is<float>()) {
selection.typed<float>().fill(1.0f);
}
}
} // namespace blender::ed::curves
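
The helpers above would typically be used together as in the following sketch (Blender-internal code, so not standalone; it assumes a valid editable `curves_id` and uses only the signatures declared in this file, with `indices` kept alive because the returned mask may reference it):

/* Sketch of a typical call site. Keep `indices` alive for as long as the
 * mask is used; the mask may reference its storage. */
blender::Vector<int64_t> indices;
const blender::IndexMask selected_curves =
    blender::ed::curves::retrieve_selected_curves(curves_id, indices);
if (selected_curves.is_empty()) {
  return; /* Nothing selected, nothing to do. */
}
/* Guarantee a boolean `.selection` attribute exists before writing to it. */
blender::ed::curves::ensure_selection_attribute(curves_id, CD_PROP_BOOL);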

View File

@ -881,7 +881,7 @@ void GPENCIL_OT_frame_clean_loose(wmOperatorType *ot)
INT_MAX);
}
/* ********************* Clean Duplicated Frames ************************** */
/* ********************* Clean Duplicate Frames ************************** */
static bool gpencil_frame_is_equal(const bGPDframe *gpf_a, const bGPDframe *gpf_b)
{
if ((gpf_a == NULL) || (gpf_b == NULL)) {
@ -1015,9 +1015,9 @@ void GPENCIL_OT_frame_clean_duplicate(wmOperatorType *ot)
};
/* identifiers */
ot->name = "Clean Duplicated Frames";
ot->name = "Clean Duplicate Frames";
ot->idname = "GPENCIL_OT_frame_clean_duplicate";
ot->description = "Remove any duplicated frame";
ot->description = "Remove duplicate keyframes";
/* callbacks */
ot->exec = gpencil_frame_clean_duplicate_exec;

View File

@ -3356,7 +3356,7 @@ void ED_gpencil_layer_merge(bGPdata *gpd,
}
}
void gpencil_layer_new_name_get(bGPdata *gpd, char *rname)
static void gpencil_layer_new_name_get(bGPdata *gpd, char *rname)
{
int index = 0;
LISTBASE_FOREACH (bGPDlayer *, gpl, &gpd->layers) {

View File

@ -369,19 +369,6 @@ void ED_mesh_deform_bind_callback(struct Object *object,
int verts_num,
float cagemat[4][4]);
/* Pose backups, pose_backup.c */
struct PoseBackup;
/**
* Create a backup of those bones that are animated in the given action.
*/
struct PoseBackup *ED_pose_backup_create_selected_bones(
const struct Object *ob, const struct bAction *action) ATTR_WARN_UNUSED_RESULT;
struct PoseBackup *ED_pose_backup_create_all_bones(
const struct Object *ob, const struct bAction *action) ATTR_WARN_UNUSED_RESULT;
bool ED_pose_backup_is_selection_relevant(const struct PoseBackup *pose_backup);
void ED_pose_backup_restore(const struct PoseBackup *pbd);
void ED_pose_backup_free(struct PoseBackup *pbd);
#ifdef __cplusplus
}
#endif

View File

@ -20,20 +20,69 @@ void ED_operatortypes_curves(void);
#ifdef __cplusplus
# include "BKE_curves.hh"
# include "BKE_attribute.hh"
# include "BLI_index_mask.hh"
# include "BLI_vector.hh"
# include "BLI_vector_set.hh"
# include "BKE_curves.hh"
namespace blender::ed::curves {
bke::CurvesGeometry primitive_random_sphere(int curves_size, int points_per_curve);
bool has_anything_selected(const Curves &curves_id);
VectorSet<Curves *> get_unique_editable_curves(const bContext &C);
void ensure_surface_deformation_node_exists(bContext &C, Object &curves_ob);
/* -------------------------------------------------------------------- */
/** \name Poll Functions
* \{ */
bool editable_curves_with_surface_poll(bContext *C);
bool curves_with_surface_poll(bContext *C);
bool editable_curves_poll(bContext *C);
bool curves_poll(bContext *C);
/** \} */
/* -------------------------------------------------------------------- */
/** \name Selection
*
* Selection on curves can be stored on either attribute domain: per-curve or per-point. It
* can be stored with a float or boolean data-type. The boolean data-type is faster, smaller, and
* corresponds better to edit-mode selections, but the float data type is useful for soft selection
* (like masking) in sculpt mode.
*
* The attribute API does the necessary type and domain conversions and can handle most
* interaction with the selection attribute, but these functions implement some helpful
* utilities on top of that.
* \{ */
void fill_selection_false(GMutableSpan span);
void fill_selection_true(GMutableSpan span);
/**
* Return true if any element is selected, on either domain with either type.
*/
bool has_anything_selected(const Curves &curves_id);
/**
* Find curves that have any point selected (a selection factor greater than zero),
* or curves that have their own selection factor greater than zero.
*/
IndexMask retrieve_selected_curves(const Curves &curves_id, Vector<int64_t> &r_indices);
/**
* Find points that are selected (a selection factor greater than zero),
* or points in curves with a selection factor greater than zero.
*/
IndexMask retrieve_selected_points(const Curves &curves_id, Vector<int64_t> &r_indices);
/**
* If the ".selection" attribute doesn't exist, create it with the requested type (bool or float).
*/
void ensure_selection_attribute(Curves &curves_id, const eCustomDataType create_type);
/** \} */
} // namespace blender::ed::curves
#endif

View File

@ -17,26 +17,3 @@ void ED_operatortypes_sculpt_curves(void);
#ifdef __cplusplus
}
#endif
#ifdef __cplusplus
# include "BLI_index_mask.hh"
# include "BLI_vector.hh"
namespace blender::ed::sculpt_paint {
/**
* Find curves that have any point selected (a selection factor greater than zero),
* or curves that have their own selection factor greater than zero.
*/
IndexMask retrieve_selected_curves(const Curves &curves_id, Vector<int64_t> &r_indices);
/**
* Find points that are selected (a selection factor greater than zero),
* or points in curves with a selection factor greater than zero).
*/
IndexMask retrieve_selected_points(const Curves &curves_id, Vector<int64_t> &r_indices);
} // namespace blender::ed::sculpt_paint
#endif

View File

@ -78,15 +78,6 @@ void ED_node_draw_snap(
/* node_draw.cc */
/**
* Draw a single node socket at default size.
* \note this is only called from external code, internally #node_socket_draw_nested() is used for
* optimized drawing of multiple/all sockets of a node.
*/
void ED_node_socket_draw(struct bNodeSocket *sock,
const struct rcti *rect,
const float color[4],
float scale);
void ED_node_tree_update(const struct bContext *C);
void ED_node_tag_update_id(struct ID *id);

View File

@ -9,6 +9,7 @@ struct SpaceNode;
struct ARegion;
struct Main;
struct bNodeTree;
struct rcti;
namespace blender::ed::space_node {
@ -22,4 +23,11 @@ void node_insert_on_link_flags_set(SpaceNode &snode, const ARegion &region);
void node_insert_on_link_flags(Main &bmain, SpaceNode &snode);
void node_insert_on_link_flags_clear(bNodeTree &node_tree);
/**
* Draw a single node socket at default size.
* \note This is only called from external code; internally #node_socket_draw_nested() is used for
* optimized drawing of multiple/all sockets of a node.
*/
void node_socket_draw(bNodeSocket *sock, const rcti *rect, const float color[4], float scale);
} // namespace blender::ed::space_node

View File

@ -217,7 +217,8 @@ void ED_area_tag_refresh(ScrArea *area);
void ED_area_do_refresh(struct bContext *C, ScrArea *area);
struct AZone *ED_area_azones_update(ScrArea *area, const int mouse_xy[2]);
/**
* Use NULL to disable it.
* Show the given text in the area's header, instead of its regular contents.
* Use NULL to disable this and show the regular header contents again.
*/
void ED_area_status_text(ScrArea *area, const char *str);
/**

View File

@ -98,6 +98,9 @@ void ED_slider_factor_set(struct tSlider *slider, float factor);
bool ED_slider_allow_overshoot_get(struct tSlider *slider);
void ED_slider_allow_overshoot_set(struct tSlider *slider, bool value);
bool ED_slider_is_bidirectional_get(struct tSlider *slider);
void ED_slider_is_bidirectional_set(struct tSlider *slider, bool value);
/* ************** XXX OLD CRUFT WARNING ************* */
/**

View File

@ -175,8 +175,8 @@ static bool eyedropper_cryptomatte_sample_renderlayer_fl(RenderLayer *render_lay
if (STRPREFIX(render_pass->name, render_pass_name_prefix) &&
!STREQLEN(render_pass->name, render_pass_name_prefix, sizeof(render_pass->name))) {
BLI_assert(render_pass->channels == 4);
const int x = (int)(fpos[0] * render_pass->rectx);
const int y = (int)(fpos[1] * render_pass->recty);
const int x = int(fpos[0] * render_pass->rectx);
const int y = int(fpos[1] * render_pass->recty);
const int offset = 4 * (y * render_pass->rectx + x);
zero_v3(r_col);
r_col[0] = render_pass->rect[offset];
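
The sampling math maps a normalized position to a flat RGBA float buffer: offset = 4 * (y * rectx + x). A standalone worked example:

#include <cstdio>

int main()
{
  const int rectx = 1920, recty = 1080; /* Render-pass resolution. */
  const float fpos[2] = {0.5f, 0.25f};  /* Normalized sample position. */

  const int x = int(fpos[0] * rectx);     /* 960 */
  const int y = int(fpos[1] * recty);     /* 270 */
  const int offset = 4 * (y * rectx + x); /* 4 floats (RGBA) per pixel. */
  std::printf("x=%d y=%d offset=%d\n", x, y, offset);
  return 0;
}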

Some files were not shown because too many files have changed in this diff.